Commit 1e740e63 authored by mmassaviol's avatar mmassaviol
Browse files

Update RNAseq workflow (debug edger deseq2)

parent 75f9a13f
FROM rocker/r-ver:3.5.3
ENV PATH /opt/biotools/bin:$PATH
ENV ROOTSYS /opt/biotools/root
ENV LD_LIBRARY_PATH '$LD_LIBRARY_PATH:$ROOTSYS/lib'
RUN apt-get update
RUN apt-get install -yq tzdata
RUN apt-get install -y locales
RUN locale-gen "en_US.UTF-8"
RUN export LC_ALL=en_US.UTF-8
RUN export LANG=en_US.UTF-8
RUN apt-get install -y curl wget apt-utils
RUN apt-get install -y gcc fort77 aptitude
RUN aptitude install -y g++ xorg-dev libreadline-dev gfortran
RUN apt-get install -y libssl-dev libxml2-dev libpcre3-dev liblzma-dev libbz2-dev libcurl4-openssl-dev liblapack3 git nano graphviz python3 python3-pip
RUN apt-get install -y autotools-dev automake cmake grep sed dpkg fuse zip build-essential pkg-config bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion zlib1g-dev libncurses5-dev libncursesw5-dev
RUN apt-get clean
RUN if [ ! -d "/opt/biotools" ];then mkdir /opt/biotools; fi
RUN if [ ! -d "/opt/biotools/bin" ];then mkdir /opt/biotools/bin; fi
RUN chmod 777 -R /opt/biotools/
ENV PATH /opt/biotools/bin:$PATH
RUN pip3 install snakemake==5.4.0 oyaml
RUN pip3 install multiqc==1.7
RUN fic=$(find /usr/ -name stacks.py) && sed -i 's/out_dict\[s_name\] = cdict/out_dict\[content\[0\]\] = cdict/' $fic
RUN Rscript -e 'install.packages("yaml",Ncpus=8,repos="https://cloud.r-project.org/")'
RUN Rscript -e 'install.packages("DT",Ncpus=8,repos="https://cloud.r-project.org/")'
RUN Rscript -e 'install.packages("shiny",Ncpus=8,repos="https://cloud.r-project.org/")'
RUN Rscript -e 'install.packages("shinydashboard",Ncpus=8,repos="https://cloud.r-project.org/")'
RUN Rscript -e 'install.packages("shinyjs",Ncpus=8,repos="https://cloud.r-project.org/")'
RUN Rscript -e 'install.packages("shinyFiles",Ncpus=8,repos="https://cloud.r-project.org/")'
RUN Rscript -e 'install.packages("BiocManager",Ncpus=8,repos="https://cloud.r-project.org/")'
FROM mmassaviol/mbb_workflows_base:latest
COPY files /workflow
COPY sagApp /sagApp
......@@ -71,13 +34,13 @@ RUN Rscript -e 'BiocManager::install("rhdf5", version = "3.8",Ncpus=8, clean=TRU
RUN Rscript -e 'BiocManager::install("GenomicFeatures", version = "3.8",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'install.packages("pheatmap",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'BiocManager::install("DESeq2", version = "3.8",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'BiocManager::install("apeglm", version = "3.8",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'BiocManager::install("BiocParallel", version = "3.8",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'install.packages("pheatmap",Ncpus=8, clean=TRUE)'
EXPOSE 3838
CMD ["Rscript", "-e", "setwd('/sagApp/'); shiny::runApp('/sagApp/app.R',port=3838 , host='0.0.0.0')"]
......@@ -50,7 +50,6 @@ params:
deseq2_annotations: ''
deseq2_fitType: parametric
deseq2_betaPrior: false
deseq2_testType: LRT
samples: []
groups: []
steps:
......@@ -222,14 +221,7 @@ params_info:
rule: deseq2
type: checkbox
label: 'betaPrior: whether or not to put a zero-mean normal prior on the non-intercept
coefficients. By default, the beta prior is used only for the Wald test, but
can also be specified for the likelihood ratio test.'
deseq2_testType:
tool: deseq2
rule: deseq2
type: radio
label: 'Test type: Use either Wald significance tests, or the likelihood ratio
test on the difference in deviance between a full and reduced model formula'
coefficients.'
prepare_report_scripts: []
prepare_report_outputs: {}
outputs:
......
......@@ -13,7 +13,7 @@ library(ggplot2)
register(MulticoreParam(snakemake@config[["deseq2_threads"]]))
samples = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V1
groups = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V2
conditions = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V2
#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132")
#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5")
files = sort(snakemake@input[["counts"]])
......@@ -29,10 +29,10 @@ if (snakemake@config[["deseq2_tx2gene"]] == TRUE){
txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE)
}
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "groups"
designMat <- model.matrix(~conditions)
colnames(designMat)[2] = "condition"
sampleTable <- data.frame(condition = factor(groups))
sampleTable <- data.frame(condition = factor(conditions))
rownames(sampleTable) <- colnames(txi$counts)
dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat)
......@@ -40,13 +40,10 @@ dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds$condition <- factor(dds$condition, levels = unique(groups))
dds$condition <- factor(dds$condition, levels = unique(conditions))
dds <- DESeq(dds, fitType = snakemake@config[["deseq2_fitType"]], betaPrior = as.logical(snakemake@config[["deseq2_betaPrior"]]), test="Wald")
if (snakemake@config[["deseq2_testType"]] == "LRT"){
dds <- DESeq(dds, fitType = snakemake@config[["deseq2_fitType"]], betaPrior = as.logical(snakemake@config[["deseq2_betaPrior"]]), test=snakemake@config[["deseq2_testType"]], reduced= ~ 1)
}else{
dds <- DESeq(dds, fitType = snakemake@config[["deseq2_fitType"]], betaPrior = as.logical(snakemake@config[["deseq2_betaPrior"]]), test=snakemake@config[["deseq2_testType"]])
}
res <- results(dds)
save.image(snakemake@output[["r_data"]])
......@@ -58,7 +55,7 @@ cpm_counts = cpm(txi$counts)
pca = prcomp(t(cpm_counts))
png(filename = snakemake@output[["PCA"]], height=800, width=800)
ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = groups)) + geom_point()
ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
dev.off()
options(digit=4)
......@@ -93,7 +90,7 @@ select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20]
df <- as.data.frame(colData(dds)[,c("condition")])
ntd <- normTransform(dds)
rownames(df) = samples
colnames(df) = "Groups"
colnames(df) = "Conditions"
assayNTD = assay(ntd)
dev.off()
......
......@@ -7,10 +7,10 @@ library(rhdf5)
library(GenomicFeatures)
library(ggplot2)
samples = unlist(names(snakemake@config[["groups"]]))
samples = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V1
conditions = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V2
files = snakemake@input[["counts"]]
names(files) = samples
groups = unlist(unname(snakemake@config[["groups"]]))
if (snakemake@config[["edger_tx2gene"]] == TRUE){
TxDb <- makeTxDbFromGFF(file = snakemake@config[["edger_annotations"]])
......@@ -34,8 +34,8 @@ dgelist <- calcNormFactors(dgelist, method=snakemake@config[["edger_normfact"]])
#plotMDS(dgelist)
#'grep' is a string matching function.
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "groups"
designMat <- model.matrix(~conditions)
colnames(designMat)[2] = "Conditions"
de.com <- c()
dgelist$samples$group = designMat[,2]
......@@ -74,7 +74,7 @@ cpm_counts = cpm(cts)
pca = prcomp(t(cpm_counts))
png(filename = paste(snakemake@output[["PCA"]],sep = "/"), height=800, width=800)
ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = groups)) + geom_point()
ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
dev.off()
#plotMDS(dgelist, main = "Distances between gene expression profiles")
......@@ -98,7 +98,14 @@ with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log1
cpmTop = cpm_counts[rownames(topGenes$table),]
library(pheatmap)
df <- as.data.frame(conditions)
rownames(df) = samples
colnames(df) = "Conditions"
dev.off()
png(filename = paste(snakemake@output[["Heatmap"]],sep = "/"), height=800, width=800)
heatmap(cpmTop)
pheatmap(cpmTop, cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
#heatmap(cpmTop)
dev.off()
#plot_ly(x = colnames(cpmTop),y = rownames(cpmTop),z=cpmTop,type = "heatmap")
\ No newline at end of file
......@@ -112,11 +112,11 @@ singularity run --app appName this_container.sif
Rscript -e 'BiocManager::install("GenomicFeatures", version = "3.8",Ncpus=8, clean=TRUE)'
Rscript -e 'install.packages("pheatmap",Ncpus=8, clean=TRUE)'
Rscript -e 'BiocManager::install("DESeq2", version = "3.8",Ncpus=8, clean=TRUE)'
Rscript -e 'BiocManager::install("apeglm", version = "3.8",Ncpus=8, clean=TRUE)'
Rscript -e 'BiocManager::install("BiocParallel", version = "3.8",Ncpus=8, clean=TRUE)'
Rscript -e 'install.packages("pheatmap",Ncpus=8, clean=TRUE)'
import oyaml as yaml
import shutil
def read_yaml(filepath):
try:
......@@ -15,4 +16,11 @@ def write_yaml(filepath,data):
with open(filepath, 'w') as file:
yaml.dump(data, file, default_flow_style=False)
except IOError as e:
print("Error in file opening:", e)
\ No newline at end of file
print("Error in file opening:", e)
def copy_dir(src,dst):
try:
shutil.copytree(src,dst)
except FileExistsError:
shutil.rmtree(dst, ignore_errors=True)
shutil.copytree(src,dst)
\ No newline at end of file
......@@ -10,12 +10,18 @@ MenuGauche = sidebarMenu(id="sidebarmenu",
menuItem("Differential expression", tabName="DE", icon=icon("pencil", lib="font-awesome"), newtab=FALSE),
menuItem("Rule Graph", tabName="RULEGRAPH", icon=icon("gear", lib="font-awesome"), newtab=FALSE),
menuItem("Draw workflow graph", tabName="RULEGRAPH", icon=icon("gear", lib="font-awesome"), newtab=FALSE),
tags$br(),
downloadButton("DownloadParams", "Download config file", class="btn btn-light", style="color:black;margin: 6px 5px 6px 15px;"),
tags$br(),
tags$br(),
numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16),
selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('none'='none','Trimming'='trimming','Quality check'='quality_check','Quantification'='quantification','Differential expression'='DE',"All"="all")), tags$br(),
selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('No'='none','Trimming'='trimming','Quality check'='quality_check','Quantification'='quantification','Differential expression'='DE')), tags$br(),
actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"),
actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"),
......
......@@ -221,6 +221,13 @@ observeEvent(input$unlock,{
input_list <- reactiveValuesToList(input)
toggle_inputs(input_list,T,F)
})
output$DownloadParams <- downloadHandler(
filename = function() {
paste0("params", Sys.Date(), ".yaml", sep="")
},
content = function(file) {
save_params(file)
})
source("./server/opt_global.R", local=T)
......
......@@ -62,9 +62,7 @@ conditionalPanel(condition = "input.selectDE == 'deseq2'",box(title = "DESeq2",
radioButtons("deseq2_fitType", label = "Type of fitting of dispersions to the mean intensity", choices = list("parametric" = "parametric", "local" = "local", "mean" = "mean"), selected = "parametric", width = "auto"),
checkboxInput("deseq2_betaPrior", label = "betaPrior: whether or not to put a zero-mean normal prior on the non-intercept coefficients. By default, the beta prior is used only for the Wald test, but can also be specified for the likelihood ratio test.", value = FALSE),
radioButtons("deseq2_testType", label = "Test type: Use either Wald significance tests, or the likelihood ratio test on the difference in deviance between a full and reduced model formula", choices = list("LRT" = "LRT", "Wald" = "Wald"), selected = "LRT", width = "auto"),
checkboxInput("deseq2_betaPrior", label = "betaPrior: whether or not to put a zero-mean normal prior on the non-intercept coefficients.", value = FALSE),
p("DESeq2: Differential gene expression analysis based on the negative binomial distribution. Using normalized count data."),
......
......@@ -181,12 +181,6 @@ save_params <- function(path_param){
res = paste0(res, paste("deseq2_betaPrior:", "false", "\n", sep = " "))
}
if(!is.na(as.numeric(input$deseq2_testType))) {
res = paste0(res, paste("deseq2_testType:", input$deseq2_testType, "\n", sep = " "))
} else {
res = paste0(res, paste("deseq2_testType:", paste0('"', input$deseq2_testType, '"'), "\n", sep = " "))
}
a = yaml.load_file("/workflow/params.total.yml")
p = a[["params"]]
a["params"] = NULL
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment