Commit da48794e authored by mmassaviol's avatar mmassaviol
Browse files

Update RNAseq

parent 734bfaaa
......@@ -3,13 +3,12 @@ import re
import csv
import snakemake.utils
dir_path = "/workflow"
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
OUTPUTS = config["outputs"]
PARAMS_INFO = config["params_info"]
config = config["params"]
##########
......@@ -85,8 +84,6 @@ def prepare_report_inputs():
inputs = list()
for step in STEPS:
inputs.extend(step_outputs(step["name"]))
if (step["name"] == config["final_step"]):
break
return inputs
def prepare_report_scripts():
......@@ -96,8 +93,6 @@ def prepare_report_scripts():
script = tool+".prepare.report.R"
if (script in PREPARE_REPORT_SCRIPTS):
scripts.append("/workflow/scripts/"+script)
if (tool == config["final_step"]):
break
return scripts
def prepare_report_outputs():
......@@ -108,8 +103,6 @@ def prepare_report_outputs():
if (tool in PREPARE_REPORT_OUTPUTS.keys()):
for output in PREPARE_REPORT_OUTPUTS[tool]:
outputs.append(config["results_dir"]+"/"+tool+"/"+output)
if (tool == config["final_step"]):
break
return outputs
def multiqc_inputs():
......@@ -424,16 +417,19 @@ rule prepare_report:
i+=1
# Params list for Multiqc report
params_list = "params_name\tvalue\n"
head = """
# description: 'This is the list of the parameters for each rule'
params_list = "params_name\tdescription\tvalue\n"
head = """# description: 'This is the list of the parameters for each rule'
# section_name: 'Workflow parameters'
"""
for step in STEPS:
tool = config[step["name"]]
for key, value in config.items():
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]):
params_list += key + "\t'" + str(value) + "'\n"
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
description = PARAMS_INFO[key]["label"]
else:
description = ''
params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n"
with open(output.params_tab,"w") as out:
out.write(head)
......@@ -459,7 +455,7 @@ rule multiqc:
# Final Snakemake rule waiting for the outputs of the final step chosen by the user (default: all steps)
rule all:
input:
workflow_outputs(config["final_step"])
workflow_outputs("all")
output:
Snakefile = config["results_dir"]+"/workflow/Snakefile",
get_samples = config["results_dir"]+"/workflow/get_samples.py",
......
import re
import sys
from tools import read_yaml
from tools import *
config = read_yaml(sys.argv[1])
......
#!/usr/bin/env python3
# This script takes a reads directory and a parameter telling whether the reads are paired-end or single-end, and returns the sample list and the file suffix
# Needs 2 arguments: reads_directory, SeOrPe
# SeOrPe is SE for single-end reads and PE for paired-end reads
# Usage: ./get_samples.py reads_directory SeOrPe
import os
import re
import csv
import sys
def sample_list(dir, filesuffix=".fastq.gz"):
    """Return the sorted list of unique sample names found in *dir*.

    A sample name is the part of a filename that precedes an optional
    ``_R1``/``_R2`` read-pair tag followed by *filesuffix*.

    Parameters:
        dir: path of the directory to scan for read files.
        filesuffix: literal filename suffix to match (default ``.fastq.gz``).

    Returns:
        Sorted list of distinct sample names (one entry per sample even
        when both ``_R1`` and ``_R2`` files are present).
    """
    samples = list()
    files = os.listdir(dir)
    # re.escape makes the dots in the suffix match literally; without it
    # "." is a regex wildcard and unrelated names like "xfastqygz" match.
    regex = re.compile(r"^(.+?)(_R1|_R2|)(" + re.escape(filesuffix) + ")")
    for file in files:
        res = re.match(regex, file)
        if res:
            if res.group(1) not in samples:
                samples.append(res.group(1))
    return sorted(samples)
def sample_list2(dir, SeOrPe):
def sample_list(dir, SeOrPe):
samples = list()
suffixes = list()
files = os.listdir(dir)
if SeOrPe == "PE":
regex = re.compile("^(.+?)(_R1|_R2)(.+)")
regex = re.compile(r"^(.+?)(_R1|_R2)(.+)")
else:
regex = re.compile("^(.+?)(\..*)")
regex = re.compile(r"^(.+?)(\..*)")
for file in files:
res = re.match(regex, file)
if res:
......@@ -34,25 +27,17 @@ def sample_list2(dir, SeOrPe):
suffixes.append(res.group(2))
if (len(set(suffixes)) == 1 ):
return {'samples': samples, 'suffix': set(suffixes)}
return {'samples': sorted(samples), 'suffix': list(set(suffixes))[0]}
else:
pass
exit("Files have different suffixes:" + ','.join(suffixes))
def group_list(dir, groupfile, filesuffix=".fastq.gz"):
samples = sample_list(dir,filesuffix)
if os.path.isfile(groupfile):
groups = dict()
with open(groupfile, mode="r") as infile:
reader = csv.reader(infile)
groups = {row[0]: row[1] for row in reader}
return {sample: groups[sample] for sample in samples}
def main():
if len(sys.argv) == 3:
print(sample_list(sys.argv[1],sys.argv[2]))
else:
return {sample: "group" for sample in samples}
#print(sample_list2(sys.argv[1],sys.argv[2]))
exit("""Needs 2 arguments: reads_directory, SeOrPe
Usage: ./get_samples.py reads_directory SeOrPe""")
if len(sys.argv) == 4:
print(group_list(sys.argv[1], sys.argv[2], sys.argv[3]))
else:
print(group_list(sys.argv[1], sys.argv[2]))
if __name__ == "__main__":
# execute only if run as a script
main()
......@@ -2,7 +2,6 @@ pipeline: RNAseq
params:
results_dir: /Results
sample_dir: /Data
sample_suffix: .fastq.gz
group_file_select: server
group_file: ''
SeOrPe: PE
......@@ -54,7 +53,6 @@ params:
deseq2_testType: LRT
samples: []
groups: []
final_step: all
steps:
- title: Trimming
name: trimming
......@@ -85,8 +83,6 @@ params_info:
type: output_dir
sample_dir:
type: input_dir
sample_suffix:
type: text
group_file_select:
type: select
group_file:
......@@ -97,14 +93,17 @@ params_info:
tool: trimmomatic
rule: trimmomatic_SE
type: numeric
label: Number of threads to use
trimmomatic_qc_score:
tool: trimmomatic
rule: trimmomatic_SE
type: radio
label: Quality score encoding
fastqc_threads:
tool: fastqc
rule: fastqc_PE
type: numeric
label: Number of threads to use
kallisto_index_input_select:
tool: kallisto
rule: kallisto_index
......@@ -113,22 +112,28 @@ params_info:
tool: kallisto
rule: kallisto_index
type: input_file
label: Reference fasta file
kallisto_quant_threads:
tool: kallisto
rule: kallisto_quant_PE
type: numeric
label: Number of threads to use
kallisto_quant_fragment_length_SE:
tool: kallisto
rule: kallisto_quant_SE
type: numeric
label: Estimated average fragment length
kallisto_quant_standard_deviation_SE:
tool: kallisto
rule: kallisto_quant_SE
type: numeric
label: Estimated standard deviation of fragment length
kallisto_quant_stranded_PE:
tool: kallisto
rule: kallisto_quant_PE
type: radio
label: For strand specific mode choose --fr-stranded if the first read is forward
and choose --rf-stranded if the first read is reverse
salmon_index_input_select:
tool: salmon
rule: salmon_index
......@@ -137,22 +142,27 @@ params_info:
tool: salmon
rule: salmon_index
type: input_file
label: Reference fasta file
salmon_index_threads:
tool: salmon
rule: salmon_index
type: numeric
label: Number of threads to use for index creation
salmon_quant_threads:
tool: salmon
rule: salmon_quant_PE
type: numeric
label: Number of threads to use
edger_threads:
tool: edger
rule: edger
type: numeric
label: Number of threads to use
edger_tx2gene:
tool: edger
rule: edger
type: checkbox
label: 'Aggregate transcripts counts to gene counts : '
edger_annotations_select:
tool: edger
rule: edger
......@@ -161,26 +171,38 @@ params_info:
tool: edger
rule: edger
type: input_file
label: 'Annotation file (gtf or gff) : '
edger_normfact:
tool: edger
rule: edger
type: radio
label: Calculate normalization factors to scale the raw library sizes.
edger_dispersion:
tool: edger
rule: edger
type: text
label: 'Dispersion: either a numeric vector of dispersions or a character string
indicating that dispersions should be taken from the data.'
edger_testType:
tool: edger
rule: edger
type: radio
label: 'Test type: exactTest: Computes p-values for differential abundance for
each gene between two samples, conditioning on the total count for each gene.
The counts in each group are assumed to follow a binomial distribution
glmLRT: Fits a negative binomial generalized log-linear model to the read counts
for each gene and conducts genewise statistical tests.'
deseq2_threads:
tool: deseq2
rule: deseq2
type: numeric
label: Number of threads to use
deseq2_tx2gene:
tool: deseq2
rule: deseq2
type: checkbox
label: 'Aggregate transcripts counts to gene counts : '
deseq2_annotations_select:
tool: deseq2
rule: deseq2
......@@ -189,18 +211,25 @@ params_info:
tool: deseq2
rule: deseq2
type: input_file
label: 'Annotation file (gtf or gff) : '
deseq2_fitType:
tool: deseq2
rule: deseq2
type: radio
label: Type of fitting of dispersions to the mean intensity
deseq2_betaPrior:
tool: deseq2
rule: deseq2
type: checkbox
label: 'betaPrior: whether or not to put a zero-mean normal prior on the non-intercept
coefficients. By default, the beta prior is used only for the Wald test, but
can also be specified for the likelihood ratio test.'
deseq2_testType:
tool: deseq2
rule: deseq2
type: radio
label: 'Test type: Use either Wald significance tests, or the likelihood ratio
test on the difference in deviance between a full and reduced model formula'
prepare_report_scripts: []
prepare_report_outputs: {}
outputs:
......
......@@ -21,9 +21,6 @@ echo "To get help for an app :\nsingularity help --app appName this_container.si
echo "To run an app :\nsingularity run --app appName this_container.sif"
########################
# App UI
########################
%apprun UI
exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
......@@ -33,9 +30,6 @@ You must also provide the host address and port where the shiny app will be laun
exemple : singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
########################
# App Snakemake
########################
%apprun Snakemake
configfile=$1
cores=$2
......@@ -48,9 +42,7 @@ To run the Snakemake app you should bind data and results directories like in th
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
########################
# App getConfigfile
########################
%apprun getConfigfile
exec cp /workflow/params.total.yml ./params.yml
......@@ -58,11 +50,21 @@ exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /p
To run the getConfigfile app you don't need to bind any directories. This app will only copy the default parameters file from the container to your local disk.
exemple : singularity run --app getConfigfile this_container.sif
%apprun getSamples
exec python3 /workflow/get_samples.py $1 $2
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix.
exemple : singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains three apps (UI, Snakemake and getConfigfile).
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake let you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile give you a copy of a default parameters file to fill and use with the Snakemake app
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (useful for filling in samples and sample_suffix in the parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
......
......@@ -15,7 +15,7 @@ MenuGauche = sidebarMenu(id="sidebarmenu",
tags$br(),
numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16),
selectInput("final_step", label = "Select the step to reach : ", selected = "all", choices = list('Trimming'='trimming','Quality check'='quality_check','Quantification'='quantification','Differential expression'='DE',"All"="all")), tags$br(),
selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('none'='none','Trimming'='trimming','Quality check'='quality_check','Quantification'='quantification','Differential expression'='DE',"All"="all")), tags$br(),
actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"),
actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"),
......
......@@ -17,8 +17,6 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
)
,
textInput("sample_suffix", label = "Samples suffix: ", value = ".fastq.gz", width = "auto"),
box(title = "Group file (TSV with col1(sample name), col2 (group)): ", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE,
selectInput("group_file_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")),
conditionalPanel(condition = "input.group_file_select == 'server'",
......
......@@ -13,12 +13,6 @@ save_params <- function(path_param){
res = paste0(res, paste("sample_dir:", paste0('"', input$sample_dir, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$sample_suffix))) {
res = paste0(res, paste("sample_suffix:", input$sample_suffix, "\n", sep = " "))
} else {
res = paste0(res, paste("sample_suffix:", paste0('"', input$sample_suffix, '"'), "\n", sep = " "))
}
res = paste0(res, paste("group_file_select:", paste0('"', input$group_file_select, '"'), "\n", sep = " "))
if(input$group_file_select == "server") {
res = paste0(res, paste("group_file:", paste0('"', input$group_file_server, '"'), "\n", sep = " "))
......@@ -193,8 +187,6 @@ save_params <- function(path_param){
res = paste0(res, paste("deseq2_testType:", paste0('"', input$deseq2_testType, '"'), "\n", sep = " "))
}
res = paste0(res, paste("final_step:", paste0('"', input$final_step, '"'), "\n", sep = " "))
a = yaml.load_file("/workflow/params.total.yml")
p = a[["params"]]
a["params"] = NULL
......@@ -208,30 +200,27 @@ save_params <- function(path_param){
return(result)
}
d = c(d,a)
samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," /Data/groups.csv ", input$sample_suffix),intern = T))
d$samples = names(samples)
names(samples) = NULL
d$groups = unlist(samples)
get_samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," ",input$SeOrPe),intern = T))
d$samples = get_samples$samples
d$params$sample_suffix = get_samples$suffix
write_yaml(d,path_param,handlers=list(logical = logical))
}
compare_params = function(dossierAnalyse){
if (!file.exists(paste0(dossierAnalyse,"/lastrun/params.yml"))){
return(c())
force_rule <- function(force_from){
if (input$force_from=="none"){
return("")
}
else{
new_params = yaml.load_file(paste0(dossierAnalyse,"/params.yml"))
old_params = yaml.load_file(paste0(dossierAnalyse,"/workflow/params.yml"))
changed = new_params[!(new_params %in% old_params)]
rules = c()
if (length(changed)>=1){
for (param in names(changed)){
if (!grepl("_threads$",param)){
rules = c(rules, new_params$params_info[[param]]$rule)
}
}
else if (input$force_from=="all"){ return("--forcerun all") }
else {
params = yaml.load_file(paste0(input$results_dir,"/params.yml"))
outputs = params[["outputs"]]
tool = params[["params"]][[force_from]]
if (length(outputs[[tool]])==1)
rule = names(outputs[[tool]])[[1]]
else{
rule = names(outputs[[tool]])[[grep(input$SeOrPe,names(outputs[[tool]]))]]
}
return(unique(rules))
return(paste0("--forcerun ",rule))
}
}
#' Event when use RULEGRAPH button
......@@ -280,21 +269,10 @@ observeEvent(input$RunPipeline, {
if (!file.exists(paste0(input$results_dir,"/logs/runlog.txt"))){
file.create(paste0(input$results_dir,"/logs/runlog.txt"))
}
forcerun = compare_params(input$results_dir)
if (length(forcerun>1)){
rules = paste(forcerun, collapse=" ")
forcerun = paste(" --forcerun ",rules)
showModal(modalDialog(
title = "Params have changed since the last run",
forcerun
))
}
else{
forcerun = ""
}
system(paste0("touch ",input$results_dir,"/logs/workflow.running"),wait = T)
system(paste0("snakemake -s /workflow/Snakefile --configfile ",input$results_dir,"/params.yml -d ",input$results_dir," all --rulegraph | dot -Tpng -Gratio=0.75 > ",input$results_dir,"/Rule_graph_mqc.png"))
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " --forcerun all -d ", input$results_dir , " --cores ", input$cores, " all ", forcerun),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
force = force_rule(input$force_from)
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " -d ", input$results_dir , " --cores ", input$cores, " all ", force),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
tags$iframe(src="results/multiqc_report.html",width="100%", height="900px")},
error = function(e){
system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment