Commit 03c2a4fb authored by mmassaviol's avatar mmassaviol
Browse files

Update Metabarcoding

parent 6eb9efd6
......@@ -3,8 +3,6 @@ import re
import snakemake.utils
import csv
dir_path = "/workflow"
#############
# Wildcards #
#############
......@@ -14,6 +12,7 @@ STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
OUTPUTS = config["outputs"]
PARAMS_INFO = config["params_info"]
config = config["params"]
##########
......@@ -75,8 +74,6 @@ def prepare_report_inputs():
inputs = list()
for step in STEPS:
inputs.extend(step_outputs(step["name"]))
if (step["name"] == config["final_step"]):
break
return inputs
def prepare_report_scripts():
......@@ -86,8 +83,6 @@ def prepare_report_scripts():
script = tool+".prepare.report.R"
if (script in PREPARE_REPORT_SCRIPTS):
scripts.append("/workflow/scripts/"+script)
if (tool == config["final_step"]):
break
return scripts
def prepare_report_outputs():
......@@ -98,8 +93,6 @@ def prepare_report_outputs():
if (tool in PREPARE_REPORT_OUTPUTS.keys()):
for output in PREPARE_REPORT_OUTPUTS[tool]:
outputs.append(config["results_dir"]+"/"+tool+"/"+output)
if (tool == config["final_step"]):
break
return outputs
def multiqc_inputs():
......@@ -425,16 +418,19 @@ rule prepare_report:
i+=1
# Params list for Multiqc report
params_list = "params_name\tvalue\n"
head = """
# description: 'This is the list of the parameters for each rule'
params_list = "params_name\tdescription\tvalue\n"
head = """# description: 'This is the list of the parameters for each rule'
# section_name: 'Workflow parameters'
"""
for step in STEPS:
tool = config[step["name"]]
for key, value in config.items():
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]):
params_list += key + "\t'" + str(value) + "'\n"
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
description = PARAMS_INFO[key]["label"]
else:
description = ''
params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n"
with open(output.params_tab,"w") as out:
out.write(head)
......@@ -460,7 +456,7 @@ rule multiqc:
# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps)
rule all:
input:
workflow_outputs(config["final_step"])
workflow_outputs("all")
output:
Snakefile = config["results_dir"]+"/workflow/Snakefile",
get_samples = config["results_dir"]+"/workflow/get_samples.py",
......
#!/usr/bin/env python3
# This script will take a directory and a parameter to tell if the reads are paired end or single end and return the sample list and the suffix
# Needs 2 arguments: reads_directory, SeOrPe
# SeOrPe is SE for single end reads and PE for paired end reads
# Usage: ./get_samples.py reads_directory SeOrPe
import os
import re
import csv
import sys
def sample_list(dir, filesuffix=".fastq.gz"):
samples = list()
files = os.listdir(dir)
regex = re.compile("^(.+?)(_R1|_R2|)("+filesuffix+")")
for file in files:
res = re.match(regex, file)
if res:
if res.group(1) not in samples:
samples.append(res.group(1))
return sorted(samples)
def sample_list2(dir, SeOrPe):
def sample_list(dir, SeOrPe):
samples = list()
suffixes = list()
files = os.listdir(dir)
if SeOrPe == "PE":
regex = re.compile("^(.+?)(_R1|_R2)(.+)")
regex = re.compile(r"^(.+?)(_R1|_R2)(.+)")
else:
regex = re.compile("^(.+?)(\..*)")
regex = re.compile(r"^(.+?)(\..*)")
for file in files:
res = re.match(regex, file)
if res:
......@@ -34,25 +27,17 @@ def sample_list2(dir, SeOrPe):
suffixes.append(res.group(2))
if (len(set(suffixes)) == 1 ):
return {'samples': samples, 'suffix': set(suffixes)}
return {'samples': sorted(samples), 'suffix': list(set(suffixes))[0]}
else:
pass
exit("Files have different suffixes:" + ','.join(suffixes))
def group_list(dir, groupfile, filesuffix=".fastq.gz"):
samples = sample_list(dir,filesuffix)
if os.path.isfile(groupfile):
groups = dict()
with open(groupfile, mode="r") as infile:
reader = csv.reader(infile)
groups = {row[0]: row[1] for row in reader}
return {sample: groups[sample] for sample in samples}
def main():
if len(sys.argv) == 3:
print(sample_list(sys.argv[1],sys.argv[2]))
else:
return {sample: "group" for sample in samples}
#print(sample_list2(sys.argv[1],sys.argv[2]))
exit("""Needs 2 arguments: reads_directory, SeOrPe
Usage: ./get_samples.py reads_directory SeOrPe""")
if len(sys.argv) == 4:
print(group_list(sys.argv[1], sys.argv[2], sys.argv[3]))
else:
print(group_list(sys.argv[1], sys.argv[2]))
if __name__ == "__main__":
# execute only if run as a script
main()
......@@ -2,7 +2,6 @@ pipeline: Metabarcoding
params:
results_dir: /Results
sample_dir: /Data
sample_suffix: .fastq.gz
SeOrPe: PE
trimming: 'null'
cutadapt_PE_output_dir: cutadapt_PE
......@@ -27,7 +26,6 @@ params:
dada2_maxEE_reverse: -1
samples: []
groups: []
final_step: all
steps:
- title: Trimming
name: trimming
......@@ -45,70 +43,92 @@ params_info:
type: output_dir
sample_dir:
type: input_dir
sample_suffix:
type: text
SeOrPe:
type: radio
cutadapt_threads:
tool: cutadapt
rule: cutadapt_SE
type: numeric
label: Number of threads to use
cutadapt_qc_score:
tool: cutadapt
rule: cutadapt_SE
type: radio
label: Quality score encoding
cutadapt_qc_min:
tool: cutadapt
rule: cutadapt_SE
type: numeric
label: trim low-quality 3'ends from reads before adapter removal
cutadapt_max_N:
tool: cutadapt
rule: cutadapt_PE
type: numeric
label: Discard reads with more than X N bases. If X is an number between 0 and
1, it is interpreted as a fraction of the read length
cutadapt_a:
tool: cutadapt
rule: cutadapt_SE
type: text
label: Forward adapter sequence
cutadapt_A:
tool: cutadapt
rule: cutadapt_PE
type: text
label: Reverse adapter sequence
dada2_threads:
tool: dada2
rule: dada2
type: numeric
label: Number of threads to use
dada2_no_filter_and_trim:
tool: dada2
rule: dada2
type: checkbox
label: Skip filter and trim step
dada2_remove_chim:
tool: dada2
rule: dada2
type: checkbox
label: Remove chimeras
dada2_marker_type:
tool: dada2
rule: dada2
type: select
label: Marker type
dada2_taxonomypath:
tool: dada2
rule: dada2
type: select
label: Taxonomy source (choose unite for ITS marker type)
dada2_truncLen_forward:
tool: dada2
rule: dada2
type: numeric
label: 'truncLen forward : Truncate reads after truncLen bases. Reads shorter
than this are discarded'
dada2_truncLen_reverse:
tool: dada2
rule: dada2
type: numeric
label: 'truncLen reverse : Truncate reads after truncLen bases. Reads shorter
than this are discarded'
dada2_maxEE_forward:
tool: dada2
rule: dada2
type: numeric
label: 'maxEE forward : After truncation, reads with higher than maxEE ''expected
errors'' will be discarded. Expected errors are calculated from the nominal
definition of the quality score: EE = sum(10^(-Q/10)). Type -1 for Infinity
(no filtering).'
dada2_maxEE_reverse:
tool: dada2
rule: dada2
type: numeric
label: 'maxEE reverse : After truncation, reads with higher than maxEE ''expected
errors'' will be discarded. Expected errors are calculated from the nominal
definition of the quality score: EE = sum(10^(-Q/10)). Type -1 for Infinity
(no filtering).'
prepare_report_scripts: []
prepare_report_outputs: {}
outputs:
......
......@@ -21,9 +21,6 @@ echo "To get help for an app :\nsingularity help --app appName this_container.si
echo "To run an app :\nsingularity run --app appName this_container.sif"
########################
# App UI
########################
%apprun UI
exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
......@@ -33,9 +30,6 @@ You must also provide the host address and port where the shiny app will be laun
exemple : singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
########################
# App Snakemake
########################
%apprun Snakemake
configfile=$1
cores=$2
......@@ -48,9 +42,7 @@ To run the Snakemake app you should bind data and results directories like in th
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
########################
# App getConfigfile
########################
%apprun getConfigfile
exec cp /workflow/params.total.yml ./params.yml
......@@ -58,11 +50,21 @@ exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /p
To run the getConfigfile app you dont need to bind directories. This app will only copy the default parameters file from the container to your local disk.
exemple : singularity run --app getConfigfile this_container.sif
%apprun getSamples
exec python3 /workflow/get_samples.py $1 $2
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix.
exemple : singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains three apps (UI, Snakemake and getConfigfile).
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake let you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile give you a copy of a default parameters file to fill and use with the Snakemake app
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (usefull for filling samples and sample_suffix in parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
......
......@@ -11,7 +11,7 @@ MenuGauche = sidebarMenu(id="sidebarmenu",
tags$br(),
numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16),
selectInput("final_step", label = "Select the step to reach : ", selected = "all", choices = list('Trimming'='trimming','Dada2'='dada2',"All"="all")), tags$br(),
selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('none'='none','Trimming'='trimming','Dada2'='dada2',"All"="all")), tags$br(),
actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"),
actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"),
......
......@@ -17,8 +17,6 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
)
,
textInput("sample_suffix", label = "Samples suffix: ", value = ".fastq.gz", width = "auto"),
radioButtons("SeOrPe", label = "Single end reads (SE) or Paired end reads (PE): ", choices = list("Single end" = "SE", "Paired end" = "PE"), selected = "PE", width = "auto"),
textAreaInput("memo", label = "Text area for the user", value = "")
......
......@@ -13,12 +13,6 @@ save_params <- function(path_param){
res = paste0(res, paste("sample_dir:", paste0('"', input$sample_dir, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$sample_suffix))) {
res = paste0(res, paste("sample_suffix:", input$sample_suffix, "\n", sep = " "))
} else {
res = paste0(res, paste("sample_suffix:", paste0('"', input$sample_suffix, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$SeOrPe))) {
res = paste0(res, paste("SeOrPe:", input$SeOrPe, "\n", sep = " "))
} else {
......@@ -125,8 +119,6 @@ save_params <- function(path_param){
res = paste0(res, paste("dada2_maxEE_reverse:", paste0('"', input$dada2_maxEE_reverse, '"'), "\n", sep = " "))
}
res = paste0(res, paste("final_step:", paste0('"', input$final_step, '"'), "\n", sep = " "))
a = yaml.load_file("/workflow/params.total.yml")
p = a[["params"]]
a["params"] = NULL
......@@ -140,30 +132,27 @@ save_params <- function(path_param){
return(result)
}
d = c(d,a)
samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," /Data/groups.csv ", input$sample_suffix),intern = T))
d$samples = names(samples)
names(samples) = NULL
d$groups = unlist(samples)
get_samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," ",input$SeOrPe),intern = T))
d$samples = get_samples$samples
d$params$sample_suffix = get_samples$suffix
write_yaml(d,path_param,handlers=list(logical = logical))
}
compare_params = function(dossierAnalyse){
if (!file.exists(paste0(dossierAnalyse,"/lastrun/params.yml"))){
return(c())
force_rule <- function(force_from){
if (input$force_from=="none"){
return("")
}
else{
new_params = yaml.load_file(paste0(dossierAnalyse,"/params.yml"))
old_params = yaml.load_file(paste0(dossierAnalyse,"/workflow/params.yml"))
changed = new_params[!(new_params %in% old_params)]
rules = c()
if (length(changed)>=1){
for (param in names(changed)){
if (!grepl("_threads$",param)){
rules = c(rules, new_params$params_info[[param]]$rule)
}
}
else if (input$force_from=="all"){ return("--forcerun all") }
else {
params = yaml.load_file(paste0(input$results_dir,"/params.yml"))
outputs = params[["outputs"]]
tool = params[["params"]][[force_from]]
if (length(outputs[[tool]])==1)
rule = names(outputs[[tool]])[[1]]
else{
rule = names(outputs[[tool]])[[grep(input$SeOrPe,names(outputs[[tool]]))]]
}
return(unique(rules))
return(paste0("--forcerun ",rule))
}
}
#' Event when use RULEGRAPH button
......@@ -212,21 +201,10 @@ observeEvent(input$RunPipeline, {
if (!file.exists(paste0(input$results_dir,"/logs/runlog.txt"))){
file.create(paste0(input$results_dir,"/logs/runlog.txt"))
}
forcerun = compare_params(input$results_dir)
if (length(forcerun>1)){
rules = paste(forcerun, collapse=" ")
forcerun = paste(" --forcerun ",rules)
showModal(modalDialog(
title = "Params have changed since the last run",
forcerun
))
}
else{
forcerun = ""
}
system(paste0("touch ",input$results_dir,"/logs/workflow.running"),wait = T)
system(paste0("snakemake -s /workflow/Snakefile --configfile ",input$results_dir,"/params.yml -d ",input$results_dir," all --rulegraph | dot -Tpng -Gratio=0.75 > ",input$results_dir,"/Rule_graph_mqc.png"))
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " --forcerun all -d ", input$results_dir , " --cores ", input$cores, " all ", forcerun),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
force = force_rule(input$force_from)
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " -d ", input$results_dir , " --cores ", input$cores, " all ", force),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
tags$iframe(src="results/multiqc_report.html",width="100%", height="900px")},
error = function(e){
system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment