Commit 099ecf64 authored by mmassaviol's avatar mmassaviol
Browse files

Remove get_samples and add raw_inputs system

parent ba826f7b
......@@ -124,6 +124,12 @@ def generate_workflow_params(pipeline):
template["title"] = "Global parameters :"
for option in workflow["options"]:
template["content"].append(option)
if "input" in workflow:
raw_input = read_yaml("raw_inputs/" + workflow["input"] + ".yaml")
for option in raw_input["options"]:
template["content"].append(option)
template["content"].append(
yaml.load("{name: memo, type: textArea, value: '', label: 'Text area for the user'}")
)
......
......@@ -128,6 +128,9 @@ def generate_snakefile(pipeline, out_file):
else:
params_equals = list()
if "input" in params:
res += "from "+params["input"]+" import "+params["input"]+"\n"
path = "workflows/" + pipeline + "/" + pipeline + ".snakefile"
with open(path, "r") as mainrule:
res += mainrule.read()
......@@ -327,6 +330,9 @@ def generate_pipeline_files(workflow, out_dir, local_config="default"):
out_dir + "/files/scripts",
)
if "input" in workflow_yaml:
shutil.copy("./raw_inputs/"+workflow_yaml["input"]+".py", out_dir+"/files")
shutil.copy("./get_samples.py", out_dir+"/files")
shutil.copy("./generate_multiqc_config.py", out_dir+"/files")
shutil.copy("./tools.py", out_dir+"/files")
......
......@@ -33,7 +33,7 @@ def generate(name):
result += "\n"
result += "SAMPLES = config[\"samples\"]\n"
#result += "SAMPLES = config[\"samples\"]\n"
result += "STEPS = config[\"steps\"]\n"
result += "PREPARE_REPORT_OUTPUTS = config[\"prepare_report_outputs\"]\n"
result += "PREPARE_REPORT_SCRIPTS = config[\"prepare_report_scripts\"]\n"
......@@ -51,16 +51,23 @@ def generate(name):
result += "\n"
result += "# Generic input functions\n"
result += "## get raw_reads\n"
result += "def raw_reads():\n"
result += "\tinputs = dict()\n"
result += "\tif (config[\"SeOrPe\"] == \"PE\"):\n"
result += "\t\tinputs[\"read\"] = config['sample_dir']+'/{sample}_R1'+config[\"sample_suffix\"]\n"
result += "\t\tinputs[\"read2\"] = config['sample_dir']+'/{sample}_R2'+config[\"sample_suffix\"]\n"
result += "\telse:\n"
result += "\t\tinputs[\"read\"] = config['sample_dir']+'/{sample}'+config[\"sample_suffix\"]\n"
result += "\treturn inputs\n"
if "input" in yaml:
raw_inputs_yaml = read_yaml("./raw_inputs/"+yaml["input"]+".yaml")
result += "# raw_inputs function call\n"
result += yaml["input"]+" = "+raw_inputs_yaml["function_call"]+"\n"
result += "SAMPLES = "+yaml["input"]+"['samples']\n"
#else : # OLD (to replace for all workflows with input raw_reads)
# result += "# Generic input functions\n"
# result += "## get raw_reads\n"
# result += "def raw_reads():\n"
# result += "\tinputs = dict()\n"
# result += "\tif (config[\"SeOrPe\"] == \"PE\"):\n"
# result += "\t\tinputs[\"read\"] = config['sample_dir']+'/{sample}_R1'+config[\"sample_suffix\"]\n"
# result += "\t\tinputs[\"read2\"] = config['sample_dir']+'/{sample}_R2'+config[\"sample_suffix\"]\n"
# result += "\telse:\n"
# result += "\t\tinputs[\"read\"] = config['sample_dir']+'/{sample}'+config[\"sample_suffix\"]\n"
# result += "\treturn inputs\n"
# result += "raw_reads = raw_reads()\n"
result += "\n"
......@@ -81,7 +88,7 @@ def generate(name):
if param["origin_command"] == "raw_reads":
result += "\tinputs[\"" + param["input_name"] + "\"] = raw_reads()[\"read\"]\n"
result += "\tinputs[\"" + param["input_name"] + "\"] = raw_reads[\""+param["origin_name"]+"\"]\n"
else:
result += "\tinputs[\"" + param["input_name"] + "\"] = rules." + param["origin_command"] + ".output." + param["origin_name"] + "\n"
......@@ -110,10 +117,14 @@ def generate(name):
for step in yaml["steps_in"]:
if ("step_name" in step) and ("tool_name" in step):
result += "\tif (step == \"" + step["step_name"] + "\"):\n"
result += "\t\toutputs = rules." + step["tool_name"] + ".output\n"
if ("step_name" in step) and ("rule_name" in step):
add = ""
if ("_SE" in step["rule_name"]):
add = "and config['SeOrPe'] == 'SE'"
if ("_PE" in step["rule_name"]):
add = "and config['SeOrPe'] == 'PE'"
result += "\tif (step == \"" + step["step_name"] + "\" " + add + " ):\n"
result += "\t\toutputs = rules." + step["rule_name"] + ".output\n"
result += "\t\t\n"
......
import os
import re
import sys
def raw_reads(results_dir, sample_dir, SeOrPe):
    """Discover the read files in *sample_dir* and describe them.

    Every entry of ``sample_dir`` is matched against a file-name pattern:

    * ``"PE"``: ``<sample><mark><suffix>`` where ``<mark>`` is one of
      ``_R1``, ``_R2``, ``_1`` or ``_2``;
    * ``"SE"``: ``<sample><suffix>`` where the suffix starts at the first dot.

    Entries that do not match are ignored.  A tab-separated summary
    (one line per sample, listing its files) is written to
    ``results_dir + "/samples.tsv"``.

    Args:
        results_dir: directory in which ``samples.tsv`` is written.
        sample_dir: directory listed to find the read files.
        SeOrPe: ``"SE"`` for single-end or ``"PE"`` for paired-end reads.

    Returns:
        A dict with the keys ``samples`` (sorted sample names), ``suffix``
        (the suffix shared by the files), ``dico`` (sample name -> list of
        file names), ``read`` (path template containing ``{sample}``) and,
        for paired-end data only, ``read2``.

    Raises:
        SystemExit: if ``SeOrPe`` is neither ``"SE"`` nor ``"PE"``, if no
            file matches the expected pattern, or if the samples do not all
            share the same suffix.
    """
    # Any other value used to take the SE pattern but the PE path templates,
    # silently producing paths that cannot exist.
    if SeOrPe not in ("SE", "PE"):
        sys.exit("SeOrPe should be SE or PE")

    paired = SeOrPe == "PE"
    if paired:
        regex = re.compile(r"^(.+)(_R1|_R2|_1|_2)(.+)")
    else:
        regex = re.compile(r"^(.+?)(\..*)")

    samples = []
    suffixes = []
    PE_mark = ""  # "_R" or "_"
    dicoSamples = {}  # sample_name: file(s)

    for filename in os.listdir(sample_dir):
        res = regex.match(filename)
        if not res:
            continue
        sample = res.group(1)
        if sample not in dicoSamples:
            # First file seen for this sample: record its suffix and, for
            # paired-end data, the style of the read marker.
            samples.append(sample)
            if paired:
                suffixes.append(res.group(3))
                # "_R1"/"_R2" are three characters long, "_1"/"_2" are two.
                PE_mark = "_R" if len(res.group(2)) == 3 else "_"
            else:
                suffixes.append(res.group(2))
        dicoSamples.setdefault(sample, []).append(filename)

    if not suffixes:
        # Without this check an empty directory was reported as a suffix
        # mismatch, which hides the real problem.
        sys.exit("No read files found in " + sample_dir)
    if len(set(suffixes)) != 1:
        sys.exit("Files have different suffixes:" + ','.join(suffixes))
    suffix = suffixes[0]

    ordered_samples = sorted(samples)
    with open(results_dir + "/samples.tsv", "w") as sampleTab:
        if paired:
            sampleTab.write("sample\tfile_read_1\tfile_read_2")
        else:
            sampleTab.write("sample\tfile_read_1")
        for sample in ordered_samples:
            sampleTab.write("\n" + sample + "\t" + "\t".join(sorted(dicoSamples[sample])))

    out = {'samples': ordered_samples, 'suffix': suffix, 'dico': dicoSamples}
    if paired:
        out["read"] = os.path.join(sample_dir, "{sample}" + PE_mark + "1" + suffix)
        out["read2"] = os.path.join(sample_dir, "{sample}" + PE_mark + "2" + suffix)
    else:
        out["read"] = os.path.join(sample_dir, "{sample}" + suffix)
    return out
#print(raw_reads(sys.argv[1],sys.argv[2],sys.argv[3]))
\ No newline at end of file
# Descriptor for the "raw_reads" raw-input type.
# NOTE(review): presumably stored as raw_inputs/raw_reads.yaml and selected by
# workflows that declare `input: raw_reads` — confirm against the generator.
{
# Identifier of this raw-input type.
name: raw_reads,
# Python expression text; it reads its three arguments from the `config`
# mapping (results_dir, sample_dir, SeOrPe).
function_call: "raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])",
# Parameters that belong to this input type.
options: [
{
# Directory that holds the read files; default is /Data.
name: "sample_dir",
type: "input_dir",
value: "/Data",
label: "Directory containing the fastq files: ",
volumes: [Data: "/Data", Results: "/Results"]
},
{
# Sequencing layout, single end (SE) or paired end (PE); default is SE.
name: "SeOrPe",
type: "radio",
value: "SE",
choices: [Single end: SE, Paired end: PE],
label: "Single end reads (SE) or Paired end reads (PE): "
}
]
}
\ No newline at end of file
......@@ -7,7 +7,6 @@ import csv
# Wildcards #
#############
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -19,28 +18,14 @@ config = config["params"]
# Inputs #
##########
# Generic input functions
## get raw_reads
def raw_reads():
inputs = dict()
if (config["SeOrPe"] == "PE"):
inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"]
inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"]
elif (config["SeOrPe"] == "SE"):
inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"]
else:
sys.exit("SeOrPe should be SE or PE")
return inputs
## get reads (trimmed or raw)
def reads():
return raw_reads()
# raw_inputs function call
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
SAMPLES = raw_reads['samples']
# Tools inputs functions
def fastp_inputs():
return raw_reads()
return raw_reads
def jellyfish_count_inputs():
inputs = dict()
......@@ -51,9 +36,9 @@ def jellyfish_count_inputs():
else:
inputs["read"] = expand(rules.fastp_SE.output.read,sample=SAMPLES)
else:
inputs["read"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["read"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["read2"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["read2"] = expand(raw_reads["read2"],sample=SAMPLES)
return inputs
{import global_functions}
......
......@@ -4,6 +4,7 @@
description: "",
version: "0.0.1",
author: "MBB",
input: raw_reads,
steps:
[
{ title: Preprocessing, name: preprocessing, tools: [fastp,"null"], default: "null" },
......@@ -18,19 +19,5 @@
label: "Results directory: ",
volumes: [Results: "/Results"]
},
{
name: "sample_dir",
type: "input_dir",
value: "/Data",
label: "Data directory: ",
volumes: [Data: "/Data", Results: "/Results"]
},
{
name: "SeOrPe",
type: "radio",
value: "PE",
choices: [Single end: SE, Paired end: PE],
label: "Single end reads (SE) or Paired end reads (PE): ",
},
],
}
\ No newline at end of file
......@@ -7,7 +7,6 @@ import csv
# Wildcards #
#############
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -19,28 +18,21 @@ config = config["params"]
# Inputs #
##########
# Generic input functions
## get raw_reads
def raw_reads():
inputs = dict()
if (config["SeOrPe"] == "PE"):
inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"]
inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"]
else:
inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"]
return inputs
# raw_inputs function call
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
SAMPLES = raw_reads['samples']
# Tools inputs functions
def cutadapt_inputs():
return raw_reads()
return raw_reads
def dada2_inputs():
inputs = dict()
if config["trimming"] == 'null':
inputs["read"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["read"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["read2"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["read2"] = expand(raw_reads["read2"],sample=SAMPLES)
if config["trimming"] == 'cutadapt':
if (config["SeOrPe"] == "SE"):
inputs["read"] = expand(rules.cutadapt_SE.output[0],sample=SAMPLES)
......@@ -65,9 +57,9 @@ def dada2_learn_errors_inputs():
inputs["filtRs"] = expand(rules.cutadapt_PE.output.read2_trimmed,sample=SAMPLES)
inputs["cutadapt_stats"] = config["results_dir"]+"/"+config["cutadapt_PE_output_dir"]+"/cutadapt_stats.tsv"
else: # no trimming
inputs["filtFs"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["filtFs"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["filtRs"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["filtRs"] = expand(raw_reads["read2"],sample=SAMPLES)
return inputs
{import global_functions}
......
......@@ -4,6 +4,7 @@
description: ,
version: "0.0.1",
author: "MBB",
input: raw_reads,
steps:
[
{ title: Trimming, name: trimming, tools: [cutadapt, "null"], default: "null" },
......@@ -18,25 +19,5 @@
label: "Results directory: ",
volumes: [Results: "/Results"]
},
{
name: "sample_dir",
type: "input_dir",
value: "/Data",
label: "Data directory: ",
volumes: [Data: "/Data", Results: "/Results"]
},
#{
# name: "sample_suffix",
# type: "text",
# value: ".fastq.gz",
# label: "Samples suffix: ",
#},
{
name: "SeOrPe",
type: "radio",
value: "PE",
choices: [Single end: SE, Paired end: PE],
label: "Single end reads (SE) or Paired end reads (PE): ",
},
],
}
\ No newline at end of file
......@@ -7,7 +7,6 @@ import csv
# Wildcards #
#############
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -19,28 +18,21 @@ config = config["params"]
# Inputs #
##########
# Generic input functions
## get raw_reads
def raw_reads():
inputs = dict()
if (config["SeOrPe"] == "PE"):
inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"]
inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"]
else:
inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"]
return inputs
# raw_inputs function call
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
SAMPLES = raw_reads['samples']
# Tools inputs functions
def cutadapt_inputs():
return raw_reads()
return raw_reads
def mitoz_filter_inputs():
inputs = dict()
if config["trimming"] == 'null':
inputs["read"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["read"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["read2"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["read2"] = expand(raw_reads["read2"],sample=SAMPLES)
if config["trimming"] == 'cutadapt':
if (config["SeOrPe"] == "SE"):
inputs["read"] = expand(rules.cutadapt_SE.output.read,sample=SAMPLES)
......
......@@ -4,6 +4,7 @@
description: ,
version: "0.0.1",
author: "MBB",
input: raw_reads,
steps:
[
{ title: Trimming, name: trimming, tools: [cutadapt, "null"], default: "null" },
......@@ -42,22 +43,9 @@
choices: [Single end: SE, Paired end: PE],
label: "Single end reads (SE) or Paired end reads (PE): ",
},
{
name: "clade",
type: "radio",
value: "Arthropoda",
choices: [
Arthropoda: Arthropoda,
Chordata: Chordata,
Echinodermata: Echinodermata,
Annelida-segmented-worms: Annelida-segmented-worms,
Bryozoa: Bryozoa,
Mollusca: Mollusca,
Nematoda: Nematoda,
Nemertea-ribbon-worms: Nemertea-ribbon-worms,
Porifera-sponges: Porifera-sponges
],
label: "Taxa group for MitoZ : ",
},
],
params_equals: [
{ param_A: mitoz_filter_clade, param_B: mitoz_assemble_clade },
{ param_A: mitoz_filter_clade, param_B: mitoz_annotate_clade },
]
}
\ No newline at end of file
......@@ -7,7 +7,6 @@ import csv
# Wildcards #
#############
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -19,38 +18,24 @@ config = config["params"]
# Inputs #
##########
# Generic input functions
## get raw_reads
def raw_reads():
inputs = dict()
if (config["SeOrPe"] == "PE"):
inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"]
inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"]
elif (config["SeOrPe"] == "SE"):
inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"]
else:
sys.exit("SeOrPe should be SE or PE")
return inputs
## get reads (trimmed or raw)
def reads():
return raw_reads()
# raw_inputs function call
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
SAMPLES = raw_reads['samples']
# Tools inputs functions
def fastqc_inputs():
inputs = dict()
inputs["read"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["read"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["read2"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["read2"] = expand(raw_reads["read2"],sample=SAMPLES)
return inputs
def megahit_inputs():
inputs = dict()
inputs["read"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["read"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["read2"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["read2"] = expand(raw_reads["read2"],sample=SAMPLES)
return inputs
def mitoz_findmitoscaf_inputs():
......@@ -64,9 +49,9 @@ def mitoz_findmitoscaf_inputs():
def mitoz_annotate_inputs():
inputs = dict()
inputs["fastafile"] = rules.mitoz_findmitoscaf.output.mitogenome
inputs["read"] = expand(raw_reads()["read"],sample=SAMPLES)
inputs["read"] = expand(raw_reads["read"],sample=SAMPLES)
if (config["SeOrPe"] == "PE"):
inputs["read2"] = expand(raw_reads()["read2"],sample=SAMPLES)
inputs["read2"] = expand(raw_reads["read2"],sample=SAMPLES)
return inputs
def igv_visualize_inputs():
......
......@@ -4,6 +4,7 @@
description: ,
version: "0.0.1",
author: "MBB",
input: raw_reads,
steps:
[
{ title: Quality check, name: quality_check, tools: [fastqc,"null"], default: fastqc },
......@@ -21,47 +22,8 @@
label: "Results directory: ",
volumes: [Results: "/Results"]
},
{
name: "help_sample",
type: "help",
label: "The Data directory must only contain the reads files (fastq). In case of paired end reads, the files names must contain '_R1' or '_R2'",
},
{
name: "sample_dir",
type: "input_dir",
value: "/Data",
label: "Data directory: ",
volumes: [Data: "/Data", Results: "/Results"]
},
#{
# name: "sample_suffix",
# type: "text",
# value: ".fastq.gz",
# label: "Samples suffix: ",
#},
{
name: "SeOrPe",
type: "radio",
value: "PE",
choices: [Single end: SE, Paired end: PE],
label: "Single end reads (SE) or Paired end reads (PE): ",
},
{
name: "clade",
type: "radio",
value: "Arthropoda",
choices: [
Arthropoda: Arthropoda,
Chordata: Chordata,
Echinodermata: Echinodermata,
Annelida-segmented-worms: Annelida-segmented-worms,
Bryozoa: Bryozoa,
Mollusca: Mollusca,
Nematoda: Nematoda,
Nemertea-ribbon-worms: Nemertea-ribbon-worms,
Porifera-sponges: Porifera-sponges
],
label: "Taxa group for MitoZ : ",
},
],
params_equals: [
{ param_A: find_mitoscaf_clade, param_B: mitoz_annotate_clade },
]
}
\ No newline at end of file
......@@ -7,7 +7,6 @@ import csv
# Wildcards #
#############
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -29,24 +28,15 @@ individus = get_individus()
# Inputs #
##########
# Generic input functions
## get raw_reads
def raw_reads():
inputs = dict()
if (config["SeOrPe"] == "PE"):
inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"]
inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"]
elif (config["SeOrPe"] == "SE"):
inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"]
else:
sys.exit("SeOrPe should be SE or PE")
return inputs
# raw_inputs function call
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
SAMPLES = raw_reads['samples']
## get reads (trimmed or raw)
def reads():
inputs = dict()
if (config["trimming"] == "null"):
return raw_reads()
return raw_reads
elif (config["trimming"] == "trimmomatic"):
if (config["SeOrPe"] == "SE"):
inputs["read"] = rules.trimmomatic_SE.output.read
......@@ -58,14 +48,14 @@ def reads():
# Tools inputs functions
def fastqc_inputs():
return raw_reads()
return raw_reads
def process_radtags_inputs():
return raw_reads()
return raw_reads
def ustacks_inputs():
if (config["demultiplexing"] == "null"):
return raw_reads()
return raw_reads
else:
inputs = dict()
if (config["SeOrPe"] == "PE"):
......
......@@ -4,6 +4,7 @@
description: RADseq analysis of data without reference genome,
version: "0.0.1",
author: "MBB",