from tools import * from raw_reads import raw_reads workdir: config['params']['results_dir'] import os import re import snakemake.utils import csv ############# # Wildcards # ############# STEPS = config["steps"] PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"] PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"] OUTPUTS = config["outputs"] PARAMS_INFO = config["params_info"] MULTIQC = config["multiqc"] config = config["params"] ########## # Inputs # ########## # raw_inputs function call raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe']) config.update(raw_reads) SAMPLES = raw_reads['samples'] def get_individus(): if (config["demultiplexing"] == "null"): return SAMPLES else: with open(config["demultiplexing__demultiplexing_astrid_cruaud_barcodes"], mode="r") as infile: reader = csv.reader(infile,delimiter='\t') next(reader, None) # remove header (first line) return [row[0] for row in reader] INDIVIDUS = get_individus() # Tools inputs functions def quality_control__fastqc_PE_inputs(): inputs = dict() inputs["read"] = raw_reads["read"] inputs["read2"] = raw_reads["read2"] return inputs def quality_control__fastqc_SE_inputs(): inputs = dict() inputs["read"] = raw_reads["read"] return inputs def qcfilter_adaptertrim__trimmomatic_PE_inputs(): inputs = dict() inputs["read"] = raw_reads["read"] inputs["read2"] = raw_reads["read2"] return inputs def qcfilter_adaptertrim__trimmomatic_SE_inputs(): inputs = dict() inputs["read"] = raw_reads["read"] return inputs def merge_overlapps__flash_inputs(): inputs = dict() inputs["read"] = rules.qcfilter_adaptertrim__trimmomatic_PE.output.readFP inputs["read2"] = rules.qcfilter_adaptertrim__trimmomatic_PE.output.readRP return inputs def demultiplexing__demultiplexing_astrid_cruaud_inputs(): inputs = dict() inputs["extended_frags"] = expand(rules.merge_overlapps__flash.output.extendedFrags, sample=SAMPLES) return inputs def assembling__velvet_inputs(): inputs = dict() inputs["merged_reads"] = config["results_dir"]+"/"+config["demultiplexing__demultiplexing_astrid_cruaud_output_dir"]+"/{sample}_fq" return inputs def contigmaptouce__lastz_inputs(): inputs = dict() inputs["contigs"] = rules.assembling__velvet.output.contigs return inputs def mapping_check_bf__samtools_stats_inputs(): inputs = dict() inputs["bam"] = rules.contigmaptouce__lastz.output.align_on_ref return inputs def mapping_check_af__samtools_stats_inputs(): inputs = dict() inputs["bam"] = rules.contigmaptouce__lastz.output.filtered return inputs def convert_to_phylip__sam_to_phylip_inputs(): inputs = dict() inputs["sams"] = expand(rules.contigmaptouce__lastz.output.filtered, sample=INDIVIDUS) return inputs def prepare_report_inputs(): inputs = list() for step in STEPS: inputs.extend(step_outputs(step["name"])) return inputs def prepare_report_scripts(): scripts = list() for step in STEPS: tool = config[step["name"]] prefix = tool+".prepare.report." if type(PREPARE_REPORT_SCRIPTS) == type(""): if prefix in PREPARE_REPORT_SCRIPTS: scripts.append("/workflow/scripts/"+PREPARE_REPORT_SCRIPTS) else : script = [s for s in PREPARE_REPORT_SCRIPTS if prefix in s] if (len(script)==1): scripts.append("/workflow/scripts/"+script[0]) return scripts def prepare_report_outputs(): outputs = list() outputs.append(config["results_dir"] + "/outputs_mqc.csv") for step in STEPS: tool = config[step["name"]] if (tool in PREPARE_REPORT_OUTPUTS.keys()): if type(PREPARE_REPORT_OUTPUTS[tool]) == type(""): outputs.append(config["results_dir"]+"/"+tool+"/"+PREPARE_REPORT_OUTPUTS[tool]) else: for output in PREPARE_REPORT_OUTPUTS[tool]: outputs.append(config["results_dir"]+"/"+tool+"/"+output) return outputs def multiqc_inputs(): # Need prepare_report inputs and outputs in case prepare_reports has no outputs return prepare_report_outputs() ########### # Outputs # ########### def step_outputs(step): outputs = list() if (step == "quality_control" and config['SeOrPe'] == 'PE' ): outputs = expand(rules.quality_control__fastqc_PE.output, sample=SAMPLES) if (step == "qcfilter_adaptertrim" and config['SeOrPe'] == 'PE' ): outputs = expand(rules.qcfilter_adaptertrim__trimmomatic_PE.output, sample=SAMPLES) if (step == "merge_overlapps" ): outputs = expand(rules.merge_overlapps__flash.output, sample=SAMPLES) if (step == "demultiplexing" ): outputs = rules.demultiplexing__demultiplexing_astrid_cruaud.output if (step == "assembling" ): outputs = expand(rules.assembling__velvet.output, sample=INDIVIDUS) if (step == "contigmaptouce" ): outputs = expand(rules.contigmaptouce__lastz.output, sample=INDIVIDUS) if (step == "mapping_check_bf" ): outputs = expand(rules.mapping_check_bf__samtools_stats.output, sample=INDIVIDUS) if (step == "mapping_check_af" ): outputs = expand(rules.mapping_check_af__samtools_stats.output, sample=INDIVIDUS) if (step == "convert_to_phylip" ): outputs = rules.convert_to_phylip__sam_to_phylip.output if (step == "all"): outputs = list(rules.multiqc.output) return outputs # get outputs for each choosen tools def workflow_outputs(step): outputs = list() outputs.extend(step_outputs(step)) return outputs ######### # Rules # ######### rule quality_control__fastqc_SE: input: **quality_control__fastqc_SE_inputs() output: html = config["results_dir"]+'/'+config["quality_control__fastqc_SE_output_dir"]+'/{sample}'+config["sample_suffix"].split(".")[0]+'_fastqc.html', zip = config["results_dir"]+'/'+config["quality_control__fastqc_SE_output_dir"]+'/{sample}'+config["sample_suffix"].split(".")[0]+'_fastqc.zip', log: config["results_dir"]+'/logs/' + config["quality_control__fastqc_SE_output_dir"] + '/{sample}_fastqc_log.txt' threads: 1 params: command = config["quality_control__fastqc_SE_command"], output_dir = config["results_dir"]+'/'+config["quality_control__fastqc_SE_output_dir"] shell: "{params.command} " "{input.read} " "-t {threads} " "-o {params.output_dir} " "--quiet " "|& tee {log}" rule quality_control__fastqc_PE: input: **quality_control__fastqc_PE_inputs() output: html1 = config["results_dir"]+'/'+config["quality_control__fastqc_PE_output_dir"]+'/{sample}_R1'+config["sample_suffix"].split(".")[0]+'_fastqc.html', zip1 = config["results_dir"]+'/'+config["quality_control__fastqc_PE_output_dir"]+'/{sample}_R1'+config["sample_suffix"].split(".")[0]+'_fastqc.zip', html2 = config["results_dir"]+'/'+config["quality_control__fastqc_PE_output_dir"]+'/{sample}_R2'+config["sample_suffix"].split(".")[0]+'_fastqc.html', zip2 = config["results_dir"]+'/'+config["quality_control__fastqc_PE_output_dir"]+'/{sample}_R2'+config["sample_suffix"].split(".")[0]+'_fastqc.zip', log: config["results_dir"]+'/logs/' + config["quality_control__fastqc_PE_output_dir"] + '/{sample}_fastqc_log.txt' threads: 2 params: command = config["quality_control__fastqc_PE_command"], output_dir = config["results_dir"]+'/'+config["quality_control__fastqc_PE_output_dir"] shell: "{params.command} " "{input.read} " "{input.read2} " "-t {threads} " "-o {params.output_dir} " "--quiet " "|& tee {log}" ruleorder: quality_control__fastqc_PE > quality_control__fastqc_SE rule qcfilter_adaptertrim__trimmomatic_PE: input: **qcfilter_adaptertrim__trimmomatic_PE_inputs() output: readFP = config["results_dir"]+"/"+config["qcfilter_adaptertrim__trimmomatic_PE_output_dir"]+"/{sample}_forward_paired.fq.gz", readFU = config["results_dir"]+"/"+config["qcfilter_adaptertrim__trimmomatic_PE_output_dir"]+"/{sample}_forward_unpaired.fq.gz", readRP = config["results_dir"]+"/"+config["qcfilter_adaptertrim__trimmomatic_PE_output_dir"]+"/{sample}_reverse_paired.fq.gz", readRU = config["results_dir"]+"/"+config["qcfilter_adaptertrim__trimmomatic_PE_output_dir"]+"/{sample}_reverse_unpaired.fq.gz", log: config["results_dir"]+'/logs/' + config["qcfilter_adaptertrim__trimmomatic_PE_output_dir"] + '/{sample}_trimmomatic_log.txt' params: command = config["qcfilter_adaptertrim__trimmomatic_PE_command"], qc_score = config["qcfilter_adaptertrim__trimmomatic_qc_score"], ILLUMINACLIP = "ILLUMINACLIP:" + config["qcfilter_adaptertrim__trimmomatic_fastaWithAdapters"] + ":" + config["qcfilter_adaptertrim__trimmomatic_illuminaclip"] if (config["qcfilter_adaptertrim__trimmomatic_fastaWithAdapters"] != "") else "", otherparams = config["qcfilter_adaptertrim__trimmomatic_otherparams"] threads: config["qcfilter_adaptertrim__trimmomatic_threads"] shell: "{params.command} " "{params.qc_score} " "-threads {threads} " "{input.read} " "{input.read2} " "{output.readFP} " "{output.readFU} " "{output.readRP} " "{output.readRU} " "{params.ILLUMINACLIP} " "{params.otherparams} " "|& tee {log}" rule qcfilter_adaptertrim__trimmomatic_SE: input: **qcfilter_adaptertrim__trimmomatic_SE_inputs() output: read = config["results_dir"]+"/"+config["qcfilter_adaptertrim__trimmomatic_SE_output_dir"]+"/{sample}_trimmed.fq.gz", log: config["results_dir"]+'/logs/' + config["qcfilter_adaptertrim__trimmomatic_PE_output_dir"] + '/{sample}_trimmomatic_log.txt' params: command = config["qcfilter_adaptertrim__trimmomatic_SE_command"], qc_score = config["qcfilter_adaptertrim__trimmomatic_qc_score"], ILLUMINACLIP = "ILLUMINACLIP:" + config["qcfilter_adaptertrim__trimmomatic_fastaWithAdapters"] + ":" + config["qcfilter_adaptertrim__trimmomatic_illuminaclip"] if (config["qcfilter_adaptertrim__trimmomatic_fastaWithAdapters"] != "") else "", otherparams = config["qcfilter_adaptertrim__trimmomatic_otherparams"] threads: config["qcfilter_adaptertrim__trimmomatic_threads"] shell: "{params.command} " "{params.qc_score} " "-threads {threads} " "{input} " "{output} " "{params.ILLUMINACLIP} " "{params.otherparams} " "|& tee {log}" ruleorder: qcfilter_adaptertrim__trimmomatic_PE > qcfilter_adaptertrim__trimmomatic_SE rule merge_overlapps__flash: input: **merge_overlapps__flash_inputs() output: extendedFrags = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"] + "/{sample}.extendedFrags.fastq", notCombined_1 = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"] + "/{sample}.notCombined_1.fastq", notCombined_2 = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"] + "/{sample}.notCombined_2.fastq", hist = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"] + "/{sample}.flash.hist", histogram = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"] + "/{sample}.histogram", params: output_dir = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"], command = config["merge_overlapps__flash_command"], min_overlap = config["merge_overlapps__flash_min_overlap"], max_overlap = config["merge_overlapps__flash_max_overlap"], max_mismatch_density = config["merge_overlapps__flash_max_mismatch_density"], tmp_hist = config["results_dir"] + "/" + config["merge_overlapps__flash_output_dir"] + "/{sample}.hist", # output from flash to be renamed for multiqc detection log: config["results_dir"]+"/logs/" + config["merge_overlapps__flash_output_dir"] + "/{sample}_flash_log.txt" threads: config["merge_overlapps__flash_threads"] shell: "{params.command} " "-x {params.max_mismatch_density} " "-m {params.min_overlap} " "-M {params.max_overlap} " "-d {params.output_dir} " "-o {wildcards.sample} " "-t {threads} " "{input.read} " "{input.read2} " "|& tee {log}; " "mv {params.tmp_hist} {output.hist}" # with flash in hist name it is detected by multiqc rule demultiplexing__demultiplexing_astrid_cruaud: input: **demultiplexing__demultiplexing_astrid_cruaud_inputs() output: demultiplexed = expand(config["results_dir"] + "/" + config["demultiplexing__demultiplexing_astrid_cruaud_output_dir"] + "/{sample}_fq", sample=INDIVIDUS), stats = config["results_dir"] + "/" + config["demultiplexing__demultiplexing_astrid_cruaud_output_dir"] + "/stat_demultiplexing_mqc.csv", params: output_dir = config["results_dir"] + "/" + config["demultiplexing__demultiplexing_astrid_cruaud_output_dir"], barcodes = config["demultiplexing__demultiplexing_astrid_cruaud_barcodes"] log: config["results_dir"]+"/logs/" + config["demultiplexing__demultiplexing_astrid_cruaud_output_dir"] + "/demultiplexing_astrid_cruaud_log.txt" threads: config["demultiplexing__demultiplexing_astrid_cruaud_threads"] shell: "cd {params.output_dir} && " #"touch {output.demultiplexed} && " # tail to remove header (sample barcode_P1 barcode_P2 barcode_P2_rev term) "tail -n +2 {params.barcodes} | parallel --jobs {threads} --colsep '\\t' \"grep -B 1 --no-group-separator -i '^{{2}}.*{{4}}$' {input.extended_frags} > {{1}}'_fq'\" |& tee {log} ; " # test if --no-group-separator works with the next parts of the workflow # nb reads/sample : "echo -e \"# description: 'Number of reads for each individu after demultiplexing'\\n# section_name: 'Demultiplexing stats'\\n# plot_type: 'table'\\n# pconfig:\\n# format: '{{:,.0f}}'\\nIndividu\\tNumber of reads\" > {output.stats}; " "for i in $(ls *_fq); do echo -e $i'\\t'$(grep -ic '@' $i); done >> {output.stats} 2>> {log}; " # count with grep @ because no risk of @ in quality sequence as it is no more present in the file # remove barcodes from beginning / end of reads "tail -n +2 {params.barcodes} | parallel --jobs {threads} --colsep '\\t' \"sed -i 's/^{{2}}//g;s/{{4}}$//g' {{1}}'_fq'\" |& tee -a {log}" rule assembling__velvet: input: **assembling__velvet_inputs(), output: contigs = config["results_dir"] + "/" + config["assembling__velvet_output_dir"] + "/{sample}/contigs.fa", stats = config["results_dir"] + "/" + config["assembling__velvet_output_dir"] + "/{sample}/stats.txt", LastGraph = config["results_dir"] + "/" + config["assembling__velvet_output_dir"] + "/{sample}/LastGraph", params: output_dir = config["results_dir"] + "/" + config["assembling__velvet_output_dir"]+ "/{sample}", command = config["assembling__velvet_command"], min_contig_lgth = config["assembling__velvet_min_contig_lgth"], hash_length = config["assembling__velvet_hash_length"], log: config["results_dir"] + "/logs/" + config["assembling__velvet_output_dir"] + "/{sample}_velvet_log.txt" shell: # VELVETH "{params.command}h " "{params.output_dir} " "{params.hash_length} " "-fastq " "{input.merged_reads} " "|& tee {log}; " # VELVETG "{params.command}g " "{params.output_dir} " "-min_contig_lgth {params.min_contig_lgth} " "|& tee -a {log}" rule contigmaptouce__lastz: input: **contigmaptouce__lastz_inputs(), reference_fasta = config["contigmaptouce__lastz_reference_fasta"], output: align_on_ref = config["results_dir"] + "/" + config["contigmaptouce__lastz_output_dir"] + "/{sample}.vs_probes_default.sam", filtered = config["results_dir"] + "/" + config["contigmaptouce__lastz_output_dir"] + "/filtered/{sample}.vs_probes_default.filtered.sam", stats = config["results_dir"] + "/" + config["contigmaptouce__lastz_output_dir"] + "/stats/{sample}_lastz_sam_stats.tsv", params: output_dir = config["results_dir"] + "/" + config["contigmaptouce__lastz_output_dir"]+ "/", command = config["contigmaptouce__lastz_command"] log: config["results_dir"] + "/logs/" + config["contigmaptouce__lastz_output_dir"] + "/{sample}_lastz_log.txt" shell: "{params.command} " "{input.reference_fasta}[multiple,unmask] " "{input.contigs}[unmask] " "--gapped " "--format=softsam " "--ambiguous=iupac " "--output='{output.align_on_ref}' " "|& tee {log}; " # stats "NB_ENTRIES=$(samtools view {output.align_on_ref} | wc -l); " "if [ $NB_ENTRIES -gt 0 ]; then " "NB_LOCUS=$(samtools view {output.align_on_ref} | cut -f 3 | sort -u | wc -l); " "LIST_MULTI_LOCUS_MAPPED=$(samtools view {output.align_on_ref} | cut -f 1,3 | uniq | awk 'BEGIN {{ FS=OFS=SUBSEP=\"\\t\"; RS=\"\\n\"}}{{arr[$2]+=1 }}END {{for (i in arr) if (arr[i] > 1) print i}}' -); " "NB_MULTI_LOCUS_MAPPED=$(echo $LIST_MULTI_LOCUS_MAPPED | wc -w); " "LIST_MULTI_CLUSTER_MAPPED=$(samtools view {output.align_on_ref} | cut -f 1,3 | uniq | awk 'BEGIN {{ FS=OFS=SUBSEP=\"\\t\"; RS=\"\\n\"}}{{arr[$1]+=1 }}END {{for (i in arr) if (arr[i] > 1) print i}}' -); " "NB_MULTI_CLUSTER_MAPPED=$(echo $LIST_MULTI_CLUSTER_MAPPED | wc -w); " "printf '%s\\n' \"${{LIST_MULTI_CLUSTER_MAPPED[@]}}\" > /tmp/{wildcards.sample}_lastz.tmp; " "printf '%s\\n' \"${{LIST_MULTI_LOCUS_MAPPED[@]}}\" >> /tmp/{wildcards.sample}_lastz.tmp; " "sed -i '/^$/d' /tmp/{wildcards.sample}_lastz.tmp; " # filter locus and clusters multimapped "samtools view -h {output.align_on_ref} | grep -v -f /tmp/{wildcards.sample}_lastz.tmp - > {output.filtered}; " "else " "NB_LOCUS=0;" "NB_MULTI_LOCUS_MAPPED=0;" "NB_MULTI_CLUSTER_MAPPED=0;" "touch {output.filtered}; " "fi;\n" "NB_ENTRIES_AFTER_FILTER=$(samtools view {output.filtered} | wc -l); " "echo -e NB_ENTRIES'\\t'NB_LOCUS'\\t'NB_MULTI_LOCUS_MAPPED'\\t'NB_MULTI_CLUSTER_MAPPED'\\t'NB_ENTRIES_AFTER_FILTER'\\n'$NB_ENTRIES'\\t'$NB_LOCUS'\\t'$NB_MULTI_LOCUS_MAPPED'\\t'$NB_MULTI_CLUSTER_MAPPED'\\t'$NB_ENTRIES_AFTER_FILTER > {output.stats}; " rule mapping_check_bf__samtools_stats: input: **mapping_check_bf__samtools_stats_inputs() output: stats = config["results_dir"]+'/' + config["mapping_check_bf__samtools_stats_output_dir"] + '/{sample}.txt', log: config["results_dir"]+'/logs/' + config["mapping_check_bf__samtools_stats_output_dir"] + '/{sample}_samtools_stats_log.txt' threads: config["mapping_check_bf__samtools_stats_threads"] params: command = config["mapping_check_bf__samtools_stats_command"], output_dir = config["results_dir"]+'/'+config["mapping_check_bf__samtools_stats_output_dir"] shell: "{params.command} {input.bam} -@ {threads} 2> {log} > {output.stats}" rule mapping_check_af__samtools_stats: input: **mapping_check_af__samtools_stats_inputs() output: stats = config["results_dir"]+'/' + config["mapping_check_af__samtools_stats_output_dir"] + '/{sample}.txt', log: config["results_dir"]+'/logs/' + config["mapping_check_af__samtools_stats_output_dir"] + '/{sample}_samtools_stats_log.txt' threads: config["mapping_check_af__samtools_stats_threads"] params: command = config["mapping_check_af__samtools_stats_command"], output_dir = config["results_dir"]+'/'+config["mapping_check_af__samtools_stats_output_dir"] shell: "{params.command} {input.bam} -@ {threads} 2> {log} > {output.stats}" rule convert_to_phylip__sam_to_phylip: input: **convert_to_phylip__sam_to_phylip_inputs(), output: phylips = directory(config["results_dir"] + "/" + config["convert_to_phylip__sam_to_phylip_output_dir"] + "/"), params: output_dir = config["results_dir"] + "/" + config["convert_to_phylip__sam_to_phylip_output_dir"]+ "/", input_dir = lambda w, input: os.path.dirname(input.sams[0]) log: config["results_dir"] + "/logs/" + config["convert_to_phylip__sam_to_phylip_output_dir"] + "/sam_to_phylip_log.txt" threads: 1 shell: "for file in $(ls {params.input_dir}/*.sam);\n" "do\n" " NB_ENTRIES=$(samtools view $file | wc -l);\n" " if [ $NB_ENTRIES -gt 0 ]; then\n" " sample=$(basename $file | cut -f 1 -d '.');\n" # Create sam header for each locus if not done yet (needed for samtools fasta) " grep \"^@SQ\" $file | awk '{{split($2,a,\":\"); if(system(\"test -f {params.output_dir}/\"a[2]\".sam\")!=0) {{print $0 > \"{params.output_dir}/\"a[2]\".sam\"}} close(\"{params.output_dir}/\"a[2])\".sam\"}}';\n" # Group entries by locus " samtools view $file | awk -v sample=\"$sample\" 'BEGIN {{OFS=\"\t\"}} {{$1=sample; print $0 >> \"{params.output_dir}/\"$3\".sam\"}} close(\"{params.output_dir}/\"$3\".sam\")';\n" " fi;\n" "done;\n" "echo 'Group by reference done';\n" # sam to fasta "for file in $(ls {params.output_dir}/*.sam);\n" "do\n" " NB_ENTRIES=$(samtools view $file | wc -l);\n" " if [ $NB_ENTRIES -gt 0 ]; then\n" " outname=$(echo $file | cut -f 1 -d '.');\n" " samtools fasta $file > ${{outname}}.fasta;\n" " fi;\n" "done;" "echo 'Turn sam into fasta done';\n" # fasta to phylip "for file in $(ls {params.output_dir}/*.fasta);\n" "do\n" " outname=$(echo $file | cut -f 1 -d '.');\n" " seqret -sequence $file -osf phylip3 -outseq ${{outname}}.phylip;\n" "done;" "echo 'Turn fasta into phylip done';\n" import collections rule prepare_report: input: *prepare_report_inputs(), output: *prepare_report_outputs(), config_multiqc = config["results_dir"] + "/config_multiqc.yaml", params_tab = config["results_dir"] + "/params_tab_mqc.csv", versions_tab = config["results_dir"] + "/Tools_version_mqc.csv", citations_tab = config["results_dir"] + "/Citations_mqc.csv" params: params_file = workflow.overwrite_configfiles, results_dir = config["results_dir"] log: config["results_dir"]+"/logs/prepare_report_log.txt" run: # Specific scripts for each tool for script in prepare_report_scripts(): if (os.path.splitext(script)[1] in [".R",".r"]): shell("Rscript "+script+" {params.params_file} |& tee {log}") elif (os.path.splitext(script)[1] in [".py"]): shell("python3 "+script+" {params.params_file} |& tee {log}") elif (os.path.splitext(script)[1] in [".sh"]): shell("/bin/bash "+script+" {params.params_file} |& tee {log}") # Outputs files for Multiqc report outfile = config["results_dir"] + "/outputs_mqc.csv" head = """ # description: 'This is the list of the files generated by each step of the workflow' # section_name: 'Workflow outputs' """ with open(outfile,"w") as out: out.write(head) out.write("step\ttool\tfile\tdescription\n")#\tname for step in STEPS: tool = config[step["name"]] i=1 for command in OUTPUTS[step["name"] + "__" + tool]: if ("SeOrPe" not in config.keys() or (config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))): outputs = OUTPUTS[step["name"] + "__" + tool][command] for files in outputs: name = files["file"] if 'file' in files.keys() else files["directory"] path = config[command+"_output_dir"] + "/" + name #config["results_dir"] +"/"+ out.write(str(i)+"-"+step["title"]+"\t"+tool+"\t"+path+"\t"+files["description"]+"\n")#"\t"+files["name"]+ i+=1 # Params list for Multiqc report params_list = "params_name\tdescription\tvalue\n" head = """# description: 'This is the list of the parameters for each rule' # section_name: 'Workflow parameters' """ for step in STEPS: tool = config[step["name"]] for key, value in config.items(): if (tool in key and tool != "null" and "_command" not in key) or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ("SeOrPe" not in config.keys() or (config["SeOrPe"] == "SE" and not("_PE" in key)) or (config["SeOrPe"] == "PE" and not("_SE" in key))): if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()): description = PARAMS_INFO[key]["label"] else: description = '' params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n" with open(output.params_tab,"w") as out: out.write(head) out.write(params_list) # Tools version with open(output.versions_tab,"w") as out: versions = read_yaml("/workflow/versions.yaml") head = """# description: 'This is the list of the tools used and their version' # section_name: 'Tools version' """ out.write(head) out.write("Tool\tVersion"+"\n") for tool, version in versions["base_tools"].items(): out.write(tool+"\t"+str(version)+"\n") for step in STEPS: tool = config[step["name"]] if (tool in versions.keys()): out.write(tool+"\t"+str(versions[tool])+"\n") # Citations with open(output.citations_tab,"w") as out: citations = read_yaml("/workflow/citations.yaml") head = """# description: 'This is the list of the citations of used tools' # section_name: 'Citations' """ out.write(head) out.write("Tool\tCitation\n") for tool, citation in citations["base_tools"].items(): out.write(tool+"\t"+" ; ".join(citation)+"\n") final_citations = collections.OrderedDict() for step in STEPS: tool = config[step["name"]] if (tool in citations.keys()): final_citations.update(citations[tool]) for tool, citation in final_citations.items(): out.write(tool+"\t"+" ; ".join(citation)+"\n") # Config for Multiqc report shell("python3 /workflow/generate_multiqc_config.py {params.params_file} {output.config_multiqc}") rule multiqc: input: multiqc_inputs(), config_multiqc = config["results_dir"] + "/config_multiqc.yaml" output: multiqc_dir = directory(config["results_dir"]+"/multiqc_data"), multiqc_report = config["results_dir"]+"/multiqc_report.html" params: output_dir = config["results_dir"] log: config["results_dir"]+'/logs/multiqc/multiqc_log.txt' run: modules_to_run = "-m custom_content " for module in MULTIQC.values(): if (module != "custom"): modules_to_run += "-m " + module + " " shell( "multiqc --config {input.config_multiqc} " + "-o {params.output_dir} " + "-f {params.output_dir} " + modules_to_run + "|& tee {log}" ) # Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps) rule all: input: workflow_outputs("all") output: Snakefile = config["results_dir"]+"/workflow/Snakefile", get_samples = config["results_dir"]+"/workflow/get_samples.py", scripts = directory(config["results_dir"]+"/workflow/scripts"), params = config["results_dir"]+"/workflow/params.yml" params: params_file = workflow.overwrite_configfiles, shell: "cp /workflow/Snakefile {output.Snakefile} && " "cp /workflow/get_samples.py {output.get_samples} && " "cp -r /workflow/scripts {output.scripts} && " "cp {params.params_file} {output.params}" onsuccess: print("Workflow finished with SUCCESS") shell("touch "+config["results_dir"]+"/logs/workflow_end.ok") onerror: print("An ERROR occurred") shell("cat {log} > "+config["results_dir"]+"/logs/workflow_end.error") #shell("mail -s "an error occurred" youremail@provider.com < {log}")