Commit d60af896 authored by mmassaviol's avatar mmassaviol
Browse files

Initial commit

parents
# Build stage with every bioinformatics tool installed; the final image
# derives from it at the end of this file.
FROM mbbteam/mbb_workflows_base:latest as alltools

# Trimmomatic 0.38 (read trimming) plus a small wrapper script on the PATH.
# BUG FIX: the wrapper used to be a single line ('#!/bin/bash java -jar ...'),
# which is an invalid interpreter line, and it never forwarded the caller's
# arguments. Write a proper two-line script and pass "$@" through to the jar.
# Also 755 instead of a world-writable 777.
RUN cd /opt/biotools \
 && wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.38.zip \
 && unzip Trimmomatic-0.38.zip \
 && echo -e '#!/bin/bash\njava -jar /opt/biotools/Trimmomatic-0.38/trimmomatic-0.38.jar "$@"' > bin/trimmomatic \
 && chmod 755 bin/trimmomatic \
 && rm Trimmomatic-0.38.zip
RUN apt -y update && apt install -y openjdk-8-jre
# MEGAHIT (metagenome assembler): build from source, run its self-test,
# install system-wide, then delete the source tree so only the installed
# binaries remain in the layer.
# NOTE(review): the clone is not pinned to a tag/commit, so rebuilds are not
# reproducible — consider `git clone --branch <tag>`.
RUN cd /opt/biotools \
&& git clone https://github.com/voutcn/megahit.git \
&& cd megahit \
&& git submodule update --init \
&& mkdir build && cd build \
&& cmake .. -DCMAKE_BUILD_TYPE=Release \
&& make -j8 \
&& make simple_test \
&& make install \
&& cd /opt/biotools \
&& rm -rf megahit
# any2fasta (sequence format converter) fetched as a single script.
# The `apt-get update` here also refreshes the package lists that the
# following layers' `apt-get install` calls rely on — do not remove it
# without adding an update to those layers.
# NOTE(review): the script is unpinned (master branch) and unverified —
# consider pinning to a release commit and checking a checksum.
RUN apt-get update \
&& cd /opt/biotools/bin \
&& wget https://raw.githubusercontent.com/tseemann/any2fasta/master/any2fasta \
&& chmod +x any2fasta
# Abricate v1.0.1 (AMR / virulence gene screening) and its Perl dependencies,
# with its databases set up and a smoke test on the bundled assembly.
# FIX: run `apt-get update` in the same layer as the install so this step no
# longer depends on package lists cached by an earlier, independently-cached
# layer (the classic stale-cache pitfall).
RUN apt-get update \
&& apt-get install -y bioperl ncbi-blast+ libjson-perl libtext-csv-perl libpath-tiny-perl liblwp-protocol-https-perl libwww-perl \
&& cd /opt/biotools \
&& git clone --depth 1 --branch v1.0.1 https://github.com/tseemann/abricate.git \
&& ./abricate/bin/abricate --check \
&& ./abricate/bin/abricate --setupdb \
&& ./abricate/bin/abricate ./abricate/test/assembly.fa \
&& cd bin \
&& ln -s /opt/biotools/abricate/bin/abricate abricate
# MetaBAT 2.12.1 (contig binning), shipped as a static binary; the tarball
# is removed in the same layer so it never bloats the image.
RUN cd /opt/biotools \
&& wget https://bitbucket.org/berkeleylab/metabat/downloads/metabat-static-binary-linux-x64_v2.12.1.tar.gz \
&& tar -xvzf metabat-static-binary-linux-x64_v2.12.1.tar.gz \
&& rm metabat-static-binary-linux-x64_v2.12.1.tar.gz \
&& cd bin \
&& ln -s /opt/biotools/metabat/metabat metabat
RUN apt-get install -y zlib1g-dev pkg-config libfreetype6-dev libpng-dev python-matplotlib python-setuptools
# QUAST 5.0.2 (assembly quality assessment) from the release tarball;
# `install_full` also downloads its optional reference tools/databases.
RUN cd /opt/biotools \
&& wget https://github.com/ablab/quast/releases/download/quast_5.0.2/quast-5.0.2.tar.gz \
&& tar -zxvf quast-5.0.2.tar.gz \
&& rm quast-5.0.2.tar.gz \
&& cd quast-5.0.2/ \
&& python3 ./setup.py install_full
# UTF-8 locale for every tool in the image.
# FIX: use the key=value form (the legacy space-separated `ENV key value`
# syntax is deprecated) and a single instruction instead of three layers.
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8
#This part is necessary to run on ISEM cluster
# Create the directories and the SGE/GridEngine system user that the ISEM
# cluster expects to find inside the image (bind mounts need their mount
# points to already exist).
# NOTE(review): "/export/scrach" looks like a typo for "scratch" — confirm it
# matches the actual cluster mount point before renaming.
RUN mkdir -p /share/apps/bin \
&& mkdir -p /share/apps/lib \
&& mkdir -p /share/apps/gridengine \
&& mkdir -p /share/bio \
&& mkdir -p /opt/gridengine \
&& mkdir -p /export/scrach \
&& mkdir -p /usr/lib64 \
&& ln -s /bin/bash /bin/mbb_bash \
&& ln -s /bin/bash /bin/isem_bash \
&& /usr/sbin/groupadd --system --gid 400 sge \
&& /usr/sbin/useradd --system --uid 400 --gid 400 -c GridEngine --shell /bin/true --home /opt/gridengine sge
# Port of the embedded Shiny app (documentation only; publish with `-p`).
EXPOSE 3838
# Launch the Shiny UI by default (exec form, so Rscript runs as PID 1).
CMD ["Rscript", "-e", "setwd('/sagApp/'); shiny::runApp('/sagApp/app.R',port=3838 , host='0.0.0.0')"]

# Final image: all tools from the build stage plus the workflow definition
# and the Shiny application sources.
FROM alltools
COPY files /workflow
COPY sagApp /sagApp
from tools import *
from raw_reads import raw_reads
workdir: config['params']['results_dir']
import os
import re
import snakemake.utils
import csv
#############
# Wildcards #
#############
# Top-level configuration handed to Snakemake via its config file.
STEPS = config["steps"]  # ordered list of workflow steps ({name, title, ...})
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]  # report files per tool
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]  # report scripts per tool
OUTPUTS = config["outputs"]  # declared outputs per "<step>__<tool>" command
PARAMS_INFO = config["params_info"]  # human-readable labels for parameters
MULTIQC = config["multiqc"]  # MultiQC module to load per tool
# From here on, `config` refers only to the "params" sub-dict.
config = config["params"]
##########
# Inputs #
##########
# raw_inputs function call
# NOTE: this rebinds the name `raw_reads` from the imported function to its
# result (a dict describing the discovered input read files and samples).
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
config.update(raw_reads)
SAMPLES = raw_reads['samples']
# Tools inputs functions
def quality__trimmomatic_SE_inputs():
    """Input files for single-end Trimmomatic: the raw reads found at startup."""
    return {"read": raw_reads["read"]}
def quality__trimmomatic_PE_inputs():
    """Input files for paired-end Trimmomatic: forward and reverse raw reads."""
    return {
        "read": raw_reads["read"],
        "read2": raw_reads["read2"],
    }
def metagenomic__abricate_inputs():
    """Abricate screens the contigs built by the single-end MEGAHIT rule."""
    return {"contigs": rules.assembly__megahit_SE.output.contigs}
def metagenomic_2__metabat_inputs():
    """MetaBAT bins the contigs built by the single-end MEGAHIT rule."""
    return {"contigs": rules.assembly__megahit_SE.output.contigs}
def assembly__megahit_SE_inputs():
    """MEGAHIT co-assembles the trimmed reads of every sample at once."""
    trimmed = expand(rules.quality__trimmomatic_SE.output.read, sample=SAMPLES)
    return {"read": trimmed}
def assembly__megahit_PE_inputs():
    """Inputs for the paired-end MEGAHIT assembly rule.

    FIX: the generated original returned an empty dict, but the PE rule's
    run body iterates over `input.read` and `input.read2`, so any PE run
    would crash with a missing input attribute. Wire in the paired
    (forward/reverse) trimmed reads of every sample, mirroring the SE
    input function above.
    """
    inputs = dict()
    inputs["read"] = expand(rules.quality__trimmomatic_PE.output.readFP, sample = SAMPLES)
    inputs["read2"] = expand(rules.quality__trimmomatic_PE.output.readRP, sample = SAMPLES)
    return inputs
def assembly_quality__quast_inputs():
    """QUAST evaluates the contigs built by the single-end MEGAHIT rule."""
    return {"assembly": rules.assembly__megahit_SE.output.contigs}
def prepare_report_inputs():
    """All files produced by the configured steps, flattened into one list."""
    collected = []
    for step in STEPS:
        collected += step_outputs(step["name"])
    return collected
def prepare_report_scripts():
    """Per-tool report scripts to run for the configured steps.

    PREPARE_REPORT_SCRIPTS may be a single filename or a list of filenames;
    a script is selected when its name carries the "<tool>.prepare.report."
    prefix of a configured step's tool. Ambiguous (multiple) matches in the
    list form are skipped.
    """
    selected = []
    for step in STEPS:
        marker = config[step["name"]] + ".prepare.report."
        if type(PREPARE_REPORT_SCRIPTS) is str:
            if marker in PREPARE_REPORT_SCRIPTS:
                selected.append("/workflow/scripts/" + PREPARE_REPORT_SCRIPTS)
        else:
            hits = [name for name in PREPARE_REPORT_SCRIPTS if marker in name]
            if len(hits) == 1:
                selected.append("/workflow/scripts/" + hits[0])
    return selected
def prepare_report_outputs():
    """Files the prepare_report rule must produce, plus the global outputs CSV."""
    produced = [config["results_dir"] + "/outputs_mqc.csv"]
    for step in STEPS:
        tool = config[step["name"]]
        if tool not in PREPARE_REPORT_OUTPUTS.keys():
            continue
        declared = PREPARE_REPORT_OUTPUTS[tool]
        # A single filename may be declared as a bare string instead of a list.
        names = [declared] if type(declared) is str else declared
        for name in names:
            produced.append(config["results_dir"] + "/" + tool + "/" + name)
    return produced
def multiqc_inputs():
    """MultiQC waits on the prepare_report *outputs* so it still has
    dependencies to wait on even when prepare_report produced nothing
    beyond the global CSV."""
    return prepare_report_outputs()
###########
# Outputs #
###########
def step_outputs(step):
    """Concrete output files of the tool configured for the given step name."""
    single_end = config['SeOrPe'] == 'SE'
    if step == "quality" and single_end:
        return expand(rules.quality__trimmomatic_SE.output, sample=SAMPLES)
    if step == "metagenomic":
        return rules.metagenomic__abricate.output
    if step == "metagenomic_2":
        return rules.metagenomic_2__metabat.output
    if step == "assembly" and single_end:
        return rules.assembly__megahit_SE.output
    if step == "assembly_quality":
        return rules.assembly_quality__quast.output
    if step == "all":
        return list(rules.multiqc.output)
    # No PE branches are generated for this workflow; unknown combinations
    # yield an empty list, exactly like the original fall-through.
    return list()
# get outputs for each choosen tools
def workflow_outputs(step):
    """Outputs requested for the user's chosen final step (default: all)."""
    return list(step_outputs(step))
#########
# Rules #
#########
# Paired-end read trimming. Trimmomatic PE produces paired and unpaired
# (orphan) outputs for both the forward and reverse reads of each sample.
rule quality__trimmomatic_PE:
    input:
        **quality__trimmomatic_PE_inputs()
    output:
        readFP = config["results_dir"]+"/"+config["quality__trimmomatic_PE_output_dir"]+"/{sample}_forward_paired.fq.gz",
        readFU = config["results_dir"]+"/"+config["quality__trimmomatic_PE_output_dir"]+"/{sample}_forward_unpaired.fq.gz",
        readRP = config["results_dir"]+"/"+config["quality__trimmomatic_PE_output_dir"]+"/{sample}_reverse_paired.fq.gz",
        readRU = config["results_dir"]+"/"+config["quality__trimmomatic_PE_output_dir"]+"/{sample}_reverse_unpaired.fq.gz",
    log:
        config["results_dir"]+'/logs/' + config["quality__trimmomatic_PE_output_dir"] + '/{sample}_trimmomatic_log.txt'
    params:
        command = config["quality__trimmomatic_PE_command"],
        qc_score = config["quality__trimmomatic_qc_score"],
        # The ILLUMINACLIP step is only added when an adapter FASTA is configured.
        ILLUMINACLIP = "ILLUMINACLIP:" + config["quality__trimmomatic_fastaWithAdapters"] + ":" + config["quality__trimmomatic_illuminaclip"] if (config["quality__trimmomatic_fastaWithAdapters"] != "") else "",
        otherparams = config["quality__trimmomatic_otherparams"]
    threads:
        config["quality__trimmomatic_threads"]
    shell:
        # Trimmomatic PE argument order: inputs (R1 R2), then the four outputs.
        "{params.command} "
        "{params.qc_score} "
        "-threads {threads} "
        "{input.read} "
        "{input.read2} "
        "{output.readFP} "
        "{output.readFU} "
        "{output.readRP} "
        "{output.readRU} "
        "{params.ILLUMINACLIP} "
        "{params.otherparams} "
        "|& tee {log}"
# Single-end read trimming with Trimmomatic.
rule quality__trimmomatic_SE:
    input:
        **quality__trimmomatic_SE_inputs()
    output:
        read = config["results_dir"]+"/"+config["quality__trimmomatic_SE_output_dir"]+"/{sample}_trimmed.fq.gz",
    log:
        # BUG FIX: the log path used the PE output dir
        # (quality__trimmomatic_PE_output_dir), so SE logs landed in the
        # paired-end directory. Use the SE output dir, matching the outputs.
        config["results_dir"]+'/logs/' + config["quality__trimmomatic_SE_output_dir"] + '/{sample}_trimmomatic_log.txt'
    params:
        command = config["quality__trimmomatic_SE_command"],
        qc_score = config["quality__trimmomatic_qc_score"],
        # The ILLUMINACLIP step is only added when an adapter FASTA is configured.
        ILLUMINACLIP = "ILLUMINACLIP:" + config["quality__trimmomatic_fastaWithAdapters"] + ":" + config["quality__trimmomatic_illuminaclip"] if (config["quality__trimmomatic_fastaWithAdapters"] != "") else "",
        otherparams = config["quality__trimmomatic_otherparams"]
    threads:
        config["quality__trimmomatic_threads"]
    shell:
        # Trimmomatic SE argument order: input file, then output file.
        "{params.command} "
        "{params.qc_score} "
        "-threads {threads} "
        "{input} "
        "{output} "
        "{params.ILLUMINACLIP} "
        "{params.otherparams} "
        "|& tee {log}"
# When both could produce a sample's trimmed reads, prefer the PE rule.
ruleorder: quality__trimmomatic_PE > quality__trimmomatic_SE
import time
# Single-end MEGAHIT co-assembly of all samples' trimmed reads.
rule assembly__megahit_SE:
    input:
        **assembly__megahit_SE_inputs()
    output:
        contigs = config["results_dir"] + "/" + config["assembly__megahit_SE_output_dir"] + "/assembly.contigs.fa"
    log:
        config["results_dir"]+'/logs/' + config["assembly__megahit_SE_output_dir"] + '/megahit_log.txt'
    threads:
        config["assembly__megahit_threads"]
    params:
        command = config["assembly__megahit_SE_command"],
        output_dir = config["results_dir"]+'/'+config["assembly__megahit_SE_output_dir"]
    run:
        # One "-r <file>" flag per input read file.
        readline = ""
        for r1 in input.read:
            readline += "-r "+r1+" "
        # Assemble into <output_dir>.tmp and move the results over afterwards
        # — presumably because megahit refuses to write into an existing
        # directory while Snakemake pre-creates output dirs (TODO confirm).
        # The last pipeline step extracts the stats line (second-to-last line
        # of megahit's own log) into stats.txt.
        shell(
            "{params.command} "+
            readline+
            "-t {threads} "+
            "-o {params.output_dir}.tmp "+
            "--out-prefix assembly "+
            #"--k-min 39 "+
            #"--k-max 79 "+
            "; mv {params.output_dir}.tmp/* {params.output_dir}/ && rm -rf {params.output_dir}.tmp/ "+
            "|& tee {log}; "
            "tail -n 2 {params.output_dir}/assembly.log | head -n 1 > {params.output_dir}/stats.txt"
        )
        # prepare report assembly stats
        time.sleep(5) # wait for stat file
        with open(params.output_dir+'/stats.txt',"r") as stats_file:
            stats = stats_file.readline()
        # Parse "... - N contigs, total N bp, min N bp, max N bp, avg N bp, N50 N bp".
        matches = re.match(r"^[\d -:]*- (\d+) contigs, total (\d+) bp, min (\d+) bp, max (\d+) bp, avg (\d+) bp, N50 (\d+) bp$",stats)
        # Write a MultiQC custom-content table with the parsed stats
        # (row labels are in French, as displayed in the final report).
        with open(params.output_dir+'/Assembly_stats_mqc.tsv',"w") as staTab:
            staTab.write("# id: 'assembly_stats'\n")
            staTab.write("# section_name: 'Megahit Assembly Stats'\n")
            staTab.write("# plot_type: 'table'\n")
            staTab.write("Stat\tValue\n")
            staTab.write("Nombre de contigs\t"+matches.group(1)+"\n")
            staTab.write("Nombre de pb total\t"+matches.group(2)+"\n")
            staTab.write("Min pb\t"+matches.group(3)+"\n")
            staTab.write("Max pb\t"+matches.group(4)+"\n")
            staTab.write("Moyenne pb\t"+matches.group(5)+"\n")
            staTab.write("N50\t"+matches.group(6))
# Paired-end MEGAHIT co-assembly; mirrors the SE rule but feeds read pairs.
rule assembly__megahit_PE:
    input:
        **assembly__megahit_PE_inputs()
    output:
        contigs = config["results_dir"] + "/" + config["assembly__megahit_PE_output_dir"] + "/assembly.contigs.fa"
    log:
        config["results_dir"]+'/logs/' + config["assembly__megahit_PE_output_dir"] + '/megahit_log.txt'
    threads:
        config["assembly__megahit_threads"]
    params:
        command = config["assembly__megahit_PE_command"],
        output_dir = config["results_dir"]+'/'+config["assembly__megahit_PE_output_dir"]
    run:
        # One "-1 <R1> -2 <R2>" pair per sample.
        readline = ""
        for r1,r2 in zip(input.read,input.read2):
            readline += "-1 "+r1+" -2 "+r2+" "
        # Assemble into <output_dir>.tmp and move the results over afterwards;
        # the last pipeline step extracts the stats line (second-to-last line
        # of megahit's own log) into stats.txt.
        shell(
            "{params.command} "+
            readline+
            "-t {threads} "+
            "-o {params.output_dir}.tmp "+
            "--out-prefix assembly "+
            #"--k-min 39 "+
            #"--k-max 79 "+
            "; mv {params.output_dir}.tmp/* {params.output_dir}/ && rm -rf {params.output_dir}.tmp/ "+
            "|& tee {log}; "
            "tail -n 2 {params.output_dir}/assembly.log | head -n 1 > {params.output_dir}/stats.txt"
        )
        # prepare report assembly stats
        time.sleep(5) # wait for stat file
        with open(params.output_dir+'/stats.txt',"r") as stats_file:
            stats = stats_file.readline()
        # Parse "... - N contigs, total N bp, min N bp, max N bp, avg N bp, N50 N bp".
        matches = re.match(r"^[\d -:]*- (\d+) contigs, total (\d+) bp, min (\d+) bp, max (\d+) bp, avg (\d+) bp, N50 (\d+) bp$",stats)
        # Write a MultiQC custom-content table with the parsed stats
        # (row labels are in French, as displayed in the final report).
        with open(params.output_dir+'/Assembly_stats_mqc.tsv',"w") as staTab:
            staTab.write("# id: 'assembly_stats'\n")
            staTab.write("# section_name: 'Megahit Assembly Stats'\n")
            staTab.write("# plot_type: 'table'\n")
            staTab.write("Stat\tValue\n")
            staTab.write("Nombre de contigs\t"+matches.group(1)+"\n")
            staTab.write("Nombre de pb total\t"+matches.group(2)+"\n")
            staTab.write("Min pb\t"+matches.group(3)+"\n")
            staTab.write("Max pb\t"+matches.group(4)+"\n")
            staTab.write("Moyenne pb\t"+matches.group(5)+"\n")
            staTab.write("N50\t"+matches.group(6))
# Screen the assembled contigs for resistance/virulence genes with abricate.
rule metagenomic__abricate:
    input:
        **metagenomic__abricate_inputs()
    output:
        tsv = config["results_dir"] + "/" + config["metagenomic__abricate_output_dir"] + "/output.tsv",
    params:
        output_dir = config["results_dir"] + "/" + config["metagenomic__abricate_output_dir"]+ "/",
        command = config["metagenomic__abricate_command"],
        database = config["metagenomic__abricate_database"],
        minid = config["metagenomic__abricate_minid"],
        mincov = config["metagenomic__abricate_mincov"],
    log:
        config["results_dir"] + "/logs/" + config["metagenomic__abricate_output_dir"] + "/abricate_log.txt"
    threads:
        config["metagenomic__abricate_threads"]
    shell:
        # BUG FIX: {input.contigs} was passed twice (before and after the
        # options), so abricate screened the same file twice and every hit
        # appeared twice in output.tsv. Pass it once, after the options.
        # stderr is teed to the log while stdout (the TSV) goes to the output.
        "{params.command} "
        "--db {params.database} "
        "--minid {params.minid} "
        "--mincov {params.mincov} "
        "{input.contigs} "
        "> {output.tsv} "
        "2> >(tee {log} >&2)"
# Bin the assembled contigs into putative genomes with MetaBAT.
rule metagenomic_2__metabat:
    input:
        **metagenomic_2__metabat_inputs(),
    output:
        # The whole bin directory is the declared output (one fasta per bin).
        fasta_bins = directory(config["results_dir"] + "/" + config["metagenomic_2__metabat_output_dir"] + "/"),
    params:
        output_dir = config["results_dir"] + "/" + config["metagenomic_2__metabat_output_dir"]+ "/",
        command = config["metagenomic_2__metabat_command"],
        mincontig = config["metagenomic_2__metabat_mincontig"],
    log:
        config["results_dir"] + "/logs/" + config["metagenomic_2__metabat_output_dir"] + "/metabat_log.txt"
    threads:
        config["metagenomic_2__metabat_threads"]
    shell:
        # NOTE(review): output_dir already ends with "/", so --outFile gets a
        # (harmless) double slash before "out".
        "{params.command} "
        "-i {input.contigs} "
        "--outFile {params.output_dir}/out "
        "--minContig {params.mincontig} "
        "-t {threads} "
        "|& tee {log}"
# Assembly quality metrics (N50, misassemblies, ...) with QUAST.
rule assembly_quality__quast:
    input:
        **assembly_quality__quast_inputs(),
    output:
        report = config["results_dir"] + "/" + config["assembly_quality__quast_output_dir"] + "/report.tsv",
    params:
        command = config["assembly_quality__quast_command"],
        output_dir = config["results_dir"] + "/" + config["assembly_quality__quast_output_dir"]+ "/",
    log:
        config["results_dir"] + "/logs/" + config["assembly_quality__quast_output_dir"] + "/quast_log.txt"
    threads:
        config["assembly_quality__quast_threads"]
    shell:
        # FIX: a log file was declared but the command never wrote to it;
        # capture stdout/stderr through tee like every other rule here.
        "{params.command} "
        "{input.assembly} "
        "-o {params.output_dir} "
        "--threads {threads} "
        "|& tee {log}"
import collections
# Build the MultiQC custom-content inputs: per-tool report files plus the
# global tables (workflow outputs, parameters, tool versions, citations) and
# the generated MultiQC config.
rule prepare_report:
    input:
        *prepare_report_inputs(),
    output:
        *prepare_report_outputs(),
        config_multiqc = config["results_dir"] + "/config_multiqc.yaml",
        params_tab = config["results_dir"] + "/params_tab_mqc.csv",
        versions_tab = config["results_dir"] + "/Tools_version_mqc.csv",
        citations_tab = config["results_dir"] + "/Citations_mqc.csv"
    params:
        # Path(s) of the YAML config file(s) Snakemake was started with.
        params_file = workflow.overwrite_configfiles,
        results_dir = config["results_dir"]
    log:
        config["results_dir"]+"/logs/prepare_report_log.txt"
    run:
        # Specific scripts for each tool, dispatched on file extension.
        for script in prepare_report_scripts():
            if (os.path.splitext(script)[1] in [".R",".r"]):
                shell("Rscript "+script+" {params.params_file} |& tee {log}")
            elif (os.path.splitext(script)[1] in [".py"]):
                shell("python3 "+script+" {params.params_file} |& tee {log}")
            elif (os.path.splitext(script)[1] in [".sh"]):
                shell("/bin/bash "+script+" {params.params_file} |& tee {log}")
        # Outputs files for Multiqc report
        outfile = config["results_dir"] + "/outputs_mqc.csv"
        head = """
# description: 'This is the list of the files generated by each step of the workflow'
# section_name: 'Workflow outputs'
"""
        with open(outfile,"w") as out:
            out.write(head)
            out.write("step\ttool\tfile\tdescription\n")#\tname
            for step in STEPS:
                tool = config[step["name"]]
                i=1
                for command in OUTPUTS[step["name"] + "__" + tool]:
                    # Skip SE outputs in a PE run and vice versa.
                    if ("SeOrPe" not in config.keys() or (config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
                        outputs = OUTPUTS[step["name"] + "__" + tool][command]
                        for files in outputs:
                            name = files["file"] if 'file' in files.keys() else files["directory"]
                            path = config[command+"_output_dir"] + "/" + name #config["results_dir"] +"/"+
                            out.write(str(i)+"-"+step["title"]+"\t"+tool+"\t"+path+"\t"+files["description"]+"\n")#"\t"+files["name"]+
                            i+=1
        # Params list for Multiqc report
        params_list = "params_name\tdescription\tvalue\n"
        head = """# description: 'This is the list of the parameters for each rule'
# section_name: 'Workflow parameters'
"""
        for step in STEPS:
            tool = config[step["name"]]
            for key, value in config.items():
                # Keep the chosen tool's own parameters (minus *_command) plus
                # a few global settings.
                # NOTE(review): due to and/or precedence, the SE/PE filter
                # only applies to the global-key clause — confirm intended.
                if (tool in key and tool != "null" and "_command" not in key) or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ("SeOrPe" not in config.keys() or (config["SeOrPe"] == "SE" and not("_PE" in key)) or (config["SeOrPe"] == "PE" and not("_SE" in key))):
                    if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
                        description = PARAMS_INFO[key]["label"]
                    else:
                        description = ''
                    params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n"
        with open(output.params_tab,"w") as out:
            out.write(head)
            out.write(params_list)
        # Tools version
        with open(output.versions_tab,"w") as out:
            versions = read_yaml("/workflow/versions.yaml")
            head = """# description: 'This is the list of the tools used and their version'
# section_name: 'Tools version'
"""
            out.write(head)
            out.write("Tool\tVersion"+"\n")
            for tool, version in versions["base_tools"].items():
                out.write(tool+"\t"+str(version)+"\n")
            for step in STEPS:
                tool = config[step["name"]]
                if (tool in versions.keys()):
                    out.write(tool+"\t"+str(versions[tool])+"\n")
        # Citations
        with open(output.citations_tab,"w") as out:
            citations = read_yaml("/workflow/citations.yaml")
            head = """# description: 'This is the list of the citations of used tools'
# section_name: 'Citations'
"""
            out.write(head)
            out.write("Tool\tCitation\n")
            for tool, citation in citations["base_tools"].items():
                out.write(tool+"\t"+" ; ".join(citation)+"\n")
            # Deduplicate per-tool citations while preserving step order.
            final_citations = collections.OrderedDict()
            for step in STEPS:
                tool = config[step["name"]]
                if (tool in citations.keys()):
                    final_citations.update(citations[tool])
            for tool, citation in final_citations.items():
                out.write(tool+"\t"+" ; ".join(citation)+"\n")
        # Config for Multiqc report
        shell("python3 /workflow/generate_multiqc_config.py {params.params_file} {output.config_multiqc}")
# Aggregate all QC outputs into a single MultiQC HTML report.
rule multiqc:
    input:
        multiqc_inputs(),
        config_multiqc = config["results_dir"] + "/config_multiqc.yaml"
    output:
        multiqc_dir = directory(config["results_dir"]+"/multiqc_data"),
        multiqc_report = config["results_dir"]+"/multiqc_report.html"
    params:
        output_dir = config["results_dir"]
    log:
        config["results_dir"]+'/logs/multiqc/multiqc_log.txt'
    run:
        # Always load custom_content, plus every tool-specific module.
        modules_to_run = "-m custom_content "
        for module in MULTIQC.values():
            if (module != "custom"):
                modules_to_run += "-m " + module + " "
        # -f forces overwrite; the {params.output_dir} after it is the
        # directory MultiQC scans for reports.
        shell(
            "multiqc --config {input.config_multiqc} " +
            "-o {params.output_dir} " +
            "-f {params.output_dir} " +
            modules_to_run +
            "|& tee {log}"
        )
# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps)
# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps)
# Also archives the workflow definition next to the results for provenance.
rule all:
    input:
        workflow_outputs("all")
    output:
        Snakefile = config["results_dir"]+"/workflow/Snakefile",
        scripts = directory(config["results_dir"]+"/workflow/scripts"),
        params = config["results_dir"]+"/workflow/params.yml"
    params:
        # The YAML config file(s) the run was started with.
        params_file = workflow.overwrite_configfiles,
    shell:
        "cp /workflow/Snakefile {output.Snakefile} && "
        "cp -r /workflow/scripts {output.scripts} && "
        "cp {params.params_file} {output.params}"
# End-of-run handlers: drop a marker file so the web UI can poll run status.
onsuccess:
    print("Workflow finished with SUCCESS")
    shell("touch "+config["results_dir"]+"/logs/workflow_end.ok")
onerror:
    print("An ERROR occurred")
    # Copy the Snakemake log so the UI can display the failure.
    shell("cat {log} > "+config["results_dir"]+"/logs/workflow_end.error")
    #shell("mail -s "an error occurred" youremail@provider.com < {log}")
base_tools:
MBB_platform:
- Montpellier Bioinformatics Biodiversity platform supported by the LabEx CeMEB,
an ANR "Investissements d'avenir" program (ANR-10-LABX-04-01).
snakemake:
- "K\xF6ster, Johannes and Rahmann, Sven. Snakemake - A scalable bioinformatics\
\ workflow engine. Bioinformatics 2012."
multiqc:
- "Philip Ewels, M\xE5ns Magnusson, Sverker Lundin, Max K\xE4ller, MultiQC: summarize\
\ analysis results for multiple tools and samples in a single report, Bioinformatics,\
\ Volume 32, Issue 19, 1 October 2016, Pages 3047\u20133048, https://doi.org/10.1093/bioinformatics/btw354"
shiny:
- 'Winston Chang, Joe Cheng, JJ Allaire, Yihui Xie and Jonathan McPherson (2019).
shiny: Web Application Framework for R. https://CRAN.R-project.org/package=shiny'
Docker:
- 'Dirk Merkel. 2014. Docker: lightweight Linux containers for consistent development
and deployment. Linux J. 2014, 239, Article 2 (March 2014), 1 pages.'
trimmomatic:
trimmomatic:
- "Anthony M. Bolger, Marc Lohse, Bjoern Usadel, Trimmomatic: a flexible trimmer\
\ for Illumina sequence data, Bioinformatics, Volume 30, Issue 15, 1 August 2014,\
\ Pages 2114\u20132120, https://doi.org/10.1093/bioinformatics/btu170"
megahit:
megahit:
- "Li, D., Luo, R., Liu, C.-M., Leung, C.-M., Ting, H.-F., Sadakane, K., Yamashita,\
\ H., & Lam, T.-W. (2016). MEGAHIT v1.0: A fast and scalable metagenome assembler\
\ driven by advanced methodologies and community practices. Methods, 102, 3\u2011\
11. https://doi.org/10.1016/j.ymeth.2016.02.020"
abricate:
abricate:
- Seemann T, Abricate, Github https://github.com/tseemann/abricate
metabat:
metabat:
- Kang DD, Froula J, Egan R, Wang Z. 2015. MetaBAT, an efficient tool for accurately
reconstructing single genomes from complex microbial communities. PeerJ 3:e1165
https://doi.org/10.7717/peerj.1165
quast:
quast:
- "Alexey Gurevich, Vladislav Saveliev, Nikolay Vyahhi, Glenn Tesler, QUAST: quality\
\ assessment tool for genome assemblies, Bioinformatics, Volume 29, Issue 8, 15\
\ April 2013, Pages 1072\u20131075, https://doi.org/10.1093/bioinformatics/btt086"
import re
import sys
from tools import read_yaml
config = read_yaml(sys.argv[1])
def files_or_dirs_to_ignore():
# files