Commit 57fdbfc1 authored by mmassaviol's avatar mmassaviol
Browse files

Debug citation file generation

parent 6eec9b38
citations: [
citations: {
snakemake: [
"Köster, Johannes and Rahmann, Sven. Snakemake - A scalable bioinformatics workflow engine. Bioinformatics 2012."
],
......@@ -8,4 +8,4 @@ citations: [
shiny: [
"Winston Chang, Joe Cheng, JJ Allaire, Yihui Xie and Jonathan McPherson (2019). shiny: Web Application Framework for R. https://CRAN.R-project.org/package=shiny"
]
]
\ No newline at end of file
}
\ No newline at end of file
versions: {
snakemake: 5.9.1,
multiqc: 1.8,
}
\ No newline at end of file
......@@ -305,6 +305,8 @@ def scripts_list(pipeline):
def generate_tools_info(pipeline):
versions = collections.OrderedDict()
base_yaml = read_yaml("Docker_base/versions.yaml")
versions["base_tools"] = base_yaml["versions"]
pipeline_config = read_yaml("workflows/" + pipeline + "/" + pipeline + ".yaml")
for step in pipeline_config["steps"]:
for tool in step["tools"]:
......@@ -313,6 +315,19 @@ def generate_tools_info(pipeline):
versions[tool] = tool_config["version"]
return versions
def generate_citations_file(pipeline):
    """Collect the citations of the base tools and of every tool used by a pipeline.

    Reads ``Docker_base/citations.yaml`` for the base citations, then walks the
    ``steps`` / ``tools`` lists of ``workflows/<pipeline>/<pipeline>.yaml`` and,
    for each tool, reads ``tools/<tool>/<tool>.yaml``.

    Args:
        pipeline: Name of the workflow; used to build the workflow YAML path.

    Returns:
        collections.OrderedDict: ``"base_tools"`` mapped to the ``citations``
        entry of the base file, followed by one ``tool -> citations`` entry per
        tool whose YAML defines a ``citations`` key, in the order the tools are
        encountered. Tools without a ``citations`` key are left out.
    """
    citations = collections.OrderedDict()
    citations["base_tools"] = read_yaml("Docker_base/citations.yaml")["citations"]

    pipeline_config = read_yaml("workflows/" + pipeline + "/" + pipeline + ".yaml")
    for step in pipeline_config["steps"]:
        for tool in step["tools"]:
            # A step may carry a placeholder instead of a real tool: either the
            # literal string "null" or, when the YAML loader resolves an unquoted
            # null, Python None. Neither has a tools/<tool>/ directory to read,
            # and None would crash the path concatenation below.
            if tool is None or tool == "null":
                continue
            tool_config = read_yaml("tools/" + tool + "/" + tool + ".yaml")
            if "citations" in tool_config:
                citations[tool] = tool_config["citations"]
    return citations
def generate_pipeline_files(workflow, out_dir, local_config="default"):
workflow_yaml = read_yaml("workflows/" + workflow + "/" + workflow + ".yaml")
......@@ -327,6 +342,7 @@ def generate_pipeline_files(workflow, out_dir, local_config="default"):
# Pipeline files generation
write_yaml(final_yaml, generate_params(workflow))
write_yaml(out_dir + "/files/versions.yaml", generate_tools_info(workflow))
write_yaml(out_dir + "/files/citations.yaml", generate_citations_file(workflow))
generate_snakefile(workflow, out_dir + "/files/Snakefile")
# Scripts copy in tempdir
......
......@@ -61,12 +61,12 @@
"apt install -y openjdk-8-jre"
]
},
citations: [
citations: {
picard_tools: [
"Picard Toolkit. 2019. Broad Institute, GitHub Repository. http://broadinstitute.github.io/picard/; Broad Institute"
],
samtools: [
"Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
]
]
}
}
......@@ -67,12 +67,12 @@
"apt install -y tabix"
}
},
citations: [
citations: {
bcftools: [
"Heng Li, A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data, Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987–2993, https://doi.org/10.1093/bioinformatics/btr509"
],
tabix: [
"Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718–719, https://doi.org/10.1093/bioinformatics/btq671"
]
]
}
}
......@@ -96,9 +96,9 @@
"python3 setup.py install"
]
},
citations: [
citations: {
blast: [
"Camacho, C., Coulouris, G., Avagyan, V. et al. BLAST+: architecture and applications. BMC Bioinformatics 10, 421 (2009). https://doi.org/10.1186/1471-2105-10-421"
]
]
}
}
......@@ -182,12 +182,12 @@
"rm -r samtools-1.9 samtools-1.9.tar.bz2"
]
},
citations: [
citations: {
bowtie: [
"Langmead, B., Trapnell, C., Pop, M. et al. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biol 10, R25 (2009). https://doi.org/10.1186/gb-2009-10-3-r25"
],
samtools: [
"Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
]
]
}
}
\ No newline at end of file
......@@ -134,12 +134,12 @@
"rm -r samtools-1.9 samtools-1.9.tar.bz2"
]
},
citations: [
citations: {
bowtie2: [
"Langmead, B., Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359 (2012). https://doi.org/10.1038/nmeth.1923"
],
samtools: [
"Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
]
]
}
}
\ No newline at end of file
......@@ -108,12 +108,12 @@
"rm -r samtools-1.9 samtools-1.9.tar.bz2"
]
},
citations: [
citations: {
bwa-mem2: [
"Vasimuddin Md, Sanchit Misra, Heng Li, Srinivas Aluru. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019"
],
samtools: [
"Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
]
]
}
}
\ No newline at end of file
......@@ -146,7 +146,7 @@
"rm -r samtools-1.9 samtools-1.9.tar.bz2"
]
},
citations: [
citations: {
bwa: [
"Heng Li, Richard Durbin, Fast and accurate short read alignment with Burrows–Wheeler transform, Bioinformatics, Volume 25, Issue 14, 15 July 2009, Pages 1754–1760, https://doi.org/10.1093/bioinformatics/btp324"
],
......@@ -156,5 +156,5 @@
samtools: [
"Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
]
]
}
}
\ No newline at end of file
......@@ -64,9 +64,9 @@
"cd .. && rm -r stacks-2.5 stacks-2.5.tar.gz"
]
},
citations: [
citations: {
stacks: [
" Rochette, NC, Rivera‐Colón, AG, Catchen, JM. Stacks 2: Analytical methods for paired‐end sequencing improve RADseq‐based population genomics. Mol Ecol. 2019; 28: 4737– 4754. https://doi.org/10.1111/mec.15253"
]
]
}
}
......@@ -128,12 +128,12 @@
"apt-get install -y pigz"
]
},
citations: [
citations: {
cutadapt: [
"Martin, M. (2011). Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, 17(1), pp. 10-12. doi:https://doi.org/10.14806/ej.17.1.200"
],
pigz: [
"Copyright(C) 2007-2017 MarkAdler <madler@alumni.caltech.edu>"
]
]
}
}
\ No newline at end of file
......@@ -174,7 +174,7 @@
type: directory
}
],
citations: [
citations: {
dada2: [
"Callahan BJ, McMurdie PJ, Rosen MJ, Han AW, Johnson AJ, Holmes SP. DADA2: High-resolution sample inference from Illumina amplicon data. Nat Methods. 2016;13(7):581–583. doi:10.1038/nmeth.3869"
],
......@@ -188,5 +188,5 @@
"Ondov BD, Bergman NH, and Phillippy AM. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics. 2011 Sep 30; 12(1):385"
],
]
}
}
......@@ -141,12 +141,12 @@
"python3 setup.py install"
]
},
citations: [
citations: {
deepvariant: [
"A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology 36, 983–987 (2018). Ryan Poplin, Pi-Chuan Chang, David Alexander, Scott Schwartz, Thomas Colthurst, Alexander Ku, Dan Newburger, Jojo Dijamco, Nam Nguyen, Pegah T. Afshar, Sam S. Gross, Lizzie Dorfman, Cory Y. McLean, Mark A. DePristo, doi: https://doi.org/10.1038/nbt.4235"
],
samtools: [
"Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
]
]
}
}
\ No newline at end of file
......@@ -76,7 +76,7 @@
pheatmap: ['Rscript -e ''install.packages("pheatmap",Ncpus=8, clean=TRUE);library("pheatmap")''']
},
script: deseq2.script.R,
citations: [
citations: {
DESeq2: [
"Love, M.I., Huber, W. & Anders, S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol 15, 550 (2014). https://doi.org/10.1186/s13059-014-0550-8"
],
......@@ -89,5 +89,5 @@
pheatmap: [
"Kolde, R. (2012). Pheatmap: pretty heatmaps. R package version, 61, 617"
]
]
}
}
......@@ -81,7 +81,7 @@
# The R expression passed to Rscript -e must have balanced parentheses, otherwise R fails to parse it.
pheatmap: ['Rscript -e ''install.packages("pheatmap",Ncpus=8, clean=TRUE);library("pheatmap")''']
},
script: edger.script.R,
citations: [
citations: {
edger: [
"Robinson MD, McCarthy DJ, Smyth GK (2010). edgeR: a Bioconductor package for differential expression analysis of digital gene expression data. Bioinformatics, 26(1), 139-140. doi: 10.1093/bioinformatics/btp616"
],
......@@ -97,5 +97,5 @@
pheatmap: [
"Kolde, R. (2012). Pheatmap: pretty heatmaps. R package version, 61, 617"
]
]
}
}
......@@ -176,9 +176,9 @@
"rm -r fastp-0.20.0 v0.20.0.tar.gz "
]
},
citations: [
citations: {
fastp: [
"Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, https://doi.org/10.1093/bioinformatics/bty560"
]
]
}
}
......@@ -44,9 +44,9 @@
install: {
fastqc: ["apt install -y fastqc=0.11.5+dfsg-6"]
},
citations: [
citations: {
fastqc: [
"Andrews, S. (2010). FASTQC. A quality control tool for high throughput sequence data"
]
]
}
}
......@@ -37,9 +37,9 @@
"chmod +x freebayes",
],
},
citations: [
citations: {
freebayes: [
"Garrison, E., & Marth, G. (2012). Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv:1207.3907."
]
]
}
}
......@@ -55,9 +55,9 @@
"apt install -y openjdk-8-jre"
]
},
citations: [
citations: {
gatk: [
"McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky, A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010). The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Research, 20(9), 1297‑1303. https://doi.org/10.1101/gr.107524.110 "
],
]
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment