Commit f4ecac1d authored by mmassaviol's avatar mmassaviol
Browse files

Update variant calling workflow

parent f64be80a
......@@ -53,5 +53,5 @@ then
echo Results were written to : $2
echo " "
else
echo Failed to run the docker container !!
echo Failed to run the docker container
fi
......@@ -93,5 +93,5 @@ then
echo " "
echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash
else
echo Failed to run the docker container !!
echo Failed to run the docker container
fi
......@@ -114,5 +114,5 @@ then
echo " "
echo XX étant le nombre de coeurs qui seront utilisés par le workflow.
else
echo Failed to run the docker container !!
echo Failed to run the docker container
fi
......@@ -77,5 +77,5 @@ then
echo " "
echo To start a Bash session inside the container : docker exec -it $IMG_NAME /bin/bash
else
echo Failed to run the docker container !!
echo Failed to run the docker container
fi
This diff is collapsed.
......@@ -17,40 +17,40 @@ base_tools:
and deployment. Linux J. 2014, 239, Article 2 (March 2014), 1 pages.'
fastp:
fastp:
- "Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one\
\ FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018,\
\ Pages i884\u2013i890, https://doi.org/10.1093/bioinformatics/bty560"
- 'Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one
FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages
i884-i890, https://doi.org/10.1093/bioinformatics/bty560'
bwa:
bwa:
- "Heng Li, Richard Durbin, Fast and accurate short read alignment with Burrows\u2013\
Wheeler transform, Bioinformatics, Volume 25, Issue 14, 15 July 2009, Pages 1754\u2013\
1760, https://doi.org/10.1093/bioinformatics/btp324"
- Heng Li, Richard Durbin, Fast and accurate short read alignment with Burrows-Wheeler
transform, Bioinformatics, Volume 25, Issue 14, 15 July 2009, Pages 1754-1760,
https://doi.org/10.1093/bioinformatics/btp324
bwa-mem:
- Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs with
BWA-MEM. arXiv:1303.3997v1 [q-bio.GN]
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
- Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor
Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup,
The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue
16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352
bowtie:
bowtie:
- Langmead, B., Trapnell, C., Pop, M. et al. Ultrafast and memory-efficient alignment
of short DNA sequences to the human genome. Genome Biol 10, R25 (2009). https://doi.org/10.1186/gb-2009-10-3-r25
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
- Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor
Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup,
The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue
16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352
Picard_MarkDuplicates:
picard_tools:
- Picard Toolkit. 2019. Broad Institute, GitHub Repository. http://broadinstitute.github.io/picard/;
Broad Institute
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
- Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor
Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup,
The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue
16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352
gatk_prepare_fasta:
picard_tools:
- Picard Toolkit. 2019. Broad Institute, GitHub Repository. http://broadinstitute.github.io/picard/;
......@@ -73,42 +73,38 @@ gatk_haplotype_caller:
\ The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation\
\ DNA sequencing data. Genome Research, 20(9), 1297\u20111303. https://doi.org/10.1101/gr.107524.110 "
bcftools:
- "Heng Li, A statistical framework for SNP calling, mutation discovery, association\
\ mapping and population genetical parameter estimation from sequencing data,\
\ Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987\u20132993,\
\ https://doi.org/10.1093/bioinformatics/btr509"
- Heng Li, A statistical framework for SNP calling, mutation discovery, association
mapping and population genetical parameter estimation from sequencing data, Bioinformatics,
Volume 27, Issue 21, 1 November 2011, Pages 2987-2993, https://doi.org/10.1093/bioinformatics/btr509
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
bcftools_mpileup:
- Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor
Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup,
The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue
16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352
bcftools_mpileup_and_call:
bcftools:
- "Heng Li, A statistical framework for SNP calling, mutation discovery, association\
\ mapping and population genetical parameter estimation from sequencing data,\
\ Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987\u20132993,\
\ https://doi.org/10.1093/bioinformatics/btr509"
- Heng Li, A statistical framework for SNP calling, mutation discovery, association
mapping and population genetical parameter estimation from sequencing data, Bioinformatics,
Volume 27, Issue 21, 1 November 2011, Pages 2987-2993, https://doi.org/10.1093/bioinformatics/btr509
tabix:
- "Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited\
\ files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718\u2013719,\
\ https://doi.org/10.1093/bioinformatics/btq671"
- 'Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited
files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718-719, https://doi.org/10.1093/bioinformatics/btq671'
deep_variant:
deepvariant:
- "A universal SNP and small-indel variant caller using deep neural networks. Nature\
\ Biotechnology 36, 983\u2013987 (2018). Ryan Poplin, Pi-Chuan Chang, David Alexander,\
\ Scott Schwartz, Thomas Colthurst, Alexander Ku, Dan Newburger, Jojo Dijamco,\
\ Nam Nguyen, Pegah T. Afshar, Sam S. Gross, Lizzie Dorfman, Cory Y. McLean, Mark\
\ A. DePristo, doi: https://doi.org/10.1038/nbt.4235"
- 'A universal SNP and small-indel variant caller using deep neural networks. Nature
Biotechnology 36, 983-987 (2018). Ryan Poplin, Pi-Chuan Chang, David Alexander,
Scott Schwartz, Thomas Colthurst, Alexander Ku, Dan Newburger, Jojo Dijamco, Nam
Nguyen, Pegah T. Afshar, Sam S. Gross, Lizzie Dorfman, Cory Y. McLean, Mark A.
DePristo, doi: https://doi.org/10.1038/nbt.4235'
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
- Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor
Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup,
The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue
16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352
freebayes:
freebayes:
- Garrison, E., & Marth, G. (2012). Haplotype-based variant detection from short-read
sequencing. arXiv preprint arXiv:1207.3907.
tabix:
- "Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited\
\ files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718\u2013719,\
\ https://doi.org/10.1093/bioinformatics/btq671"
- 'Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited
files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718-719, https://doi.org/10.1093/bioinformatics/btq671'
......@@ -121,7 +121,7 @@ steps:
title: Variant calling
tools:
- gatk_haplotype_caller
- bcftools_mpileup
- bcftools_mpileup_and_call
- deep_variant
- freebayes
default: gatk_haplotype_caller
......@@ -301,16 +301,16 @@ params_info:
type: input_file
label: Path to reference genome fasta file
variant_calling__bcftools_mpileup_and_call_threads:
tool: bcftools_mpileup
tool: bcftools_mpileup_and_call
rule: variant_calling_bcftools_mpileup_and_call
type: numeric
label: Threads to use
variant_calling__bcftools_mpileup_and_call_genome_fasta_select:
tool: bcftools_mpileup
tool: bcftools_mpileup_and_call
rule: variant_calling_bcftools_mpileup_and_call
type: select
variant_calling__bcftools_mpileup_and_call_genome_fasta:
tool: bcftools_mpileup
tool: bcftools_mpileup_and_call
rule: variant_calling_bcftools_mpileup_and_call
type: input_file
label: Path to reference genome fasta file
......@@ -456,41 +456,41 @@ outputs:
variant_calling__gatk_haplotype_caller:
- name: vcf
type: vcf
file: variants.vcf.gz
file: '{sample}_variants.vcf.gz'
description: Variants file
- name: stats
type: txt
file: vcf_stats.txt
file: '{sample}_vcf_stats.txt'
description: VCF stats
variant_calling__bcftools_mpileup:
variant_calling__bcftools_mpileup_and_call:
variant_calling__bcftools_mpileup_and_call:
- name: vcf
type: vcf
file: variants.vcf.gz
file: '{sample}_variants.vcf.gz'
description: Variant file (vcf)
- name: stats
type: txt
file: vcf_stats.txt
file: '{sample}_vcf_stats.txt'
description: VCF file stats
variant_calling__deep_variant:
variant_calling__deep_variant:
- name: vcf
type: vcf
file: output.vcf.gz
file: '{sample}_output.vcf.gz'
description: Variant file
- name: gvcf
type: gvcf
file: output.g.vcf.gz
file: '{sample}_output.g.vcf.gz'
description: Genomic variant file
- name: html_report
type: html
file: output.visual_report.html
file: '{sample}_output.visual_report.html'
description: Visual html report
variant_calling__freebayes:
variant_calling__freebayes:
- name: vcf
type: vcf
file: output.vcf.gz
file: '{sample}_output.vcf.gz'
description: Variant file
multiqc:
fastp: fastp
......@@ -501,7 +501,7 @@ multiqc:
gatk_prepare_fasta: custom
gatk_IndelRealigner: custom
gatk_haplotype_caller: bcftools
bcftools_mpileup: bcftools
bcftools_mpileup_and_call: bcftools
deep_variant: custom
freebayes: custom
stop_cases: {}
Bootstrap: localimage
From: ../base.sif
%environment
# Runtime environment for every app in the container.
# Bundled tools under /opt/biotools/bin take precedence over system binaries.
export PATH=/opt/biotools/bin:$PATH
export ROOTSYS=/opt/biotools/root
# Expanded at runtime (the previous single quotes stored the literal text
# "$LD_LIBRARY_PATH:$ROOTSYS/lib"). The ${VAR:+...} form avoids a leading ':'
# (which would mean "current directory") when LD_LIBRARY_PATH is unset or empty.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$ROOTSYS/lib"
export LANG=en_US.UTF-8
export LANGUAGE=en_US:en
export LC_ALL=en_US.UTF-8
%labels
Author YourName
Version v0.0.1
build_date 2018 déc. 07
%runscript
# Default entry point: print a short overview of the apps bundled in this container.
# The list mirrors the %help section (four apps, not two).
echo "This container contains four apps (UI, Snakemake, getConfigfile and getSamples)."
echo "UI is a user interface to set up the workflow and launch it."
echo "Snakemake lets you provide your configfile and other parameters to the snakemake command and launch it."
echo "getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app."
echo "getSamples gives you the list of samples detected in a given directory and their file suffix."
# printf is used for the two-line messages: whether echo interprets "\n" depends on the shell.
printf '%s\n' "To get help for an app :" "singularity help --app appName this_container.sif"
printf '%s\n' "To run an app :" "singularity run --app appName this_container.sif"
%apprun UI
# Launch the Shiny application at /sagApp/app.R.
# $1 is substituted as the host string and $2 as the port passed to shiny::runApp.
exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
%apphelp UI
To run the UI app you should bind data and results directories like in the following example.
You must also provide the host address and port where the shiny app will be launched
example : singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
%apprun Snakemake
# Run the workflow's "all" target with snakemake.
#   $1 : path to the config file
#   $2 : number of cores
#   remaining arguments are forwarded to snakemake unchanged
configfile=$1
cores=$2
shift
shift
# Quote every expansion so paths or extra options containing spaces or glob
# characters reach snakemake as single, unmodified arguments.
exec snakemake -s /workflow/Snakefile all --configfile "$configfile" --cores "$cores" "$@"
%apphelp Snakemake
To run the Snakemake app you should bind data and results directories like in the following example.
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
example : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
%apprun getConfigfile
# Copy the parameters file shipped in the container (/workflow/params.total.yml)
# into the caller's current directory as params.yml.
exec cp /workflow/params.total.yml ./params.yml
%apphelp getConfigfile
To run the getConfigfile app you don't need to bind directories. This app will only copy the default parameters file from the container to your local disk.
example : singularity run --app getConfigfile this_container.sif
%apprun getSamples
# Run the sample-detection script with the first two command-line arguments.
# Per the app help these are a data directory and a read layout (e.g. /Data PE).
exec python3 /workflow/get_samples.py $1 $2
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix.
example : singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake lets you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (useful for filling samples and sample_suffix in parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
singularity run --app appName this_container.sif
%files
./files /workflow
./sagApp /sagApp
%post
mkdir /Data
mkdir /Results
apt-get update -y
wget https://github.com/OpenGene/fastp/archive/v0.20.0.tar.gz
tar -xvzf v0.20.0.tar.gz
cd fastp-0.20.0
make
mv fastp /opt/biotools/bin/fastp
cd ..
rm -r fastp-0.20.0 v0.20.0.tar.gz
cd /opt/biotools
wget https://github.com/lh3/bwa/releases/download/v0.7.17/bwa-0.7.17.tar.bz2
tar -xvjf bwa-0.7.17.tar.bz2
cd bwa-0.7.17
make -j 10
mv bwa ../bin/
cd ..
rm -r bwa-0.7.17 bwa-0.7.17.tar.bz2
cd /opt/biotools
wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2
tar -xvjf samtools-1.9.tar.bz2
cd samtools-1.9
./configure && make
cd ..
mv samtools-1.9/samtools bin/samtools
rm -r samtools-1.9 samtools-1.9.tar.bz2
wget -O bowtie-1.2.3-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/1.2.3/bowtie-1.2.3-linux-x86_64.zip/download
unzip bowtie-1.2.3-linux-x86_64.zip
cp bowtie-1.2.3-linux-x86_64/bowtie* /usr/bin
rm -rf bowtie-1.2.3*
cd /opt/biotools/bin
wget https://github.com/broadinstitute/picard/releases/download/2.20.8/picard.jar
cd /opt/biotools
wget -O GenomeAnalysisTK-3.6-0.tar.bz2 'https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.6-0-g89b7209.tar.bz2'
mkdir gatk3
tar -C gatk3 -xjf GenomeAnalysisTK-3.6-0.tar.bz2
rm GenomeAnalysisTK-3.6-0.tar.bz2
rm -r gatk3/resources
cd /opt/biotools
wget https://github.com/broadinstitute/gatk/releases/download/4.1.1.0/gatk-4.1.1.0.zip
unzip gatk-4.1.1.0.zip
cd bin
ln -s /opt/biotools/gatk-4.1.1.0/gatk gatk
cd /opt/biotools
wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2
tar -xvjf bcftools-1.9.tar.bz2
cd bcftools-1.9
./configure --prefix=/opt/biotools
make -j 10
make install
mv bcftools /opt/biotools/bin/
cd .. && rm -r bcftools-1.9.tar.bz2 bcftools-1.9
apt install -y tabix
cd /opt/biotools
wget https://github.com/google/deepvariant/archive/v0.9.0.tar.gz
tar -xvzf v0.9.0.tar.gz
mv deepvariant-0.9.0 /opt/deepvariant
sed -i "/sudo -H apt-get -qq -y install python-dev python-pip python-wheel > \/dev\/null/d" /opt/deepvariant/run-prereq.sh
sed -i "/python -m pip install .* --upgrade --force-reinstall pip/d" /opt/deepvariant/run-prereq.sh
cd /opt/deepvariant
apt-get install -y python-dev python-pip
pip install virtualenv
virtualenv -p /usr/bin/python2.7 deepvariant_env
/bin/bash -c "source deepvariant_env/bin/activate && pip install pip==19.3.1"
# Build DeepVariant inside the virtualenv. The command is single-quoted so that
# ${HOME} and ${PATH} are expanded by the inner bash *after* the virtualenv is
# activated; with nested double quotes they were expanded by the outer shell,
# which reset PATH to its pre-activation value for build_release_binaries.sh.
/bin/bash -c 'source deepvariant_env/bin/activate && ./build-prereq.sh && PATH="${HOME}/bin:${PATH}" ./build_release_binaries.sh'
cd /opt
cp /opt/deepvariant/bazel-genfiles/licenses.zip .
mkdir /opt/deepvariant/bin/
cd /opt/deepvariant/bin/
cp /opt/deepvariant/run-prereq.sh .
cp /opt/deepvariant/settings.sh .
cp /opt/deepvariant/bazel-bin/deepvariant/make_examples.zip .
cp /opt/deepvariant/bazel-bin/deepvariant/call_variants.zip .
cp /opt/deepvariant/bazel-bin/deepvariant/postprocess_variants.zip .
cp /opt/deepvariant/bazel-bin/deepvariant/model_train.zip .
cp /opt/deepvariant/bazel-bin/deepvariant/model_eval.zip .
cp /opt/deepvariant/scripts/run_deepvariant.py .
/bin/bash -c "source /opt/deepvariant/deepvariant_env/bin/activate && ./run-prereq.sh"
# Generate small bash launchers in /opt/deepvariant/bin, one per DeepVariant
# entry point. Each launcher forwards all of its arguments ("$@") to python.
BASH_HEADER='#!/bin/bash'
printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/make_examples.zip "$@"' > /opt/deepvariant/bin/make_examples
printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/call_variants.zip "$@"' > /opt/deepvariant/bin/call_variants
printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/postprocess_variants.zip "$@"' > /opt/deepvariant/bin/postprocess_variants
printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/model_train.zip "$@"' > /opt/deepvariant/bin/model_train
printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/model_eval.zip "$@"' > /opt/deepvariant/bin/model_eval
printf "%s\n%s\n" "${BASH_HEADER}" 'python -u /opt/deepvariant/bin/run_deepvariant.py "$@"' > /opt/deepvariant/bin/run_deepvariant
# NOTE(review): no visible step copies vcf_stats_report.py into /opt/deepvariant/bin;
# confirm the target of this launcher exists in the built image.
printf "%s\n%s\n" "${BASH_HEADER}" 'python -u /opt/deepvariant/bin/vcf_stats_report.py "$@"' > /opt/deepvariant/bin/vcf_stats_report
# Every generated launcher must be executable, including vcf_stats_report
# (previously omitted from this list).
chmod +x /opt/deepvariant/bin/make_examples /opt/deepvariant/bin/call_variants /opt/deepvariant/bin/postprocess_variants /opt/deepvariant/bin/model_train /opt/deepvariant/bin/model_eval /opt/deepvariant/bin/run_deepvariant /opt/deepvariant/bin/vcf_stats_report
mkdir /opt/bin
cd /opt/bin
wget https://github.com/dnanexus-rnd/GLnexus/releases/download/v1.2.1/glnexus_cli
wget https://raw.githubusercontent.com/dnanexus-rnd/GLnexus/v1.2.1/LICENSE -O glnexus_cli.LICENSE
chmod +rx /opt/bin/glnexus_cli
mkdir /opt/models
mkdir /opt/models/wgs
cd /opt/models/wgs
wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wgs_standard/model.ckpt.data-00000-of-00001
wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wgs_standard/model.ckpt.index
wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wgs_standard/model.ckpt.meta
chmod +r /opt/models/wgs/model.ckpt*
mkdir /opt/models/wes
cd /opt/models/wes
wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wes_standard/model.ckpt.data-00000-of-00001
wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wes_standard/model.ckpt.index
wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wes_standard/model.ckpt.meta
chmod +r /opt/models/wes/model.ckpt*
apt-get -y update
apt-get install -y parallel
/bin/bash -c "source /opt/deepvariant/deepvariant_env/bin/activate && python -m pip install pip==9.0.3 && pip install absl-py==0.7.1"
cd /opt/biotools
git clone https://gitlab.mbb.univ-montp2.fr/mmassaviol/mbb_mqc_plugin.git
cd mbb_mqc_plugin
python3 setup.py install
mkdir -p /share/apps/bin
mkdir -p /share/apps/lib
mkdir -p /share/apps/gridengine
mkdir -p /share/bio
mkdir -p /opt/gridengine
mkdir -p /export/scrach
mkdir -p /usr/lib64
ln -s /bin/bash /bin/mbb_bash
ln -s /bin/bash /bin/isem_bash
/usr/sbin/groupadd --system --gid 400 sge
/usr/sbin/useradd --system --uid 400 --gid 400 -c GridEngine --shell /bin/true --home /opt/gridengine sge
\ No newline at end of file
import oyaml as yaml
import shutil
import cerberus
def read_yaml(filepath):
    """Load a YAML file and return its parsed content.

    The file is opened once, in binary mode, so the YAML loader receives raw
    bytes and handles decoding itself instead of depending on the locale's
    default text encoding.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The parsed YAML data, or None when the file cannot be opened (the
        error is printed rather than raised).
    """
    try:
        with open(filepath, 'rb') as file:
            # FullLoader can build more than plain scalars/lists/dicts;
            # NOTE(review): prefer a safe loader if files may be untrusted.
            return yaml.load(file, Loader=yaml.FullLoader)
    except IOError as e:
        print("Error in file opening:", e)
        return None
......@@ -25,4 +27,16 @@ def copy_dir(src,dst):
shutil.copytree(src,dst)
except FileExistsError:
shutil.rmtree(dst, ignore_errors=True)
shutil.copytree(src,dst)
\ No newline at end of file
shutil.copytree(src,dst)
def validate_yaml(document_yaml, schema_yaml):
    """Validate a YAML document against a Cerberus schema stored in YAML.

    Both files are loaded with ``read_yaml``; the schema is then used to
    build a ``cerberus.Validator`` that checks the document.

    Args:
        document_yaml: Path of the YAML document to validate.
        schema_yaml: Path of the YAML file holding the Cerberus schema.

    Returns:
        The (truthy) result of ``Validator.validate`` when the document
        conforms to the schema.

    Raises:
        SystemExit: When validation fails; the validator's ``errors`` are
            passed as the exit payload, so they are printed on exit and the
            process ends with a non-zero status.
    """
    # Local import keeps this block self-contained. sys.exit() is used instead
    # of the bare exit() helper, which is injected by the `site` module for
    # interactive use and is not guaranteed to exist in every interpreter.
    import sys

    document = read_yaml(document_yaml)
    schema = read_yaml(schema_yaml)
    v = cerberus.Validator(schema)
    res = v.validate(document, schema)
    if not res:
        sys.exit(v.errors)
    return res
\ No newline at end of file
......@@ -9,6 +9,6 @@ Picard_MarkDuplicates: 2.20.8
gatk_prepare_fasta: ''
gatk_IndelRealigner: 3.6-0
gatk_haplotype_caller: 3.6-0
bcftools_mpileup: '1.9'
bcftools_mpileup_and_call: '1.9'
deep_variant: 0.9.0
freebayes: 1.3.1
{'App': {'project': None, 'general': {'title': 'Variant_calling', 'skin': 'blue', 'menu_width': 230}, 'pages': [{'icon': 'pencil', 'label': 'Global parameters', 'name': 'global_params', 'boxes': [{'name': 'global_params', 'title': 'Global parameters :', 'status': 'success', 'content': [{'name': 'results_dir', 'type': 'output_dir', 'value': '/Results', 'label': 'Results directory: ', 'volumes': [{'Results': '/Results'}]}, {'name': 'genome_fasta', 'type': 'input_file', 'value': '', 'label': 'Path to reference genome fasta file'}, {'name': 'sample_dir', 'type': 'input_dir', 'value': '/Data', 'label': 'Directory containing the fastq files: ', 'volumes': [{'Data': '/Data'}, {'Results': '/Results'}]}, {'name': 'SeOrPe', 'type': 'radio', 'value': 'PE', 'choices': [{'Single end': 'SE'}, {'Paired end': 'PE'}], 'label': 'Single end reads (SE) or Paired end reads (PE): '}, {'name': 'memo', 'type': 'textArea', 'value': '', 'label': 'Text area for the user'}]}]}, {'icon': 'pencil', 'label': 'Preprocessing', 'name': 'preprocess', 'default': 'fastp', 'boxes': [{'name': 'fastp', 'title': 'fastp', 'status': 'success', 'content': [{'name': 'preprocess__fastp_threads', 'prefix': '--thread', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use'}, {'name': 'preprocess__fastp_complexity_threshold', 'prefix': '--complexity_threshold', 'type': 'numeric', 'value': 30, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'The threshold for low complexity filter (0~100)'}, {'name': 'preprocess__fastp_report_title', 'prefix': '--report_title', 'type': 'text', 'value': 'fastp report', 'label': 'fastp report title'}, {'name': 'preprocess__fastp_adapter_sequence', 'prefix': '--adapter_sequence', 'type': 'text', 'value': '', 'label': 'The adapter for read1. For SE data, if not specified, the adapter will be auto-detected. 
For PE data, this is used if R1/R2 are found not overlapped.'}, {'name': 'preprocess__fastp_adapter_sequence_R2_PE', 'prefix': '--adapter_sequence_r2', 'type': 'text', 'value': '', 'label': 'the adapter for read2 (PE data only). This is used if R1/R2 are found not overlapped. If not specified, it will be the same as <adapter_sequence>'}, {'name': 'preprocess__fastp_P', 'prefix': '-P', 'type': 'numeric', 'value': 20, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower.'}, {'name': 'preprocess__fastp_correction_PE', 'type': 'checkbox', 'value': True, 'label': 'Enable base correction in overlapped regions'}, {'name': 'preprocess__fastp_low_complexity_filter', 'type': 'checkbox', 'value': True, 'label': 'Enable low complexity filter. The complexity is defined as the percentage of base that is different from its next base (base[i] != base[i+1]).'}, {'name': 'preprocess__fastp_overrepresentation_analysis', 'type': 'checkbox', 'value': True, 'label': 'enable overrepresented sequence analysis.'}, {'name': 'fastp', 'type': 'help', 'label': 'fastp: A tool designed to provide fast all-in-one preprocessing for FastQ files.'}, {'name': 'fastp', 'type': 'link', 'label': 'Website : ', 'href': 'https://github.com/OpenGene/fastp'}, {'name': 'fastp', 'type': 'link', 'label': 'Documentation : ', 'href': 'https://github.com/OpenGene/fastp'}, {'name': 'fastp', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/10.1093/bioinformatics/bty560'}]}, {'name': 'null', 'title': 'null', 'status': 'success', 'content': [{'name': None, 'type': 'help', 'label': 'null: Skip this step'}]}]}, {'icon': 'pencil', 'label': 'Mapping', 'name': 'mapping', 'default': 'bwa', 'boxes': [{'name': 'bwa', 'title': 'BWA (Burrows-Wheeler Alignment Tool)', 'status': 'success', 'content': [{'name': 'mapping__bwa_index_path', 'type': 'input_dir', 'value': '', 'label': 'Path to an existing bwa 
index (or where to save a new one)'}, {'name': 'mapping__bwa_index_algorithm', 'type': 'radio', 'value': 'bwtsw', 'choices': [{'bwtsw': 'bwtsw'}, {'is': 'is'}], 'label': 'Algorithm for constructing BWT index (see documentation for details)'}, {'name': 'mapping__bwa_mem_threads', 'prefix': '-t', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use'}, {'name': 'mapping__bwa_mem_quality0_multimapping', 'prefix': None, 'type': 'checkbox', 'value': True, 'label': 'Put 0 as mapping quality for multimapping reads'}, {'name': 'bwa', 'type': 'help', 'label': 'BWA (Burrows-Wheeler Alignment Tool): BWA is a software package for mapping low-divergent sequences against a large reference genome.'}, {'name': 'bwa', 'type': 'link', 'label': 'Website : ', 'href': 'http://bio-bwa.sourceforge.net/'}, {'name': 'bwa', 'type': 'link', 'label': 'Documentation : ', 'href': 'http://bio-bwa.sourceforge.net/bwa.shtml'}, {'name': 'bwa', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/10.1093/bioinformatics/btp324'}]}, {'name': 'bowtie', 'title': 'Bowtie', 'status': 'success', 'content': [{'name': 'mapping__bowtie_index_path', 'type': 'input_dir', 'value': '', 'label': 'Path to an existing bowtie index (or where to save a new one)'}, {'name': 'mapping__bowtie_index_threads', 'prefix': '--threads', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use to index genome'}, {'name': 'mapping__bowtie_threads', 'prefix': '--threads', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use to align reads'}, {'name': 'mapping__bowtie_minins_PE', 'prefix': '-I', 'type': 'numeric', 'value': 0, 'min': 0, 'max': 'NA', 'step': 1, 'label': 'The minimum insert size for valid paired-end alignments'}, {'name': 'mapping__bowtie_maxins_PE', 'prefix': '-X', 'type': 'numeric', 'value': 250, 'min': 0, 'max': 'NA', 'step': 1, 'label': 'The maximum insert size for valid 
paired-end alignments'}, {'name': 'mapping__bowtie_orientation_PE', 'type': 'radio', 'choices': [{'Not stranded': ''}, {'Forward Reverse': '--fr'}, {'Reverse Forward': '--rf'}, {'Forward Forward': '--ff'}], 'value': '', 'label': 'The upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand.'}, {'name': 'mapping__bowtie_mult_align_limit', 'prefix': '-m', 'type': 'numeric', 'value': 1, 'min': 1, 'max': 10, 'step': 1, 'label': "Suppress all alignments for a particular read or pair if more than 'x' reportable alignments exist for it"}, {'name': 'mapping__bowtie_best', 'prefix': None, 'type': 'checkbox', 'value': True, 'label': "--best : Make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum (i.e. number of mismatches, or mismatches in the seed in the case of -n mode) and in terms of the quality values at the mismatched position(s)."}, {'name': 'mapping__bowtie_strata', 'prefix': None, 'type': 'checkbox', 'value': True, 'label': "--strata : If many valid alignments exist and are reportable (e.g. are not disallowed via the -k option) and they fall into more than one alignment 'stratum', report only those alignments that fall into the best stratum. 
When --strata is specified, --best must also be specified."}, {'name': 'bowtie', 'type': 'help', 'label': 'Bowtie: Bowtie is an ultrafast, memory-efficient short read aligner.'}, {'name': 'bowtie', 'type': 'link', 'label': 'Website : ', 'href': 'http://bowtie-bio.sourceforge.net'}, {'name': 'bowtie', 'type': 'link', 'label': 'Documentation : ', 'href': 'http://bowtie-bio.sourceforge.net/manual.shtml'}, {'name': 'bowtie', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/10.1002/0471250953.bi1107s32'}]}]}, {'icon': 'pencil', 'label': 'Mark Duplicates', 'name': 'mark_duplicates', 'default': 'Picard_MarkDuplicates', 'boxes': [{'name': 'Picard_MarkDuplicates', 'title': 'Picard Mark Duplicates', 'status': 'success', 'content': [{'name': 'mark_duplicates__Picard_MarkDuplicates_threads', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use'}, {'name': 'mark_duplicates__Picard_MarkDuplicates_remove_all_duplicates', 'type': 'checkbox', 'value': True, 'label': 'remove_all_duplicates : If true do not write duplicates to the output file instead of writing them with appropriate flags set.'}, {'name': 'mark_duplicates__Picard_MarkDuplicates_samtools_memory', 'type': 'numeric', 'value': 2, 'label': '-m parameter for samtools sort: Memory allocated per thread for samtools_sort (in Gb).'}, {'name': 'Picard_MarkDuplicates', 'type': 'help', 'label': 'Picard Mark Duplicates: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.'}, {'name': 'Picard_MarkDuplicates', 'type': 'link', 'label': 'Website : ', 'href': 'https://broadinstitute.github.io/picard/'}, {'name': 'Picard_MarkDuplicates', 'type': 'link', 'label': 'Documentation : ', 'href': 'https://broadinstitute.github.io/picard/command-line-overview.html'}]}, {'name': 'null', 'title': 'null', 'status': 'success', 'content': [{'name': None, 'type': 'help', 'label': 'null: Skip this 
step'}]}]}, {'icon': 'pencil', 'label': 'Prepare fasta genome for index', 'name': 'prepare_fasta', 'default': 'gatk_prepare_fasta', 'boxes': [{'name': 'gatk_prepare_fasta', 'title': 'GATK prepare fasta', 'status': 'success', 'content': [{'name': 'gatk_prepare_fasta', 'type': 'help', 'label': 'GATK prepare fasta: Prepare fasta ref for GATK'}]}]}, {'icon': 'pencil', 'label': 'Indel Realign', 'name': 'indel_realign', 'default': 'gatk_IndelRealigner', 'boxes': [{'name': 'gatk_IndelRealigner', 'title': 'GATK Indel Realigner', 'status': 'success', 'content': [{'name': 'indel_realign__gatk_IndelRealigner_threads', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use'}, {'name': 'indel_realign__gatk_IndelRealigner_samtools_memory', 'type': 'numeric', 'value': 2, 'label': '-m parameter for samtools sort: Memory allocated per thread for samtools_sort (in Gb).'}, {'name': 'gatk_IndelRealigner', 'type': 'help', 'label': 'GATK Indel Realigner: The local realignment process is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases is minimized across all the reads.'}, {'name': 'gatk_IndelRealigner', 'type': 'link', 'label': 'Website : ', 'href': 'https://software.broadinstitute.org/gatk/documentation/tooldocs/3.6-0/org_broadinstitute_gatk_tools_walkers_indels_IndelRealigner.php'}, {'name': 'gatk_IndelRealigner', 'type': 'link', 'label': 'Documentation : ', 'href': 'https://software.broadinstitute.org/gatk/documentation/tooldocs/3.6-0/org_broadinstitute_gatk_tools_walkers_indels_IndelRealigner.php'}]}, {'name': 'null', 'title': 'null', 'status': 'success', 'content': [{'name': None, 'type': 'help', 'label': 'null: Skip this step'}]}]}, {'icon': 'pencil', 'label': 'Variant calling', 'name': 'variant_calling', 'default': 'gatk_haplotype_caller', 'boxes': [{'name': 'gatk_haplotype_caller', 'title': 'GATK Haplotype Caller', 'status': 'success', 'content': [{'name': 
'variant_calling__gatk_haplotype_caller_threads', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use'}, {'name': 'variant_calling__gatk_haplotype_caller_genome_fasta', 'type': 'input_file', 'value': '', 'label': 'Path to reference genome fasta file'}, {'name': 'gatk_haplotype_caller', 'type': 'help', 'label': 'GATK Haplotype Caller: The HaplotypeCaller is capable of calling SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region'}, {'name': 'gatk_haplotype_caller', 'type': 'link', 'label': 'Website : ', 'href': 'https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php'}, {'name': 'gatk_haplotype_caller', 'type': 'link', 'label': 'Documentation : ', 'href': 'https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php'}, {'name': 'gatk_haplotype_caller', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/10.1101/gr.107524.110'}]}, {'name': 'bcftools_mpileup_and_call', 'title': 'BCFtools mpileup and call', 'status': 'success', 'content': [{'name': 'variant_calling__bcftools_mpileup_and_call_threads', 'prefix': '--threads', 'value': 4, 'min': 1, 'max': 32, 'step': 1, 'label': 'Threads to use', 'type': 'numeric'}, {'name': 'variant_calling__bcftools_mpileup_and_call_genome_fasta', 'type': 'input_file', 'value': '', 'label': 'Path to reference genome fasta file'}, {'name': 'bcftools_mpileup_and_call', 'type': 'help', 'label': 'BCFtools mpileup and call: BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF.'}, {'name': 'bcftools_mpileup_and_call', 'type': 'link', 'label': 'Website : ', 'href': 'https://samtools.github.io/bcftools/'}, {'name': 'bcftools_mpileup_and_call', 'type': 'link', 'label': 'Documentation : ', 'href': 
'https://samtools.github.io/bcftools/howtos/index.html'}, {'name': 'bcftools_mpileup_and_call', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/10.1093/bioinformatics/btr509'}]}, {'name': 'deep_variant', 'title': 'DeepVariant', 'status': 'success', 'content': [{'name': 'variant_calling__deep_variant_genome_fasta', 'type': 'input_file', 'value': '', 'label': 'Path to reference genome fasta file'}, {'name': 'variant_calling__deep_variant_threads', 'prefix': '--threads', 'type': 'numeric', 'value': 4, 'min': 1, 'max': 'NA', 'step': 1, 'label': 'Number of threads to use'}, {'name': 'variant_calling__deep_variant_model_type', 'type': 'radio', 'value': 'WGS', 'choices': [{'WGS': 'WGS'}, {'WES': 'WES'}], 'label': 'Model type'}, {'name': 'deep_variant', 'type': 'help', 'label': 'DeepVariant: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data.'}, {'name': 'deep_variant', 'type': 'link', 'label': 'Website : ', 'href': 'https://github.com/google/deepvariant'}, {'name': 'deep_variant', 'type': 'link', 'label': 'Documentation : ', 'href': 'https://github.com/google/deepvariant/blob/r0.9/docs/README.md'}, {'name': 'deep_variant', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/10.1038/nbt.4235'}]}, {'name': 'freebayes', 'title': 'freebayes', 'status': 'success', 'content': [{'name': 'variant_calling__freebayes_genome_fasta', 'type': 'input_file', 'value': '', 'label': 'Path to reference genome fasta file'}, {'name': 'freebayes', 'type': 'help', 'label': 'freebayes: Bayesian haplotype-based genetic polymorphism discovery and genotyping'}, {'name': 'freebayes', 'type': 'link', 'label': 'Website : ', 'href': 'https://github.com/ekg/freebayes'}, {'name': 'freebayes', 'type': 'link', 'label': 'Documentation : ', 'href': 'https://github.com/ekg/freebayes'}, {'name': 'freebayes', 'type': 'link', 'label': 'Paper : ', 'href': 'https://doi.org/arxiv.org/abs/1207.3907'}]}]}], 
'run': {'shiny_button': {'name': 'RunPipeline', 'type': 'button', 'icon': 'play', 'class': 'btn btn-info', 'label': 'Run pipeline'}, 'program': 'snakemake', 'options': [{'name': '-s', 'type': 'value', 'value': '/workflow/Snakefile'}, {'name': '--configfile', 'type': 'value', 'value': '/Results/params.yml'}, {'name': '-d', 'type': 'value', 'value': '/Results'}, {'name': '--cores', 'type': 'shiny', 'value': 'cores'}]}, 'RULEGRAPH': None, 'Report': None, 'download': {'shiny_button': {'name': 'DownloadParams', 'type': 'button', 'class': 'btn btn-light', 'label': 'Download config file'}}}}
tabDE = fluidPage(
box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE, solidHeader = TRUE,
selectInput("selectDE", label = "Select the tool to use : ", selected = "edger", choices = list("edger" = "edger", "deseq2" = "deseq2")),
conditionalPanel(condition = "input.selectDE == 'edger'",box(title = "edgeR", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE,
numericInput("edger_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4),
checkboxInput("edger_tx2gene", label = "Aggregate transcripts counts to gene counts : ", value = FALSE),
box(title = "Annotation file (gtf or gff) : ", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE,
selectInput("edger_annotations_select",label = "Select where to find the file", selected = "server", choices = c("On server" = "server", "On your machine" = "local")),
conditionalPanel(condition = "input.edger_annotations_select == 'server'",
tags$label("Annotation file (gtf or gff) : "),
fluidRow(
column(4,shinyFilesButton("shinyfiles_edger_annotations",label="Please select a file", title="Annotation file (gtf or gff) : ", multiple=FALSE)),
column(8,textInput("edger_annotations_server",label=NULL,value=""))
)
),
conditionalPanel(condition = "input.edger_annotations_select == 'local'",
fileInput("edger_annotations_local",label = "Annotation file (gtf or gff) : ")
)
)
,