Commit d4a30564 authored by mmassaviol's avatar mmassaviol

Add variantcalling_benchmark_frombam

parent 9ff5c6bb
FROM mbbteam/mbb_workflows_base:latest as alltools
RUN cd /opt/biotools \
&& wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 \
&& tar -xvjf samtools-1.9.tar.bz2 \
&& cd samtools-1.9 \
&& ./configure && make \
&& cd .. \
&& mv samtools-1.9/samtools bin/samtools \
&& rm -r samtools-1.9 samtools-1.9.tar.bz2
RUN cd /opt/biotools/bin \
&& wget https://github.com/broadinstitute/picard/releases/download/2.20.8/picard.jar
RUN apt -y update && apt install -y openjdk-8-jre
RUN cd /opt/biotools \
&& wget -O GenomeAnalysisTK-3.6-0.tar.bz2 'https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.6-0-g89b7209.tar.bz2' \
&& mkdir gatk3 \
&& tar -C gatk3 -xjf GenomeAnalysisTK-3.6-0.tar.bz2 \
&& rm GenomeAnalysisTK-3.6-0.tar.bz2 \
&& rm -r gatk3/resources
RUN cd /opt/biotools \
&& wget https://github.com/broadinstitute/gatk/releases/download/4.1.1.0/gatk-4.1.1.0.zip \
&& unzip gatk-4.1.1.0.zip \
&& cd bin \
&& ln -s /opt/biotools/gatk-4.1.1.0/gatk gatk
RUN cd /opt/biotools \
&& wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 \
&& tar -xvjf bcftools-1.9.tar.bz2 \
&& cd bcftools-1.9 \
&& ./configure --prefix=/opt/biotools \
&& make -j 10 \
&& make install \
&& mv bcftools /opt/biotools/bin/ \
&& cd .. && rm -r bcftools-1.9.tar.bz2 bcftools-1.9
RUN apt -y update && apt install -y tabix
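# DeepVariant 0.9.0 is built from source in the next RUN steps, inside a dedicated python2.7 virtualenv (deepvariant_env)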
RUN cd /opt/biotools \
&& wget https://github.com/google/deepvariant/archive/v0.9.0.tar.gz \
&& tar -xvzf v0.9.0.tar.gz \
&& mv deepvariant-0.9.0 /opt/deepvariant \
&& sed -i "/sudo -H apt-get -qq -y install python-dev python-pip python-wheel > \/dev\/null/d" /opt/deepvariant/run-prereq.sh \
&& sed -i "/python -m pip install .* --upgrade --force-reinstall pip/d" /opt/deepvariant/run-prereq.sh
RUN cd /opt/deepvariant \
&& apt-get install -y python-dev python-pip \
&& pip install virtualenv \
&& virtualenv -p /usr/bin/python2.7 deepvariant_env \
&& /bin/bash -c "source deepvariant_env/bin/activate && pip install pip==19.3.1"
RUN cd /opt/deepvariant \
&& /bin/bash -c "source deepvariant_env/bin/activate && PYTHONPATH=/opt/deepvariant/deepvariant_env/lib/python2.7 ./build-prereq.sh" \
&& /bin/bash -c "source deepvariant_env/bin/activate && PATH="${HOME}/bin:${PATH}" ./build_release_binaries.sh"
RUN cd /opt \
&& cp /opt/deepvariant/bazel-genfiles/licenses.zip . \
&& mkdir /opt/deepvariant/bin/ \
&& cd /opt/deepvariant/bin/ \
&& cp /opt/deepvariant/run-prereq.sh . \
&& cp /opt/deepvariant/settings.sh . \
&& cp /opt/deepvariant/bazel-bin/deepvariant/make_examples.zip . \
&& cp /opt/deepvariant/bazel-bin/deepvariant/call_variants.zip . \
&& cp /opt/deepvariant/bazel-bin/deepvariant/postprocess_variants.zip . \
&& cp /opt/deepvariant/bazel-bin/deepvariant/model_train.zip . \
&& cp /opt/deepvariant/bazel-bin/deepvariant/model_eval.zip . \
&& cp /opt/deepvariant/scripts/run_deepvariant.py . \
&& /bin/bash -c "source /opt/deepvariant/deepvariant_env/bin/activate && ./run-prereq.sh"
RUN BASH_HEADER='#!/bin/bash' \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/make_examples.zip "$@"' > /opt/deepvariant/bin/make_examples \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/call_variants.zip "$@"' > /opt/deepvariant/bin/call_variants \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/postprocess_variants.zip "$@"' > /opt/deepvariant/bin/postprocess_variants \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/model_train.zip "$@"' > /opt/deepvariant/bin/model_train \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python /opt/deepvariant/bin/model_eval.zip "$@"' > /opt/deepvariant/bin/model_eval \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python -u /opt/deepvariant/bin/run_deepvariant.py "$@"' > /opt/deepvariant/bin/run_deepvariant \
&& printf "%s\n%s\n" "${BASH_HEADER}" 'python -u /opt/deepvariant/bin/vcf_stats_report.py "$@"' > /opt/deepvariant/bin/vcf_stats_report \
&& chmod +x /opt/deepvariant/bin/make_examples /opt/deepvariant/bin/call_variants /opt/deepvariant/bin/postprocess_variants /opt/deepvariant/bin/model_train /opt/deepvariant/bin/model_eval /opt/deepvariant/bin/run_deepvariant /opt/deepvariant/bin/vcf_stats_report \
&& mkdir /opt/bin \
&& cd /opt/bin \
&& wget https://github.com/dnanexus-rnd/GLnexus/releases/download/v1.2.1/glnexus_cli \
&& wget https://raw.githubusercontent.com/dnanexus-rnd/GLnexus/v1.2.1/LICENSE -O glnexus_cli.LICENSE \
&& chmod +rx /opt/bin/glnexus_cli \
&& mkdir /opt/models \
&& mkdir /opt/models/wgs \
&& cd /opt/models/wgs \
&& wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wgs_standard/model.ckpt.data-00000-of-00001 \
&& wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wgs_standard/model.ckpt.index \
&& wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wgs_standard/model.ckpt.meta \
&& chmod +r /opt/models/wgs/model.ckpt* \
&& mkdir /opt/models/wes \
&& cd /opt/models/wes \
&& wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wes_standard/model.ckpt.data-00000-of-00001 \
&& wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wes_standard/model.ckpt.index \
&& wget https://storage.googleapis.com/deepvariant/models/DeepVariant/0.9.0/DeepVariant-inception_v3-0.9.0+data-wes_standard/model.ckpt.meta \
&& chmod +r /opt/models/wes/model.ckpt* \
&& apt-get -y update \
&& apt-get install -y parallel \
&& /bin/bash -c "source /opt/deepvariant/deepvariant_env/bin/activate && python -m pip install pip==9.0.3 && pip install absl-py==0.7.1"
RUN cd /opt/biotools \
&& git clone https://gitlab.mbb.univ-montp2.fr/mmassaviol/mbb_mqc_plugin.git \
&& cd mbb_mqc_plugin \
&& python3 setup.py install
RUN cd /opt/biotools/bin \
&& wget https://github.com/ekg/freebayes/releases/download/v1.3.1/freebayes-v1.3.1 -O freebayes \
&& chmod +x freebayes
RUN apt-get install -y vcftools
RUN Rscript -e 'install.packages("UpSetR",Ncpus=8, clean=TRUE);library("UpSetR")'
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
#This part is necessary to run on ISEM cluster
RUN mkdir -p /share/apps/bin \
&& mkdir -p /share/apps/lib \
&& mkdir -p /share/apps/gridengine \
&& mkdir -p /share/bio \
&& mkdir -p /opt/gridengine \
&& mkdir -p /export/scrach \
&& mkdir -p /usr/lib64 \
&& ln -s /bin/bash /bin/mbb_bash \
&& ln -s /bin/bash /bin/isem_bash \
&& /usr/sbin/groupadd --system --gid 400 sge \
&& /usr/sbin/useradd --system --uid 400 --gid 400 -c GridEngine --shell /bin/true --home /opt/gridengine sge
EXPOSE 3838
CMD ["Rscript", "-e", "setwd('/sagApp/'); shiny::runApp('/sagApp/app.R',port=3838 , host='0.0.0.0')"]
FROM alltools
COPY files /workflow
COPY sagApp /sagApp
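For a quick local check, a minimal sketch (the image tag, host port and bind paths are illustrative; they mirror what the helper scripts below do): build the image from the repository root, then start it with /Data and /Results mounted so the Shiny interface exposed on port 3838 is reachable on the host.
docker build . -t variantcalling_benchmark_frombam:latest
docker run --rm -d -p 8787:3838 --mount type=bind,src=/path/to/Data,dst=/Data --mount type=bind,src=/path/to/Results,dst=/Results variantcalling_benchmark_frombam:latest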
#!/bin/bash
#This script helps run a workflow from the Docker image on the command line.
if [ $# -lt 4 ]
then
echo usage : $0 dataDir resultsDir configFile nbCores '[dockerHub|local]'
exit 1
fi
# Docker volumes
# MBB Workflows reads data from /Data and write results to /Results
Data=$1
Results=$2
if [ ! -d "$Data" ]; then
echo "can't find $Data directory !"
exit 1
fi
mkdir -p $Results
DOCK_VOL+=" --mount type=bind,src=$Data,dst=/Data"
DOCK_VOL+=" --mount type=bind,src=$Results,dst=/Results"
# config file must be in /Data or /Results !
config=$3
cores=$4
if [ $# -lt 5 ]
then
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest"
else
IMG_SRC=$5
case $IMG_SRC in
dockerHub )
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest" ;;
local)
docker build . -t variantcalling_benchmark_frombam:latest
APP_IMG="variantcalling_benchmark_frombam:latest" ;;
mbb)
#APP_IMG="X.X.X.X:5000/variantcalling_benchmark_frombam:latest" ;;
esac
fi
docker run --rm $DOCK_VOL --cidfile="CID.txt" $APP_IMG snakemake -s /workflow/Snakefile all --configfile $config --cores $cores
CONTAINER_ID=$(cat CID.txt)
if [ $CONTAINER_ID ]
then
echo " "
echo Results were written to : $2
echo " "
else
echo Failed to run the docker container !!
fi
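An invocation sketch for the command-line runner above (the script name RunCmdLine.sh, the paths and the core count are hypothetical; note that the config file is passed by its in-container path, since it has to sit under /Data or /Results):
bash RunCmdLine.sh /home/user/project/Data /home/user/project/Results /Data/run1/config.yaml 8 dockerHub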
#!/bin/bash
#This script helps deploy the Docker image on an MBB bigmem machine
if [ $# -lt 2 ]
then
echo usage : $0 dataDir resultsDir '[dockerHub|local]'
exit 1
fi
#nginx
##### nginx install #####
#sudo apt-get install -y nginx
# HOST_NAME="192.168.100.49"
# HTTP_ENDP="https://$HOST_NAME"
# openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /etc/ssl/private/nginx-selfsigned.key -out /etc/ssl/certs/nginx-selfsigned.crt -subj "/C=FR/ST=LR/L=Montpellier/O=CNRS/OU=CNRS-ISEM/CN=mbb.univ-montp2.fr"
# openssl dhparam -out /etc/ssl/certs/dhparam.pem 2048
# mkdir -p /etc/nginx/snippets
# echo "ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;" > /etc/nginx/snippets/self-signed.conf
# echo "ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;" >> /etc/nginx/snippets/self-signed.conf
# cp system/nginx_snippets_ssl-params.conf /etc/nginx/snippets/ssl-params.conf
# cp /etc/nginx/sites-available/default /etc/nginx/sites-available/default.bak
# cp system/nginx_sites-available_default /etc/nginx/sites-available/default
# sed -i "s|server_domain_or_IP|$HOST_NAME|" /etc/nginx/sites-available/default
# useradd nginx
# cp system/nginx_nginx.conf /etc/nginx/nginx.conf
# cp system/nginx_conf.d_10-rstudio.conf /etc/nginx/conf.d/10-rstudio.conf
# sed -i "s|example.com|$HOST_NAME|" /etc/nginx/conf.d/10-rstudio.conf
# systemctl restart nginx
# systemctl enable nginx
#try a port in the range 8787 to 8800
#APP_PORT=$2
APP_PORT=8787
while [[ $(ss -tulw | grep $APP_PORT) != "" && $APP_PORT < 8800 ]]
do
APP_PORT=$(( $APP_PORT + 1))
done
if [[ $(ss -tulw | grep $APP_PORT) != "" ]]
then
echo "No tcp port available !!"
exit 1
fi
# Docker volumes
# MBB Workflows reads data from /Data and write results to /Results
if [ $SUDO_USER ]; then realUSER=$SUDO_USER; else realUSER=`whoami`; fi
Data=$1
Results=$2
mkdir -p $Data
mkdir -p $Results
DOCK_VOL+=" --mount type=bind,src=$Data,dst=/Data"
DOCK_VOL+=" --mount type=bind,src=$Results,dst=/Results"
if [ $# -lt 3 ]
then
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest"
else
IMG_SRC=$3
case $IMG_SRC in
dockerHub )
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest" ;;
local)
docker build . -t variantcalling_benchmark_frombam:latest
APP_IMG="variantcalling_benchmark_frombam:latest" ;;
mbb)
#APP_IMG="X.X.X.X:5000/variantcalling_benchmark_frombam:latest" ;;
esac
fi
CONTAINER_ID=$( docker run --rm -d -p $APP_PORT:3838 $DOCK_VOL $APP_IMG )
if [ $CONTAINER_ID ]
then
echo " "
echo You have to put your Data on : $1
echo " "
echo Results will be written to : $2
echo " "
hostname -I | grep -E -o "162.38.181.[0-9]{1,3}" | awk -v port=$APP_PORT '{print "You can access the workflow interface at : http://"$1":"port}'
echo " "
echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash
else
echo Failed to run the docker container !!
fi
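A deployment sketch for the bigmem helper above (the script name and directories are hypothetical; sudo is only needed if your user cannot reach the Docker daemon, which the $SUDO_USER test anticipates). The optional third argument selects the image source handled by the case statement:
sudo bash deployBigMemMBB.sh /media/bigvol/Data /media/bigvol/Results dockerHub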
#!/bin/bash
# This script is executed on the virtual machine during the *Deployment* phase.
# It is used to apply parameters specific to the current deployment.
# It runs second during a cloud deployment on IFB-Biosphere, after the *Installation* phase.
if [ $# -lt 1 ]
then
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest"
else
IMG_SRC=$1
case $IMG_SRC in
ifb)
APP_IMG="gitlab-registry.in2p3.fr/ifb-biosphere/apps/variantcalling_benchmark_frombam:master" ;;
docker )
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest" ;;
local)
docker build . -t variantcalling_benchmark_frombam:latest
APP_IMG="variantcalling_benchmark_frombam:latest" ;;
mbb)
#APP_IMG="X.X.X.X:5000/variantcalling_benchmark_frombam:latest" ;;
esac
fi
# Tuning if site proxy or not
#CLOUD_SERVICE = $(ss-get cloudservice)
#CLOUD_SERVICE="ifb-genouest-genostack"
#HOST_NAME=$( ss-get --timeout=3 hostname )
HOST_NAME="192.168.100.49"
#if [ "$CLOUD_SERVICE" == "ifb-genouest-genostack" ]; then
# Cloud site WITH a site proxy
# APP_PORT=80
# PROXIED_IP=$( echo $HOST_NAME | sed "s|\.|-|g")
# HOST_NAME="openstack-${PROXIED_IP}.genouest.org"
# HTTP_ENDP="https://$HOST_NAME"
# systemctl stop nginx
#else
# Cloud site WOUT a site proxy
APP_PORT=8787
HTTP_ENDP="https://$HOST_NAME"
openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /etc/ssl/private/nginx-selfsigned.key -out /etc/ssl/certs/nginx-selfsigned.crt -subj "/C=FR/ST=AURA/L=Lyon/O=IFB/OU=IFB-biosphere/CN=myrstudio.biosphere.france-bioinformatique.fr"
openssl dhparam -out /etc/ssl/certs/dhparam.pem 2048
mkdir -p /etc/nginx/snippets
echo "ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;" > /etc/nginx/snippets/self-signed.conf
echo "ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;" >> /etc/nginx/snippets/self-signed.conf
cp system/nginx_snippets_ssl-params.conf /etc/nginx/snippets/ssl-params.conf
cp /etc/nginx/sites-available/default /etc/nginx/sites-available/default.bak
cp system/nginx_sites-available_default /etc/nginx/sites-available/default
sed -i "s|server_domain_or_IP|$HOST_NAME|" /etc/nginx/sites-available/default
useradd nginx
cp system/nginx_nginx.conf /etc/nginx/nginx.conf
cp system/nginx_conf.d_10-rstudio.conf /etc/nginx/conf.d/10-rstudio.conf
sed -i "s|example.com|$HOST_NAME|" /etc/nginx/conf.d/10-rstudio.conf
systemctl restart nginx
systemctl enable nginx
#fi
# Docker volumes
# mydatalocal: from the system disk or ephemeral one
IFB_DATADIR="/ifb/data/"
source /etc/profile.d/ifb.sh
VOL_NAME="mydatalocal"
VOL_DEV=$(readlink -f -n $IFB_DATADIR/$VOL_NAME )
DOCK_VOL=" --mount type=bind,src=$VOL_DEV,dst=$IFB_DATADIR/$VOL_NAME"
# MBB Workflows reads data from /Data and write results to /Results
mkdir -p ${VOL_DEV}/Data
mkdir -p ${VOL_DEV}/Results
DOCK_VOL+=" --mount type=bind,src=$VOL_DEV/Data,dst=/Data"
DOCK_VOL+=" --mount type=bind,src=$VOL_DEV/Results,dst=/Results"
# NFS mounts: from ifb_share configuration in autofs
IFS_ORI=$IFS
while IFS=" :" read VOL_NAME VOL_TYPE VOL_IP VOL_DEV ; do
DOCK_VOL+=" --mount type=volume,volume-driver=local,volume-opt=type=nfs,src=$VOL_NAME,dst=$IFB_DATADIR/$VOL_NAME,volume-opt=device=:$VOL_DEV,volume-opt=o=addr=$VOL_IP"
done < /etc/auto.ifb_share
IFS=$IFS_ORI
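# For illustration (hypothetical autofs entry): a line of /etc/auto.ifb_share such as
#   shared_bank  -fstype=nfs,rw  192.168.1.10:/export/shared_bank
# is split on spaces and ':' into the share name, its mount options, the NFS server IP and the
# exported path, and becomes an NFS-backed docker volume mounted under $IFB_DATADIR/<share name>.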
CONTAINER_ID=$( docker run -d -p $APP_PORT:3838 $DOCK_VOL $APP_IMG )
VM_IP=$(curl bot.whatismyipaddress.com)
if [ $CONTAINER_ID ]
then
echo " "
echo You have to put your Data on : ${VOL_DEV}/Data
echo " "
echo Results will be written to : ${VOL_DEV}/Results
echo " "
echo You can access the workflow interface at : https://${VM_IP}
echo " "
echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash
echo " "
echo To run the workflow without the interface : docker exec -it $CONTAINER_ID snakemake -s /workflow/Snakefile all --configfile config --cores XX
echo " "
echo config is a configuration file that must be located in a subfolder of ${VOL_DEV}/Data or ${VOL_DEV}/Results
echo " "
echo e.g. if the file is in ${VOL_DEV}/Data/run1/maconfig1.yml : docker exec -it $CONTAINER_ID snakemake -s /workflow/Snakefile all --configfile /Data/run1/maconfig1.yml --cores XX
echo " "
echo You can use the graphical interface to generate a configuration file.
echo " "
echo XX being the number of cores the workflow will use.
else
echo Failed to run the docker container !!
fi
#!/bin/bash
#This script helps deploy the Docker image
if [ $# -lt 2 ]
then
echo usage : $0 dataDir resultsDir '[dockerHub|local]'
exit 1
fi
#try a port in the range 8787 to 8800
#APP_PORT=$2
APP_PORT=8787
while [[ $(ss -tulw | grep $APP_PORT) != "" && $APP_PORT < 8800 ]]
do
APP_PORT=$(( $APP_PORT + 1))
done
if [[ $(ss -tulw | grep $APP_PORT) != "" ]]
then
echo "No tcp port available !!"
exit 1
fi
# Docker volumes
# MBB Workflows reads data from /Data and write results to /Results
if [ $SUDO_USER ]; then realUSER=$SUDO_USER; else realUSER=`whoami`; fi
Data=$1
Results=$2
mkdir -p $Data
mkdir -p $Results
DOCK_VOL+=" --mount type=bind,src=$Data,dst=/Data"
DOCK_VOL+=" --mount type=bind,src=$Results,dst=/Results"
if [ $# -lt 3 ]
then
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest"
else
IMG_SRC=$3
case $IMG_SRC in
dockerHub )
APP_IMG="mbbteam/variantcalling_benchmark_frombam:latest" ;;
local)
docker build . -t variantcalling_benchmark_frombam:latest
APP_IMG="variantcalling_benchmark_frombam:latest" ;;
mbb)
#APP_IMG="X.X.X.X:5000/variantcalling_benchmark_frombam:latest" ;;
esac
fi
CONTAINER_ID=$( docker run --rm -d -p $APP_PORT:3838 $DOCK_VOL $APP_IMG )
if [ $CONTAINER_ID ]
then
echo " "
echo You have to put your Data on : $1
echo " "
echo Results will be written to : $2
echo " "
hostname -I | grep -E -o "162.38.181.[0-9]{1,3}" | awk -v port=$APP_PORT '{print "You can access the workflow interface at : http://"$1":"port}'
echo " "
echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash
else
echo Failed to run the docker container !!
fi
base_tools:
snakemake:
- "K\xF6ster, Johannes and Rahmann, Sven. Snakemake - A scalable bioinformatics\
\ workflow engine. Bioinformatics 2012."
multiqc:
- "Philip Ewels, M\xE5ns Magnusson, Sverker Lundin, Max K\xE4ller, MultiQC: summarize\
\ analysis results for multiple tools and samples in a single report, Bioinformatics,\
\ Volume 32, Issue 19, 1 October 2016, Pages 3047\u20133048, https://doi.org/10.1093/bioinformatics/btw354"
shiny:
- 'Winston Chang, Joe Cheng, JJ Allaire, Yihui Xie and Jonathan McPherson (2019).
shiny: Web Application Framework for R. https://CRAN.R-project.org/package=shiny'
Picard_MarkDuplicates:
picard_tools:
- Picard Toolkit. 2019. Broad Institute, GitHub Repository. http://broadinstitute.github.io/picard/;
Broad Institute
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
gatk_IndelRealigner:
gatk:
- "McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky,\
\ A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010).\
\ The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation\
\ DNA sequencing data. Genome Research, 20(9), 1297\u20111303. https://doi.org/10.1101/gr.107524.110 "
gatk_haplotype_caller:
gatk:
- "McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky,\
\ A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010).\
\ The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation\
\ DNA sequencing data. Genome Research, 20(9), 1297\u20111303. https://doi.org/10.1101/gr.107524.110 "
bcftools:
- "Heng Li, A statistical framework for SNP calling, mutation discovery, association\
\ mapping and population genetical parameter estimation from sequencing data,\
\ Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987\u20132993,\
\ https://doi.org/10.1093/bioinformatics/btr509"
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
bcftools_mpileup:
bcftools:
- "Heng Li, A statistical framework for SNP calling, mutation discovery, association\
\ mapping and population genetical parameter estimation from sequencing data,\
\ Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987\u20132993,\
\ https://doi.org/10.1093/bioinformatics/btr509"
tabix:
- "Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited\
\ files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718\u2013719,\
\ https://doi.org/10.1093/bioinformatics/btq671"
deep_variant:
deepvariant:
- "A universal SNP and small-indel variant caller using deep neural networks. Nature\
\ Biotechnology 36, 983\u2013987 (2018). Ryan Poplin, Pi-Chuan Chang, David Alexander,\
\ Scott Schwartz, Thomas Colthurst, Alexander Ku, Dan Newburger, Jojo Dijamco,\
\ Nam Nguyen, Pegah T. Afshar, Sam S. Gross, Lizzie Dorfman, Cory Y. McLean, Mark\
\ A. DePristo, doi: https://doi.org/10.1038/nbt.4235"
samtools:
- "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor\
\ Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing\
\ Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume\
\ 25, Issue 16, 15 August 2009, Pages 2078\u20132079, https://doi.org/10.1093/bioinformatics/btp352"
freebayes:
freebayes:
- Garrison, E., & Marth, G. (2012). Haplotype-based variant detection from short-read
sequencing. arXiv preprint arXiv:1207.3907.
tabix:
- "Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited\
\ files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718\u2013719,\
\ https://doi.org/10.1093/bioinformatics/btq671"
compare_vcfs_isec:
bcftools:
- "Heng Li, A statistical framework for SNP calling, mutation discovery, association\
\ mapping and population genetical parameter estimation from sequencing data,\
\ Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987\u20132993,\
\ https://doi.org/10.1093/bioinformatics/btr509"
vcftools:
- The Variant Call Format and VCFtools, Petr Danecek, Adam Auton, Goncalo Abecasis,
Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter,
Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes
Project Analysis Group, Bioinformatics, 2011 http://dx.doi.org/10.1093/bioinformatics/btr330
upsetr:
- "Alexander Lex, Nils Gehlenborg, Hendrik Strobelt, Romain Vuillemot, Hanspeter\
\ Pfister, UpSet: Visualization of Intersecting Sets, IEEE Transactions on Visualization\
\ and Computer Graphics (InfoVis '14), vol. 20, no. 12, pp. 1983\u20131992, 2014.doi:\
\ https://doi.org/10.1109/TVCG.2014.2346248"
vcf_plot_dv:
deepvariant:
- "A universal SNP and small-indel variant caller using deep neural networks. Nature\
\ Biotechnology 36, 983\u2013987 (2018). Ryan Poplin, Pi-Chuan Chang, David Alexander,\
\ Scott Schwartz, Thomas Colthurst, Alexander Ku, Dan Newburger, Jojo Dijamco,\
\ Nam Nguyen, Pegah T. Afshar, Sam S. Gross, Lizzie Dorfman, Cory Y. McLean, Mark\
\ A. DePristo, doi: https://doi.org/10.1038/nbt.4235"
import re
import sys
from tools import read_yaml