Commit c6dc214c authored by abensaid

Merge branch 'master' into squeezemeta

parents 4a706151 ff3a400d
FROM ubuntu:20.04
MAINTAINER Khalid BELKHIR <khalid.belkhir@umontpellier.fr>
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y locales tzdata
RUN locale-gen "en_US.UTF-8"
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
# Install Apache, PHP 7, and supplementary programs. openssh-server, curl, and lynx-cur are for debugging the container.
RUN apt-get update && \
apt-get -y install \
apache2 \
php \
php-cli \
libapache2-mod-php \
php-gd \
php-curl \
php-json \
php-mbstring \
php-mysql \
php-xml \
php-yaml \
php-xsl \
php-zip \
sqlite3 \
php-sqlite3 \
git
RUN apt-get update
RUN apt-get install -y curl wget apt-utils
RUN apt-get install -y gcc fort77 aptitude
RUN aptitude install -y g++ xorg-dev libreadline-dev gfortran
RUN apt-get update
RUN apt-get install -y libssl-dev libxml2-dev libpcre3-dev liblzma-dev libbz2-dev libcurl4-openssl-dev liblapack3 git nano graphviz python3 python3-pip
RUN apt-get install -y autotools-dev automake cmake grep sed dpkg fuse zip build-essential pkg-config bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion zlib1g-dev libncurses5-dev libncursesw5-dev
RUN apt-get clean
RUN pip3 install cerberus oyaml
# Enable Apache mods.
RUN a2enmod php7.4
RUN a2enmod rewrite
# Update the php.ini file, enable <? ?> tags and quieten logging.
RUN sed -i "s/short_open_tag = Off/short_open_tag = On/" /etc/php/7.4/apache2/php.ini
RUN sed -i "s/error_reporting = .*$/error_reporting = E_ERROR | E_WARNING | E_PARSE/" /etc/php/7.4/apache2/php.ini
# Manually set up the apache environment variables
ENV APACHE_RUN_USER www-data
ENV APACHE_RUN_GROUP www-data
ENV APACHE_LOG_DIR /var/log/apache2
ENV APACHE_LOCK_DIR /var/lock/apache2
ENV APACHE_PID_FILE /var/run/apache2.pid
RUN locale-gen "fr_FR.UTF-8"
ENV LC_ALL fr_FR.UTF-8
ENV LANG fr_FR.UTF-8
RUN apt-get update
#install R version 4.0
#RUN apt-get install -y r-base r-base-dev
# update indices
RUN apt-get update -qq
# install two helper packages we need
RUN apt-get install -y --no-install-recommends software-properties-common dirmngr
# add the signing key (by Michael Rutter) for these repos
# To verify key, run gpg --show-keys /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# Fingerprint: E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# add the R 4.0 repo from CRAN -- adjust 'focal' to 'groovy' or 'bionic' as needed
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
RUN apt update
RUN apt-get install -y r-base r-base-dev
# install the yaml package
RUN R --slave -e "install.packages( 'yaml', repos='https://cloud.r-project.org')"
RUN if [ ! -d "/opt/biotools" ];then mkdir /opt/biotools; fi
RUN if [ ! -d "/opt/biotools/bin" ];then mkdir /opt/biotools/bin; fi
RUN chmod 777 -R /opt/biotools/
ENV PATH /opt/biotools/bin:$PATH
ENV ROOTSYS /opt/biotools/root
ENV LD_LIBRARY_PATH '$LD_LIBRARY_PATH:$ROOTSYS/lib'
RUN pip3 install snakemake==5.9.1 oyaml
RUN pip3 install multiqc==1.9
RUN pip3 install biopython==1.76
RUN apt-get install -y yasm
#RUN pip3 install cutadapt==2.3
RUN pip3 install Cython
RUN python3 -m pip install --user --upgrade cutadapt
# Make table cells adapt to content (height and width)
ARG GET_PYTHON_VERSION="python3 --version | cut -d ' ' -f2 | cut -f1,2 -d'.'"
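# Note: the ARG above stores a shell pipeline as plain text; $(eval ${GET_PYTHON_VERSION}) in the RUN
# instructions below executes that pipeline to print the major.minor version, e.g. "3.8".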
RUN echo "Detected Python $(eval ${GET_PYTHON_VERSION})"
RUN sed -i "739s/absolute/relative/g" /usr/local/lib/python$(eval ${GET_PYTHON_VERSION})/dist-packages/multiqc/templates/default/assets/css/default_multiqc.css
RUN Rscript -e 'install.packages("yaml",Ncpus=8,repos="https://cloud.r-project.org/");library("yaml")'
RUN Rscript -e 'install.packages("DT",Ncpus=8,repos="https://cloud.r-project.org/");library("DT")'
...@@ -41,4 +110,3 @@ RUN Rscript -e 'install.packages("shinyjs",Ncpus=8,repos="https://cloud.r-projec
RUN Rscript -e 'install.packages("shinyFiles",Ncpus=8,repos="https://cloud.r-project.org/");library("shinyFiles")'
RUN Rscript -e 'install.packages("BiocManager",Ncpus=8,repos="https://cloud.r-project.org/");library("BiocManager")'
RUN pip3 install cerberus
...@@ -11,6 +11,11 @@ This workflow can be run standalone on:
* Note the presence of the following files:
* Dockerfile: recipe file for building the Docker image
* Singularity.def: recipe file for building the Singularity image (requires root privileges)
* install.sh: installs the prerequisite software (to be done only once, if needed!)
* deployBigMem.sh: deploys a container in web mode on a bigmem-type machine
...@@ -19,7 +24,7 @@ This workflow can be run standalone on:
* deployLocalHost.sh: deploys on your own machine
* waw_workflow.qsub: submission script for running a workflow on the MBB cluster (requires building the Singularity image beforehand)
* RunCmdLine.sh: deploys and runs a workflow from the command line (we will cover this in a later section)
...@@ -75,12 +80,12 @@ This workflow can be run standalone on:
***deployLocalHost.sh /home/$USER/datasets/rnaseq/ /home/$USER/result1 local***
### B/ Change the version of a tool
The installation procedures for the various tools required by the workflow are gathered in a recipe file named Dockerfile.
* Open this file and locate the part covering the installation of kallisto
* List of kallisto releases: <https://github.com/pachterlab/kallisto/releases>
* Change the version number to a more recent kallisto release (see the sketch below)
* Relaunch the container with the 'local' option to rebuild the image with your changes
***deployLocalHost.sh /home/$USER/datasets/rnaseq/ /home/$USER/result1 local***
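For illustration, the kallisto block in the Dockerfile looks something like the sketch below; the release URL and version number here are assumptions, so match them against the actual recipe before editing:

    # hypothetical excerpt -- check the real Dockerfile
    RUN wget https://github.com/pachterlab/kallisto/releases/download/v0.46.2/kallisto_linux-v0.46.2.tar.gz \
        && tar -xzf kallisto_linux-v0.46.2.tar.gz \
        && cp kallisto/kallisto /opt/biotools/bin/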
...@@ -112,7 +117,7 @@ This file can be:
* Save your changes in maconfig.yaml in e.g. /home/$USER/results1/version2/; it will then be visible inside the container as /Results/maconfig.yaml
* From a console, launch the command line (here the parameter 10 means 10 cores are used):
***bash RunCmdLine.sh /home/$USER/datasets/rnaseq/ /home/$USER/results1/version2/ /Results/maconfig.yaml 10***
...@@ -126,7 +131,7 @@ This file can be:
Deployment example 1: ***bash deployBigMem.sh /home/votrelogin/data1/ /home/votrelogin/results1/***
Inside the container:
* /home/votrelogin/data1/ -> /Data/
* /home/votrelogin/results1/ -> /Results/
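These two mappings are bind mounts set up by the deployment script; assuming Docker as the container runtime, the underlying call presumably resembles:

***docker run -v /home/votrelogin/data1/:/Data/ -v /home/votrelogin/results1/:/Results/ ...***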
...
...@@ -6,4 +6,27 @@ sudo apt-get update
sudo apt-get install -y docker.io
##### nginx install #####
sudo apt-get install -y nginx
\ No newline at end of file
###### Singularity install #######
sudo apt-get install -y build-essential libssl-dev uuid-dev libgpgme11-dev \
squashfs-tools libseccomp-dev wget pkg-config git cryptsetup debootstrap
#Install Go https://golang.org/ and extract to /usr/local/go
wget https://dl.google.com/go/go1.13.linux-amd64.tar.gz
sudo tar --directory=/usr/local -xzvf go1.13.linux-amd64.tar.gz
export PATH=/usr/local/go/bin:$PATH
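# note: this export only affects the current shell session; append the line to ~/.bashrc to make it persistent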
#Change version if needed
wget https://github.com/singularityware/singularity/releases/download/v3.8.3/singularity-3.8.3.tar.gz
tar -xzvf singularity-3.8.3.tar.gz
cd singularity-3.8.3
./mconfig
cd builddir
make
sudo make install
#You can test your installation
singularity run library://godlovedc/funny/lolcow
\ No newline at end of file
#!/bin/bash
#$ -S /bin/bash
# Job name
#$ -N WAW_workflow
# Using current working directory (otherwise, you will have to use '#$ wd /path/to/run')
#$ -cwd
# job time limits (h_rt is required [s_rt == software time limit / h_rt == hardware time limit])
#$ -l h_rt=48:00:00
# Redirect standard output and standard error to the named files.
#$ -o Results/waw.out
#$ -e Results/waw.err
module load singularity-3.1
dataDir=$1
resultsDir=$2
# The config file must be in a location bound to /Data or /Results
# e.g. the file /home/khalid/dataanalyse/config.yaml :
# /home/khalid/dataanalyse/ is dataDir, which will be bound to /Data in the container,
# so configFile must be set to /Data/config.yaml
configFile=$3
cores=$4
singularityImage=$5
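# Example submission (hypothetical paths and image name, for illustration only):
# qsub waw_workflow.qsub /home/user/data/ /home/user/results/ /Data/config.yaml 10 /home/user/waw.sif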
# If needed build singularity image
# sudo singularity build --sandbox waw-workflow Singularity.def
#Run the workflow in cmd line
singularity exec -B $dataDir:/Data -B $resultsDir:/Results $singularityImage snakemake -s /workflow/Snakefile --configfile $configFile --cores $cores
...@@ -83,7 +83,7 @@ def report_section_order():
def main():
    res = "skip_generalstats: true\n\n"
    res += "report_comment: '" + str(config["params"]["memo"]) +"'\n\n"
    res += "custom_logo: /workflow/mbb.png\n\n"
    res += "custom_logo_url: https://mbb.univ-montp2.fr\n\n"
    res += "custom_logo_title: MBB platform\n\n"
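    # for reference, the string built above renders as MultiQC config YAML along these lines:
    #   skip_generalstats: true
    #   report_comment: '<memo>'
    #   custom_logo: /workflow/mbb.png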
...
...@@ -10,6 +10,10 @@ import re
import subprocess
from tools import *
#spython must be installed: pip3 install spython
from spython.main.parse.writers import get_writer
from spython.main.parse.parsers import get_parser
WORKFLOW_YAML = collections.OrderedDict()
TOOLS_YAML = collections.OrderedDict()
...@@ -399,6 +403,16 @@ def generate_pipeline_files(workflow_yaml, output_dir, waw_dir, local_config="de
    with open(output_dir + "/Dockerfile", "w") as out:
        out.write(generate_dockerfile(output_dir, local_config, waw_dir))
    DockerParser = get_parser('docker')
    SingularityWriter = get_writer('singularity')
    parser = DockerParser(output_dir + "/Dockerfile")
    writer = SingularityWriter(parser.recipe)
    result = writer.convert()
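    # convert() returns the translated Singularity definition rendered as a single string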
    with open(output_dir + "/Singularity.def", 'w') as f:
        print(result, file=f)
    write_yaml(output_dir + "/files/params.total.yml", generate_params_yaml(workflow_name, get_params_to_remove(), waw_dir))
###1
...@@ -429,6 +443,9 @@ def generate_pipeline_files(workflow_yaml, output_dir, waw_dir, local_config="de
    shutil.copy(os.path.join(waw_dir+"/generate_multiqc_config.py"), output_dir + "/files")
    shutil.copy(os.path.join(waw_dir+"/tools.py"), output_dir + "/files")
    shutil.copy(os.path.join(waw_dir+"/mbb.png"), output_dir + "/files")
    # save a copy of the workflow json in the files dir
    os.system('cp '+ output_dir + '/*.json '+ output_dir + "/files/")
    if "input" in WORKFLOW_YAML:
        for raw_input in WORKFLOW_YAML["input"]:
...
...@@ -38,10 +38,11 @@ def raw_bams(results_dir, sample_dir, STOP_CASES=dict()):
    else:
        exit("Files have different suffixes:" + ','.join(suffixes))
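    # note: the sample table is only written when the results directory already exists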
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tbam_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
    out["bams"] = os.path.join(sample_dir,"{sample}"+suffix)
...
...@@ -22,10 +22,11 @@ def raw_fasta(results_dir, sample_dir):
    if (len(set(suffixes)) == 1 ):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tfasta_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
...
...@@ -22,10 +22,11 @@ def raw_long_reads(results_dir, sample_dir):
    if (len(set(suffixes)) == 1 ):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tfile_reads")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'suffix': suffix, 'dico': dicoSamples}
    out["read"] = os.path.join(sample_dir,"{sample}"+suffix)
...
...@@ -33,13 +33,14 @@ def raw_reads(results_dir, sample_dir, SeOrPe):
    if (len(set(suffixes)) == 1 ):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            if SeOrPe == "PE":
                sampleTab.write("sample\tfile_read_1\tfile_read_2")
            else:
                sampleTab.write("sample\tfile_read_1")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples, 'pe_mark': PE_mark}
...
...@@ -6,7 +6,7 @@ def raw_tsv(results_dir, input_dir):
    samples = list()
    out = dict()
    out["tsv"] = input_dir
    out["samples"] = ""
    #out["samples"] = samples
    return(out)
...
...@@ -23,10 +23,11 @@ def raw_vcf(results_dir, sample_dir):
    if (len(set(suffixes)) == 1 ):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tvcf_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
...
...@@ -24,10 +24,11 @@ def raw_vcfsinDir(results_dir, sample_dir):
    if (len(set(suffixes)) == 1 ):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tvcf_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples, 'vcfsinDir': vcfsinDir}
...
...@@ -11,7 +11,7 @@ def <step_name>__db():
    file = <step_name>__blastn_inputs()["blastdb"][0]
    db_name = os.path.splitext(os.path.basename(file))[0]
    db_path = os.path.dirname(file)
    return { "db_name": db_name, "db_path": db_path }

rule <step_name>__blastn:
...@@ -34,7 +34,7 @@ rule <step_name>__blastn:
        config["results_dir"]+'/logs/' + config["<step_name>__blastn_output_dir"] + '/blastn_log.txt'
    shell:
        # output header
        "echo -e 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tqlen' > {output.blastout}; "
        # command
        "{params.command} "
        "-query {input.query} "
...
...@@ -3,9 +3,9 @@ rule <step_name>__deseq2:
        **<step_name>__deseq2_inputs()
    output:
        de_table = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/de_table.csv',
        raw_counts = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/raw_counts.tsv',
        PCA = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/PCA_mqc.png',
        Top_genes = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Top_genes_mqc.yaml',
        MA_plot = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/MA_plot_mqc.png',
        Volcano_plot = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Volcano_plot_mqc.png',
        Heatmap = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Heatmap_mqc.png',
...@@ -17,4 +17,4 @@ rule <step_name>__deseq2:
        outDir = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"],
        biomaRtDataset = config["<step_name>__deseq2_biomaRtDataset"],
    script:
        config["<step_name>__deseq2_script"]
\ No newline at end of file
...@@ -10,67 +10,89 @@ library(ggplot2)
popmapData = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)
colnames(popmapData)=c("samples","condition")
popmapData[,"condition"] = as.factor(popmapData[,"condition"])
#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132")
#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5")
if ("quantification" %in% names(snakemake@config) )
{
    files = sort(snakemake@input[["counts"]])
    # sort popmapData by sample name and 'hope' that sorting the files gives the right mapping !!!
    popmapData = popmapData[order(popmapData$samples), ]
    samples = popmapData$samples
    conditions = popmapData$condition
    # !!!!! here we assume that files are in the same order as in the popmap file !!!!!
    if(snakemake@config[["quantification"]] == "htseq_count")
    {
        print("Quantification was done with htseq-count !")
        names(files) = samples
        sampleTable <- data.frame(
            sampleName = samples,
            fileName = files,
            condition = conditions
        )
        # create the DESeq data object
        dds <- DESeqDataSetFromHTSeqCount(
            sampleTable = sampleTable,
            directory = "",
            design = ~ condition
        )
    } else { # counts were generated with one of: "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
        if (snakemake@params[["tx2gene"]] == TRUE){
            # convert transcripts to genes