Commit c6dc214c authored by abensaid

Merge branch 'master' into squeezemeta

parents 4a706151 ff3a400d
FROM ubuntu:20.04
MAINTAINER Khalid BELKHIR <khalid.belkhir@umontpellier.fr>
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y locales tzdata
RUN locale-gen "en_US.UTF-8"
# Use ENV rather than RUN export: exported variables do not persist across Docker layers
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
# Install apache, PHP 7, and supplementary programs. openssh-server, curl, and lynx-cur are for debugging the container.
RUN apt-get update && \
apt-get -y install \
apache2 \
php \
php-cli \
libapache2-mod-php \
php-gd \
php-curl \
php-json \
php-mbstring \
php-mysql \
php-xml \
php-yaml \
php-xsl \
php-zip \
sqlite3 \
php-sqlite3 \
git
RUN apt-get update
RUN apt-get install -y curl wget apt-utils
RUN apt-get install -y gcc fort77 aptitude
RUN aptitude install -y g++ xorg-dev libreadline-dev gfortran
RUN apt-get update
RUN apt-get install -y libssl-dev libxml2-dev libpcre3-dev liblzma-dev libbz2-dev libcurl4-openssl-dev liblapack3 git nano graphviz python3 python3-pip
RUN apt-get install -y autotools-dev automake cmake grep sed dpkg fuse zip build-essential pkg-config bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion zlib1g-dev libncurses5-dev libncursesw5-dev
RUN apt-get clean
RUN pip3 install cerberus oyaml
# Enable apache mods.
RUN a2enmod php7.4
RUN a2enmod rewrite
# Update the PHP.ini file, enable <? ?> tags and quieten logging.
RUN sed -i "s/short_open_tag = Off/short_open_tag = On/" /etc/php/7.4/apache2/php.ini
RUN sed -i "s/error_reporting = .*$/error_reporting = E_ERROR | E_WARNING | E_PARSE/" /etc/php/7.4/apache2/php.ini
# Manually set up the apache environment variables
ENV APACHE_RUN_USER www-data
ENV APACHE_RUN_GROUP www-data
ENV APACHE_LOG_DIR /var/log/apache2
ENV APACHE_LOCK_DIR /var/lock/apache2
ENV APACHE_PID_FILE /var/run/apache2.pid
RUN locale-gen "fr_FR.UTF-8"
ENV LC_ALL fr_FR.UTF-8
ENV LANG fr_FR.UTF-8
RUN apt-get update
#install R version 4.0
#RUN apt-get install -y r-base r-base-dev
# update indices
RUN apt-get update -qq
# install two helper packages we need
RUN apt-get install -y --no-install-recommends software-properties-common dirmngr
# add the signing key (by Michael Rutter) for these repos
# To verify key, run gpg --show-keys /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# Fingerprint: E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# add the R 4.0 repo from CRAN -- adjust 'focal' to 'groovy' or 'bionic' as needed
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/"
RUN apt update
RUN apt-get install -y r-base r-base-dev
# install the yaml package
RUN R --slave -e "install.packages( 'yaml', repos='https://cloud.r-project.org')"
RUN if [ ! -d "/opt/biotools" ];then mkdir /opt/biotools; fi
RUN if [ ! -d "/opt/biotools/bin" ];then mkdir /opt/biotools/bin; fi
RUN chmod 777 -R /opt/biotools/
ENV PATH /opt/biotools/bin:$PATH
ENV ROOTSYS /opt/biotools/root
ENV LD_LIBRARY_PATH '$LD_LIBRARY_PATH:$ROOTSYS/lib'
RUN pip3 install snakemake==5.9.1 oyaml
RUN pip3 install multiqc==1.9
RUN pip3 install biopython==1.76
RUN apt-get install -y yasm
#RUN pip3 install cutadapt==2.3
RUN pip3 install Cython
RUN python3 -m pip install --user --upgrade cutadapt
# Make table cells adapt to content (height and width)
ARG GET_PYTHON_VERSION="python3 --version | cut -d ' ' -f2 | cut -f1,2 -d'.'"
RUN echo "Detected Python $(eval ${GET_PYTHON_VERSION})"
RUN sed -i "739s/absolute/relative/g" /usr/local/lib/python$(eval ${GET_PYTHON_VERSION})/dist-packages/multiqc/templates/default/assets/css/default_multiqc.css
RUN Rscript -e 'install.packages("yaml",Ncpus=8,repos="https://cloud.r-project.org/");library("yaml")'
RUN Rscript -e 'install.packages("DT",Ncpus=8,repos="https://cloud.r-project.org/");library("DT")'
@@ -41,4 +110,3 @@ RUN Rscript -e 'install.packages("shinyjs",Ncpus=8,repos="https://cloud.r-projec
RUN Rscript -e 'install.packages("shinyFiles",Ncpus=8,repos="https://cloud.r-project.org/");library("shinyFiles")'
RUN Rscript -e 'install.packages("BiocManager",Ncpus=8,repos="https://cloud.r-project.org/");library("BiocManager")'
@@ -11,6 +11,11 @@ This workflow can be run standalone on:
* Note the presence of the following files:
* Dockerfile: recipe file for building the Docker image
* Singularity.def: recipe file for building the Singularity image (requires root privileges)
* install.sh: installs the prerequisite software (to be done only once, if needed!)
* deployBigMem.sh: deploys a container in web mode on a bigmem-type machine
@@ -19,7 +24,7 @@ This workflow can be run standalone on:
* deployLocalHost.sh: deploys on your own machine
* waw_workflow.qsub: submission script for running a workflow on the MBB cluster (requires building the Singularity image beforehand)
* RunCmdLine.sh: deploys and runs a workflow from the command line (covered in a later section)
@@ -75,12 +80,12 @@ This workflow can be run standalone on:
***deployLocalHost.sh /home/$USER/datasets/rnaseq/ /home/$USER/result1 local***
### B/ Changing a tool's version
The installation procedures for the various tools required by the workflow are gathered in a recipe file named Dockerfile.
* Open this file and locate the part concerning the installation of kallisto
* List of kallisto versions: <https://github.com/pachterlab/kallisto/releases>
* Change the version number to a more recent kallisto release (see the sketch after the command below)
* Rerun the container with the 'local' option to rebuild the image with your changes
***deployLocalHost.sh /home/$USER/datasets/rnaseq/ /home/$USER/result1 local***
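A minimal sketch of what that edit might look like, assuming kallisto is installed from a GitHub release tarball (the version variable, URL pattern and install path are illustrative; adapt them to the lines actually present in the Dockerfile):

```dockerfile
# Bump this version number to upgrade kallisto (hypothetical excerpt)
ENV KALLISTO_VERSION 0.48.0
RUN wget https://github.com/pachterlab/kallisto/releases/download/v${KALLISTO_VERSION}/kallisto_linux-v${KALLISTO_VERSION}.tar.gz \
    && tar -xzf kallisto_linux-v${KALLISTO_VERSION}.tar.gz \
    && mv kallisto/kallisto /opt/biotools/bin/
```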
@@ -112,7 +117,7 @@ This file can be:
* Save your changes as maconfig.yaml, e.g. in /home/$USER/results1/version2/; it will then be visible inside the container as /Results/maconfig.yaml
* From a console, launch the command line (here the parameter 10 means 10 cores):
***bash RunCmdLine.sh /home/$USER/datasets/rnaseq/ /home/$USER/results1/version2/ /Results/maconfig.yaml 10***
@@ -126,7 +131,7 @@ This file can be:
Deployment example 1: ***bash deployBigMem.sh /home/votrelogin/data1/ /home/votrelogin/results1/***
Inside the container:
* /home/votrelogin/data1/ -> /Data/
* /home/votrelogin/results1/ -> /Results/
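These mappings correspond to Singularity bind mounts of the following form (an illustration only; the image name waw.sif is hypothetical and deployBigMem.sh may pass additional options):

***singularity exec -B /home/votrelogin/data1/:/Data -B /home/votrelogin/results1/:/Results waw.sif ...***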
@@ -6,4 +6,27 @@ sudo apt-get update
sudo apt-get install -y docker.io
##### nginx install #####
sudo apt-get install -y nginx
###### Singularity install #######
sudo apt-get install -y build-essential libssl-dev uuid-dev libgpgme11-dev \
squashfs-tools libseccomp-dev wget pkg-config git cryptsetup debootstrap
#Install Go https://golang.org/ and extract to /usr/local/go
wget https://dl.google.com/go/go1.13.linux-amd64.tar.gz
sudo tar --directory=/usr/local -xzvf go1.13.linux-amd64.tar.gz
export PATH=/usr/local/go/bin:$PATH
#Change version if needed
wget https://github.com/singularityware/singularity/releases/download/v3.8.3/singularity-3.8.3.tar.gz
tar -xzvf singularity-3.8.3.tar.gz
cd singularity-3.8.3
./mconfig
cd builddir
make
sudo make install
#You can test your installation
singularity run library://godlovedc/funny/lolcow
#!/bin/bash
#$ -S /bin/bash
# Job name
#$ -N WAW_workflow
# Using current working directory (otherwise, you will have to use '#$ -wd /path/to/run')
#$ -cwd
# job time limits (h_rt is required [s_rt == software time limit / h_rt == hardware time limit])
#$ -l h_rt=48:00:00
# Redirects the standard output to the named file.
#$ -o Results/waw.out
#$ -e Results/waw.err
module load singularity-3.1
dataDir=$1
resultsDir=$2
# Config file must be in a location bound to /Data or /Results
# e.g. file in /home/khalid/dataanalyse/config.yaml
# /home/khalid/dataanalyse/ is dataDir and will be bound to /Data in the container
# so configFile must be set to /Data/config.yaml
configFile=$3
cores=$4
singularityImage=$5
# If needed build singularity image
# sudo singularity build --sandbox waw-workflow Singularity.def
#Run the workflow in cmd line
singularity exec -B $dataDir:/Data -B $resultsDir:/Results $singularityImage snakemake -s /workflow/Snakefile --configfile $configFile --cores $cores
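# Example submission (hypothetical paths; assumes the Singularity image was built beforehand,
# e.g. as waw.sif, and that config.yaml sits in dataDir so the container sees it as /Data/config.yaml):
#   qsub waw_workflow.qsub /home/user/data/ /home/user/results/ /Data/config.yaml 10 waw.sif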
@@ -83,7 +83,7 @@ def report_section_order():
def main():
res = "skip_generalstats: true\n\n"
res += "report_comment: '" + config["params"]["memo"] +"'\n\n"
res += "report_comment: '" + str(config["params"]["memo"]) +"'\n\n"
res += "custom_logo: /workflow/mbb.png\n\n"
res += "custom_logo_url: https://mbb.univ-montp2.fr\n\n"
res += "custom_logo_title: MBB platform\n\n"
@@ -10,6 +10,10 @@ import re
import subprocess
from tools import *
#spython must be installed: pip3 install spython
from spython.main.parse.writers import get_writer
from spython.main.parse.parsers import get_parser
WORKFLOW_YAML = collections.OrderedDict()
TOOLS_YAML = collections.OrderedDict()
@@ -399,6 +403,16 @@ def generate_pipeline_files(workflow_yaml, output_dir, waw_dir, local_config="de
    with open(output_dir + "/Dockerfile", "w") as out:
        out.write(generate_dockerfile(output_dir, local_config, waw_dir))
    # convert the generated Dockerfile into a Singularity recipe with spython
    DockerParser = get_parser('docker')
    SingularityWriter = get_writer('singularity')
    parser = DockerParser(output_dir + "/Dockerfile")
    writer = SingularityWriter(parser.recipe)
    result = writer.convert()
    with open(output_dir + "/Singularity.def", 'w') as f:
        print(result, file=f)  # the with block closes the file; no explicit close needed
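    # Equivalent one-liner using spython's command-line interface (assuming spython is on PATH):
    #   spython recipe Dockerfile > Singularity.def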
    write_yaml(output_dir + "/files/params.total.yml", generate_params_yaml(workflow_name, get_params_to_remove(), waw_dir))
    ###1
@@ -429,6 +443,9 @@ def generate_pipeline_files(workflow_yaml, output_dir, waw_dir, local_config="de
    shutil.copy(os.path.join(waw_dir+"/generate_multiqc_config.py"), output_dir + "/files")
    shutil.copy(os.path.join(waw_dir+"/tools.py"), output_dir + "/files")
    shutil.copy(os.path.join(waw_dir+"/mbb.png"), output_dir + "/files")
    # save a copy of the workflow json in the files dir
    os.system('cp ' + output_dir + '/*.json ' + output_dir + "/files/")
    if "input" in WORKFLOW_YAML:
        for raw_input in WORKFLOW_YAML["input"]:
@@ -38,10 +38,11 @@ def raw_bams(results_dir, sample_dir, STOP_CASES=dict()):
    else:
        exit("Files have different suffixes:" + ','.join(suffixes))
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tbam_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
    out["bams"] = os.path.join(sample_dir,"{sample}"+suffix)
@@ -22,10 +22,11 @@ def raw_fasta(results_dir, sample_dir):
    if (len(set(suffixes)) == 1):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tfasta_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
@@ -22,10 +22,11 @@ def raw_long_reads(results_dir, sample_dir):
    if (len(set(suffixes)) == 1):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tfile_reads")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'suffix': suffix, 'dico': dicoSamples}
    out["read"] = os.path.join(sample_dir,"{sample}"+suffix)
@@ -33,13 +33,14 @@ def raw_reads(results_dir, sample_dir, SeOrPe):
    if (len(set(suffixes)) == 1):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            if SeOrPe == "PE":
                sampleTab.write("sample\tfile_read_1\tfile_read_2")
            else:
                sampleTab.write("sample\tfile_read_1")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples, 'pe_mark': PE_mark}
@@ -6,7 +6,7 @@ def raw_tsv(results_dir, input_dir):
samples = list()
out = dict()
out["tsv"] = input_dir
out["samples"] =""
#out["samples"] = samples
return(out)
@@ -23,10 +23,11 @@ def raw_vcf(results_dir, sample_dir):
    if (len(set(suffixes)) == 1):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tvcf_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
@@ -24,10 +24,11 @@ def raw_vcfsinDir(results_dir, sample_dir):
    if (len(set(suffixes)) == 1):
        suffix = list(set(suffixes))[0]
    if os.path.exists(results_dir):
        with open(results_dir+"/samples.tsv","w") as sampleTab:
            sampleTab.write("sample\tvcf_file")
            for sample in sorted(samples):
                sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
    out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples, 'vcfsinDir': vcfsinDir}
@@ -11,7 +11,7 @@ def <step_name>__db():
    file = <step_name>__blastn_inputs()["blastdb"][0]
    db_name = os.path.splitext(os.path.basename(file))[0]
    db_path = os.path.dirname(file)
    return { "db_name": db_name, "db_path": db_path }
rule <step_name>__blastn:
@@ -34,7 +34,7 @@ rule <step_name>__blastn:
config["results_dir"]+'/logs/' + config["<step_name>__blastn_output_dir"] + '/blastn_log.txt'
shell:
# output header
"echo -e 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tqlen' > {output.blastout}; "
"echo -e 'qseqid\tsseqid\tpident\tlength\tmismach\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tqlen' > {output.blastout}; "
# command
"{params.command} "
"-query {input.query} "
@@ -3,9 +3,9 @@ rule <step_name>__deseq2:
**<step_name>__deseq2_inputs()
output:
de_table = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/de_table.csv',
r_data = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/data.RData',
raw_counts = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/raw_counts.tsv',
PCA = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/PCA_mqc.png',
Top_genes = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Top_genes_mqc.yaml',
MA_plot = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/MA_plot_mqc.png',
Volcano_plot = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Volcano_plot_mqc.png',
Heatmap = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Heatmap_mqc.png',
@@ -17,4 +17,4 @@ rule <step_name>__deseq2:
outDir = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"],
biomaRtDataset = config["<step_name>__deseq2_biomaRtDataset"],
script:
config["<step_name>__deseq2_script"]
\ No newline at end of file
config["<step_name>__deseq2_script"]
@@ -10,67 +10,89 @@ library(ggplot2)
popmapData = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)
colnames(popmapData)=c("samples","condition")
samples = popmapData$samples
conditions = popmapData$condition
popmapData[,"condition"] = as.factor(popmapData[,"condition"])
#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132")
#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5")
if ("quantification" %in% names(snakemake@config) )
{
files = sort(snakemake@input[["counts"]])
#sort popmpData based on samples names and 'hope' that sorting files will give the wright mapping !!!
popmapData = popmapData[order(popmapData$samples), ]
samples = popmapData$samples
conditions = popmapData$condition
# !!!!! here we assume that files are in the same order as in the popmap file !!!!!
if(snakemake@config[["quantification"]] == "htseq_count")
{
print("Quantification was done with htseq-count !")
files = sort(snakemake@input[["counts"]])
names(files) = samples
sampleTable <- data.frame(
sampleName = samples,
fileName = files,
condition = conditions
)
# create the DESeq data object
dds <- DESeqDataSetFromHTSeqCount(
sampleTable = sampleTable,
directory = "",
design = ~ condition
)
} else{ # counts where generated with one of these software "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
if (snakemake@params[["tx2gene"]] == TRUE){
# conversion transcrits vers gènes
TxDb <- makeTxDbFromGFF(file = snakemake@params[["annotations"]])
k <- keys(TxDb, keytype = "TXNAME")
tx2gene <- select(TxDb, k, "GENEID", "TXNAME")
txi <- tximport(files, type = snakemake@config[["quantification"]], tx2gene = tx2gene)
} else {
txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE)
}
groups = as.factor(conditions)
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "condition"
sampleTable <- data.frame(condition = factor(conditions))
rownames(sampleTable) <- colnames(txi$counts)
dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat)
print("Quantification was done with htseq-count !")
names(files) = samples
sampleTable <- data.frame(
sampleName = samples,
fileName = files,
condition = conditions
)
# create the DESeq data object
dds <- DESeqDataSetFromHTSeqCount(
sampleTable = sampleTable,
directory = "",
design = ~ condition
)
} else { # counts where generated with one of these software "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
if (snakemake@params[["tx2gene"]] == TRUE){
# conversion transcrits vers gènes
TxDb <- makeTxDbFromGFF(file = snakemake@params[["annotations"]])
k <- keys(TxDb, keytype = "TXNAME")
tx2gene <- select(TxDb, k, "GENEID", "TXNAME")
txi <- tximport(files, type = snakemake@config[["quantification"]], tx2gene = tx2gene)
} else {
txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE)
}
groups = as.factor(conditions)
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "condition"
sampleTable <- data.frame(condition = factor(conditions))
rownames(sampleTable) <- colnames(txi$counts)
dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat)
}
} else {
if("tsv_file" %in% names(snakemake@config)) #it's from a tsv file containing counts for samples
{
print("Counts are given in a file !")
countMatrix <- read.table (snakemake@config[["tsv_file"]], header=TRUE, sep="\t")
countMatrix <- read.table(snakemake@config[["tsv_file"]], header=TRUE, sep="\t", check.names=F)
rownames(countMatrix) = countMatrix[,1]
countMatrix = countMatrix[,-1]
dds = DESeqDataSetFromMatrix(countData = countMatrix, colData = popmapData, design = ~ condition)
if (all(popmapData$samples %in% colnames(countMatrix)))
{
#reduce countMatrix to the columns given by samples in popmap !
samples = popmapData$samples
countMatrix = countMatrix[, samples]
dds = DESeqDataSetFromMatrix(countData = countMatrix, colData = popmapData, design = ~ condition)
} else {
print("Samples in popmap were not found in count matrix !!!")
exit()
}
}
}
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
raw_counts = assay(dds)
write.table(raw_counts, snakemake@output[["raw_counts"]], quote =F, sep="\t", row.names = T, col.names = NA)
# output a mqc file showing samples and conditions
write.table(colData(dds), paste0(snakemake@params[["outDir"]],"/samples_and_conditions_mqc.tsv"), quote =F, sep="\t", row.names = T, col.names = NA)
dds <- DESeq(dds) #, fitType = snakemake@config[["deseq2_fitType"]], betaPrior = as.logical(snakemake@config[["deseq2_betaPrior"]]), test="Wald")
res <- results(dds)
save.image(snakemake@output[["r_data"]])
write.csv(res, snakemake@output[["de_table"]])
###### PREPARE REPORT ######
@@ -95,22 +117,38 @@ dev.off()
options(digits=4)
# resSig <- subset(res, padj < 0.1)
# resSigdf = data.frame(resSig@listData,row.names = resSig@rownames)
#only significant genes
resSig <- subset(res, padj <= 0.05)
signiGenes <- as.data.frame(resSig)
#datatable(resSigdf,options = list(scrollX = '300px'))
#topGenes = head(resSigdf[order(resSigdf$padj),], 40)
# top 40 genes based on p-values, significant or not !!
topGenes = head(as.data.frame(res[order(res$pvalue),]) , 40)
cat("# id: Top_genes
# section_name: 'Top 40 genes'
# description: 'Tableau des 40 top genes avec les plus faibles pvalues'
# format: 'tsv'
# plot_type: 'table'\ngene\t", file=snakemake@output[["Top_genes"]])
write.table(topGenes,snakemake@output[["Top_genes"]],append=TRUE,sep="\t")
speciesData = snakemake@params[["biomaRtDataset"]]
if ( speciesData != "None")
{ # add link to ensembl.org
topGenes= cbind(topGenes, link=sprintf('<a href="http://www.ensembl.org/%s/geneview?gene=%s" target="_blank" class="btn btn-success" style="color: #fff; background-color: #449d44; border-color: #255625" >%s</a>',speciesData, rownames(topGenes), rownames(topGenes)))
}
cat("id: 'Top_genes'
section_name: 'Top 40 genes'
description: 'Top 40 genes sorted by p-values'
plot_type: 'table'
pconfig:
id: 'Top_genes'
namespace: 'Top 40 genes'
title: 'Top 40 genes'
scale: false
data: