Commit c6dc214c authored by abensaid's avatar abensaid
Browse files

Merge branch 'master' into squeezemeta

parents 4a706151 ff3a400d
# NOTE(review): leftover from the merge of two Dockerfiles — this rocker/r-ver
# stage is immediately superseded by the `FROM ubuntu:20.04` stage below, which
# starts a fresh build stage. Nothing is COPY'd --from this stage, so these
# four lines are dead weight; confirm and remove.
FROM rocker/r-ver:3.5.3
ENV PATH /opt/biotools/bin:$PATH
ENV ROOTSYS /opt/biotools/root
ENV LD_LIBRARY_PATH '$LD_LIBRARY_PATH:$ROOTSYS/lib'
# WAW (Workflow At Will) web container: Apache + PHP 7.4 front end, plus the
# R / Python / build tooling the generated workflows need.
FROM ubuntu:20.04

# MAINTAINER is deprecated (hadolint DL4000); use a LABEL instead.
LABEL maintainer="Khalid BELKHIR <khalid.belkhir@umontpellier.fr>"

# Build-time only: silence interactive prompts from apt (tzdata in particular).
# ARG (not ENV) so it does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Locales + timezone data. update and install are combined in one layer so the
# package index is never stale (hadolint DL3009), and the apt lists are removed
# in the same layer that created them.
# (The previous `RUN export LC_ALL=...` lines were no-ops: each RUN starts a
# fresh shell, so exported variables do not survive to the next instruction.)
RUN apt-get update && apt-get install -y \
        locales \
        tzdata \
    && locale-gen "en_US.UTF-8" \
    && rm -rf /var/lib/apt/lists/*

# Install apache, PHP 7, and supplementary programs used by the web interface.
RUN apt-get update && apt-get -y install \
        apache2 \
        php \
        php-cli \
        libapache2-mod-php \
        php-gd \
        php-curl \
        php-json \
        php-mbstring \
        php-mysql \
        php-xml \
        php-yaml \
        php-xsl \
        php-zip \
        sqlite3 \
        php-sqlite3 \
        git \
    && rm -rf /var/lib/apt/lists/*

# Compilers, build tools and libraries required to build workflow tools.
# (yasm previously used a separate `apt-get install yasm` without -y, which
# aborts under a non-interactive build; it is folded in here.)
RUN apt-get update && apt-get install -y \
        apt-utils \
        aptitude \
        autotools-dev \
        automake \
        build-essential \
        bzip2 \
        ca-certificates \
        cmake \
        curl \
        dpkg \
        fort77 \
        fuse \
        g++ \
        gcc \
        gfortran \
        git \
        graphviz \
        grep \
        libbz2-dev \
        libcurl4-openssl-dev \
        libglib2.0-0 \
        liblapack3 \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libpcre3-dev \
        libreadline-dev \
        libsm6 \
        libssl-dev \
        libxext6 \
        libxml2-dev \
        libxrender1 \
        mercurial \
        nano \
        pkg-config \
        python3 \
        python3-pip \
        sed \
        subversion \
        wget \
        xorg-dev \
        yasm \
        zip \
        zlib1g-dev \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Python helpers used by the workflow generator.
RUN pip3 install cerberus oyaml

# Enable apache mods.
RUN a2enmod php7.4 && a2enmod rewrite

# Update the PHP.ini file, enable <? ?> tags and quieten logging.
RUN sed -i "s/short_open_tag = Off/short_open_tag = On/" /etc/php/7.4/apache2/php.ini \
    && sed -i "s/error_reporting = .*$/error_reporting = E_ERROR | E_WARNING | E_PARSE/" /etc/php/7.4/apache2/php.ini

# Manually set up the apache environment variables (key=value form; the legacy
# space-separated form is deprecated).
ENV APACHE_RUN_USER=www-data \
    APACHE_RUN_GROUP=www-data \
    APACHE_LOG_DIR=/var/log/apache2 \
    APACHE_LOCK_DIR=/var/lock/apache2 \
    APACHE_PID_FILE=/var/run/apache2.pid

# French locale for the web UI.
RUN locale-gen "fr_FR.UTF-8"
ENV LC_ALL=fr_FR.UTF-8 \
    LANG=fr_FR.UTF-8

# ---- Install R 4.x from the CRAN Ubuntu repository ----
# Helper packages needed to add the repo and its key.
RUN apt-get update -qq \
    && apt-get install -y --no-install-recommends software-properties-common dirmngr \
    && rm -rf /var/lib/apt/lists/*
# Add the signing key (by Michael Rutter) for these repos.
# To verify key, run: gpg --show-keys /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# Fingerprint: E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# Add the R 4.0 repo from CRAN -- adjust 'focal' to 'groovy' or 'bionic' as needed.
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(lsb_release -cs)-cran40/" \
    && apt-get update \
    && apt-get install -y r-base r-base-dev \
    && rm -rf /var/lib/apt/lists/*

# Install the R 'yaml' package.
RUN R --slave -e "install.packages( 'yaml', repos='https://cloud.r-project.org')"

# World-writable tool tree for workflow-installed binaries.
RUN mkdir -p /opt/biotools/bin && chmod 777 -R /opt/biotools/
ENV PATH=/opt/biotools/bin:$PATH \
    ROOTSYS=/opt/biotools/root
# NOTE(review): the single quotes here are kept from the original; they likely
# end up literally in the value — confirm intended behavior before changing.
ENV LD_LIBRARY_PATH '$LD_LIBRARY_PATH:$ROOTSYS/lib'

# Pinned Python tooling used by the generated pipelines.
RUN pip3 install snakemake==5.9.1 oyaml \
    && pip3 install multiqc==1.9 \
    && pip3 install biopython==1.76 \
    && pip3 install Cython
RUN python3 -m pip install --user --upgrade cutadapt

# Extract "major.minor" from the python3 version (e.g. "3.8") so we can locate
# the dist-packages directory of multiqc.
ARG GET_PYTHON_VERSION="python3 --version | cut -d ' ' -f2 | cut -f1,2 -d'.'"
RUN echo "Detected Python $(eval ${GET_PYTHON_VERSION})"
# Make table cells adapt to content (height and width).
RUN sed -i "739s/absolute/relative/g" /usr/local/lib/python$(eval ${GET_PYTHON_VERSION})/dist-packages/multiqc/templates/default/assets/css/default_multiqc.css

# R packages for the report/UI.
RUN Rscript -e 'install.packages("yaml",Ncpus=8,repos="https://cloud.r-project.org/");library("yaml")'
RUN Rscript -e 'install.packages("DT",Ncpus=8,repos="https://cloud.r-project.org/");library("DT")'
......@@ -41,4 +110,3 @@ RUN Rscript -e 'install.packages("shinyjs",Ncpus=8,repos="https://cloud.r-projec
RUN Rscript -e 'install.packages("shinyFiles",Ncpus=8,repos="https://cloud.r-project.org/");library("shinyFiles")'
RUN Rscript -e 'install.packages("BiocManager",Ncpus=8,repos="https://cloud.r-project.org/");library("BiocManager")'
RUN pip3 install cerberus
......@@ -11,6 +11,11 @@ Ce workflow peut être lancé de manière autonome sur :
* Remarquer la présence des fichiers suivants :
* Dockerfile : fichier de recettes pour la construction de l'image Docker
* Singularity.def : fichier de recettes pour la construction de l'image Singularity (nécessite les droits root)
* install.sh permet d'installer les logiciels pré-requis (à faire une seule fois si nécessaire!)
* deployBigMem.sh : permet de déployer un conteneur en mode web sur une machine de type bigmem
......@@ -19,7 +24,7 @@ Ce workflow peut être lancé de manière autonome sur :
* deployLocalHost.sh : permet de déployer sur votre machine
* waw_workflow.qsub : script de soumission d'un workflow sur le cluster MBB
* waw_workflow.qsub : script de soumission d'un workflow sur le cluster MBB (nécessite la création de l'image Singularity au préalable)
* RunCmdLine.sh : permet de déployer et executer un workflow en ligne de commande (Nous verrons cela dans une prochaine partie)
......@@ -75,12 +80,12 @@ Ce workflow peut être lancé de manière autonome sur :
***deployLocalHost.sh /home/$USER/datasets/rnaseq/ /home/$USER/result1 local***
### B/ Changer la version dun outil
### B/ Changer la version d'un outil
Les procédures d'installation des différents outils nécessaires au bon fonctionnement du workflow sont rassemblées dans un fichier de recette nommé Dockerfile.
* Ouvrir ce fichier et repérer la partie concernant l'installation de kallisto
* Liste des versions de kallisto : <https://github.com/pachterlab/kallisto/releases>
* Modifier le n° de version pour une version de kallisto plus récente
* Modifier le numéro de version pour une version de kallisto plus récente
* Relancer le conteneur avec l'option 'local' pour reconstruire l'image avec vos modifs
***deployLocalHost.sh /home/$USER/datasets/rnaseq/ /home/$USER/result1 local***
......@@ -112,7 +117,7 @@ Ce fichier peur être :
* Enregistrer vos modifs dans maconfig.yaml, par ex. dans /home/$USER/results1/version2/ ; le fichier sera alors visible dans le conteneur sous /Results/maconfig.yaml
* lancer depuis une console la ligne de commande (ici la paramètre 10 pour utiliser 10 coeurs) :
* lancer depuis une console la ligne de commande (ici le paramètre 10 pour utiliser 10 coeurs) :
***bash RunCmdLine.sh /home/$USER/datasets/rnaseq/ /home/$USER/results1/version2/ /Results/maconfig.yaml 10***
......@@ -126,7 +131,7 @@ Ce fichier peur être :
ex 1 deploiement : ***bash deployBigMem.sh /home/votrelogin/data1/ /home/votrelogin/results1/***
A lintérieur du conteneur :
A l'intérieur du conteneur :
* /home/votrelogin/data1/ -> /Data/
* /home/votrelogin/results1/ -> /Results/
......
......@@ -7,3 +7,26 @@ sudo apt-get install -y docker.io
##### nginx install #####
sudo apt-get install -y nginx
###### Singularity install #######
# Build dependencies required to compile Singularity from source below.
sudo apt-get install -y build-essential libssl-dev uuid-dev libgpgme11-dev \
squashfs-tools libseccomp-dev wget pkg-config git cryptsetup debootstrap
#Install Go https://golang.org/ and extract to /usr/local/go
# NOTE(review): tarball fetched over HTTPS but without checksum verification —
# consider verifying the published sha256 before extracting.
wget https://dl.google.com/go/go1.13.linux-amd64.tar.gz
sudo tar --directory=/usr/local -xzvf go1.13.linux-amd64.tar.gz
# Only affects the current shell session; add to ~/.profile to persist.
export PATH=/usr/local/go/bin:$PATH
#Change version if needed
wget https://github.com/singularityware/singularity/releases/download/v3.8.3/singularity-3.8.3.tar.gz
tar -xzvf singularity-3.8.3.tar.gz
cd singularity-3.8.3
# mconfig generates the build directory; it requires Go on PATH (set above).
./mconfig
cd builddir
make
sudo make install
#You can test your installation
singularity run library://godlovedc/funny/lolcow
\ No newline at end of file
#!/bin/bash
# SGE submission script: runs a generated snakemake workflow inside a
# Singularity container on the MBB cluster.
# Usage: qsub waw_workflow.qsub <dataDir> <resultsDir> <configFile> <cores> <singularityImage>
#$ -S /bin/bash
# Job name (NOTE(review): "worflow" is a typo, but it is the scheduler-visible
# job name; kept as-is to avoid changing downstream monitoring).
#$ -N WAW_worflow
# Using current working directory (otherwise, you will have to use '#$ wd /path/to/run')
#$ -cwd
# job time limits (h_rt is required [s_rt == software time limit / h_rt == hardware time limit])
#$ -l h_rt=48:00:00
# Redirects the standard output to the named file.
#$ -o Results/waw.out
#$ -e Results/waw.err

module load singularity-3.1

# Positional arguments (interface unchanged).
dataDir=$1
resultsDir=$2
# Config file in a location binded to /Data or /Results
#ex. file in /home/khalid/dataanalyse/config.yaml
#/home/khalid/dataanalyse/ is dataDir that will be binded to /Data in the container
#configfile must be set to /Data/config.yaml
configFile=$3
cores=$4
singularityImage=$5

# If needed build singularity image
# sudo singularity build --sandbox waw-workflow Singularity.def

#Run the workflow in cmd line
# BUG FIX: the image variable was previously referenced without '$', so the
# literal string "singularityImage" was passed to `singularity exec`.
# Variables are also quoted so paths containing spaces do not word-split.
singularity exec -B "$dataDir":/Data -B "$resultsDir":/Results "$singularityImage" snakemake -s /workflow/Snakefile --configfile "$configFile" --cores "$cores"
......@@ -83,7 +83,7 @@ def report_section_order():
def main():
res = "skip_generalstats: true\n\n"
res += "report_comment: '" + config["params"]["memo"] +"'\n\n"
res += "report_comment: '" + str(config["params"]["memo"]) +"'\n\n"
res += "custom_logo: /workflow/mbb.png\n\n"
res += "custom_logo_url: https://mbb.univ-montp2.fr\n\n"
res += "custom_logo_title: MBB platform\n\n"
......
......@@ -10,6 +10,10 @@ import re
import subprocess
from tools import *
#spython must be installed: pip3 install spython
from spython.main.parse.writers import get_writer
from spython.main.parse.parsers import get_parser
WORKFLOW_YAML = collections.OrderedDict()
TOOLS_YAML = collections.OrderedDict()
......@@ -399,6 +403,16 @@ def generate_pipeline_files(workflow_yaml, output_dir, waw_dir, local_config="de
with open(output_dir + "/Dockerfile", "w") as out:
out.write(generate_dockerfile(output_dir, local_config, waw_dir))
DockerParser = get_parser('docker')
SingularityWriter = get_writer('singularity')
parser = DockerParser(output_dir + "/Dockerfile")
writer = SingularityWriter(parser.recipe)
result = writer.convert()
with open(output_dir + "/Singularity.def", 'w') as f:
print(result, file=f)
f.close()
write_yaml(output_dir + "/files/params.total.yml", generate_params_yaml(workflow_name, get_params_to_remove(), waw_dir))
###1
......@@ -430,6 +444,9 @@ def generate_pipeline_files(workflow_yaml, output_dir, waw_dir, local_config="de
shutil.copy(os.path.join(waw_dir+"/tools.py"), output_dir + "/files")
shutil.copy(os.path.join(waw_dir+"/mbb.png"), output_dir + "/files")
#save a copy of workflow json in files dir
os.system('cp '+ output_dir + '/*.json '+ output_dir + "/files/")
if "input" in WORKFLOW_YAML:
for raw_input in WORKFLOW_YAML["input"]:
shutil.copy(os.path.join(waw_dir+"/raw_inputs/"+raw_input+".py"), output_dir+"/files")
......
......@@ -38,6 +38,7 @@ def raw_bams(results_dir, sample_dir, STOP_CASES=dict()):
else:
exit("Files have different suffixes:" + ','.join(suffixes))
if os.path.exists(results_dir):
with open(results_dir+"/samples.tsv","w") as sampleTab:
sampleTab.write("sample\tbam_file")
for sample in sorted(samples):
......
......@@ -22,6 +22,7 @@ def raw_fasta(results_dir, sample_dir):
if (len(set(suffixes)) == 1 ):
suffix = list(set(suffixes))[0]
if os.path.exists(results_dir):
with open(results_dir+"/samples.tsv","w") as sampleTab:
sampleTab.write("sample\tfasta_file")
for sample in sorted(samples):
......
......@@ -22,6 +22,7 @@ def raw_long_reads(results_dir, sample_dir):
if (len(set(suffixes)) == 1 ):
suffix = list(set(suffixes))[0]
if os.path.exists(results_dir):
with open(results_dir+"/samples.tsv","w") as sampleTab:
sampleTab.write("sample\tfile_reads")
for sample in sorted(samples):
......
......@@ -33,6 +33,7 @@ def raw_reads(results_dir, sample_dir, SeOrPe):
if (len(set(suffixes)) == 1 ):
suffix = list(set(suffixes))[0]
if os.path.exists(results_dir):
with open(results_dir+"/samples.tsv","w") as sampleTab:
if SeOrPe == "PE":
sampleTab.write("sample\tfile_read_1\tfile_read_2")
......
......@@ -6,7 +6,7 @@ def raw_tsv(results_dir, input_dir):
samples = list()
out = dict()
out["tsv"] = input_dir
out["samples"] =""
#out["samples"] = samples
return(out)
......
......@@ -23,6 +23,7 @@ def raw_vcf(results_dir, sample_dir):
if (len(set(suffixes)) == 1 ):
suffix = list(set(suffixes))[0]
if os.path.exists(results_dir):
with open(results_dir+"/samples.tsv","w") as sampleTab:
sampleTab.write("sample\tvcf_file")
for sample in sorted(samples):
......
......@@ -24,6 +24,7 @@ def raw_vcfsinDir(results_dir, sample_dir):
if (len(set(suffixes)) == 1 ):
suffix = list(set(suffixes))[0]
if os.path.exists(results_dir):
with open(results_dir+"/samples.tsv","w") as sampleTab:
sampleTab.write("sample\tvcf_file")
for sample in sorted(samples):
......
......@@ -34,7 +34,7 @@ rule <step_name>__blastn:
config["results_dir"]+'/logs/' + config["<step_name>__blastn_output_dir"] + '/blastn_log.txt'
shell:
# output header
"echo -e 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tqlen' > {output.blastout}; "
"echo -e 'qseqid\tsseqid\tpident\tlength\tmismach\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tqlen' > {output.blastout}; "
# command
"{params.command} "
"-query {input.query} "
......
......@@ -3,9 +3,9 @@ rule <step_name>__deseq2:
**<step_name>__deseq2_inputs()
output:
de_table = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/de_table.csv',
r_data = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/data.RData',
raw_counts = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/raw_counts.tsv',
PCA = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/PCA_mqc.png',
Top_genes = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Top_genes_mqc.tsv',
Top_genes = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Top_genes_mqc.yaml',
MA_plot = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/MA_plot_mqc.png',
Volcano_plot = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Volcano_plot_mqc.png',
Heatmap = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"]+'/Heatmap_mqc.png',
......
......@@ -10,17 +10,23 @@ library(ggplot2)
popmapData = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)
colnames(popmapData)=c("samples","condition")
samples = popmapData$samples
conditions = popmapData$condition
popmapData[,"condition"] = as.factor(popmapData[,"condition"])
#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132")
#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5")
if ("quantification" %in% names(snakemake@config) )
{
files = sort(snakemake@input[["counts"]])
#sort popmapData based on sample names and 'hope' that sorting files will give the right mapping !!!
popmapData = popmapData[order(popmapData$samples), ]
samples = popmapData$samples
conditions = popmapData$condition
# !!!!! here we assume that files are in the same order as in the popmap file !!!!!
if(snakemake@config[["quantification"]] == "htseq_count")
{
print("Quantification was done with htseq-count !")
files = sort(snakemake@input[["counts"]])
names(files) = samples
sampleTable <- data.frame(
sampleName = samples,
......@@ -33,8 +39,9 @@ if ("quantification" %in% names(snakemake@config) )
directory = "",
design = ~ condition
)
} else{ # counts where generated with one of these software "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
} else { # counts where generated with one of these software "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
if (snakemake@params[["tx2gene"]] == TRUE){
# conversion transcrits vers gènes
TxDb <- makeTxDbFromGFF(file = snakemake@params[["annotations"]])
k <- keys(TxDb, keytype = "TXNAME")
......@@ -57,20 +64,35 @@ if ("quantification" %in% names(snakemake@config) )
if("tsv_file" %in% names(snakemake@config)) #it's from a tsv file containing counts for samples
{
print("Counts are given in a file !")
countMatrix <- read.table (snakemake@config[["tsv_file"]], header=TRUE, sep="\t")
countMatrix <- read.table(snakemake@config[["tsv_file"]], header=TRUE, sep="\t", check.names=F)
rownames(countMatrix) = countMatrix[,1]
countMatrix = countMatrix[,-1]
if (all(popmapData$samples %in% colnames(countMatrix)))
{
#reduce countMatrix to the columns given by samples in popmap !
samples = popmapData$samples
countMatrix = countMatrix[, samples]
dds = DESeqDataSetFromMatrix(countData = countMatrix, colData = popmapData, design = ~ condition)
} else {
print("Samples in popmap were not found in count matrix !!!")
exit()
}
}
}
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
raw_counts = assay(dds)
write.table(raw_counts, snakemake@output[["raw_counts"]], quote =F, sep="\t", row.names = T, col.names = NA)
# output a mqc file showing samples and conditions
write.table(colData(dds), paste0(snakemake@params[["outDir"]],"/samples_and_conditions_mqc.tsv"), quote =F, sep="\t", row.names = T, col.names = NA)
dds <- DESeq(dds) #, fitType = snakemake@config[["deseq2_fitType"]], betaPrior = as.logical(snakemake@config[["deseq2_betaPrior"]]), test="Wald")
res <- results(dds)
save.image(snakemake@output[["r_data"]])
write.csv(res, snakemake@output[["de_table"]])
###### PREPARE REPORT ######
......@@ -95,22 +117,38 @@ dev.off()
options(digit=4)
# resSig <- subset(res, padj < 0.1)
# resSigdf = data.frame(resSig@listData,row.names = resSig@rownames)
#datatable(resSigdf,options = list(scrollX = '300px'))
#only significant genes
resSig <- subset(res, padj <= 0.05)
signiGenes <- as.data.frame(resSig)
#topGenes = head(resSigdf[order(resSigdf$padj),], 40)
#top 40 genes basé sur les p-values
#top 40 genes basé sur les p-values significatif ou non !!
topGenes = head(as.data.frame(res[order(res$pvalue),]) , 40)
cat("# id: Top_genes
# section_name: 'Top 40 genes'
# description: 'Tableau des 40 top genes avec les plus faibles pvalues'
# format: 'tsv'
# plot_type: 'table'\ngene\t", file=snakemake@output[["Top_genes"]])
write.table(topGenes,snakemake@output[["Top_genes"]],append=TRUE,sep="\t")
speciesData = snakemake@params[["biomaRtDataset"]]
if ( speciesData != "None")
{ # add link to ensembl.org
topGenes= cbind(topGenes, link=sprintf('<a href="http://www.ensembl.org/%s/geneview?gene=%s" target="_blank" class="btn btn-success" style="color: #fff; background-color: #449d44; border-color: #255625" >%s</a>',speciesData, rownames(topGenes), rownames(topGenes)))
}
cat("id: 'Top_genes'
section_name: 'Top 40 genes'
description: 'Top 40 genes sorted by p-values'
plot_type: 'table'
pconfig:
id: 'Top_genes'
namespace: 'Top 40 genes'
title: 'Top 40 genes'
scale: false
data:
", file = snakemake@output[["Top_genes"]])
for (i in 1:nrow(topGenes))
{
cat (paste0(" ",rownames(topGenes)[i],":\n") ,file = snakemake@output[["Top_genes"]], append=TRUE)
for (j in 1:ncol(topGenes)) cat ( paste0(" ",colnames(topGenes)[j],": '", topGenes[i,j], "'\n" ) ,file = snakemake@output[["Top_genes"]], append=TRUE)
}
png(filename = snakemake@output[["MA_plot"]], height=800, width=800)
DESeq2::plotMA(res, ylim=c(-5,5))
......@@ -123,7 +161,6 @@ png(filename = snakemake@output[["Volcano_plot"]], height=800, width=800)
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
dev.off()
# Heatmap
library(pheatmap)
select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20]
......@@ -137,24 +174,22 @@ png(filename = snakemake@output[["Heatmap"]], height=800, width=800)
pheatmap(assayNTD[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=TRUE, annotation_col=df)
dev.off()
speciesData = snakemake@params[["biomaRtDataset"]]
#what if we have transcript IDs or non-ENSEMBL IDs instead of gene IDs ?????????
if we know speciesData make a blast
if (speciesData != "None") {
#if we know speciesData make a blast
if (speciesData != "None" & nrow(signiGenes) > 1 ) {
# Add annotation with biomaRt
library(biomaRt)
tryCatch({
mart <- useMart(biomart="ENSEMBL_MART_ENSEMBL", host="uswest.ensembl.org")
#"mus_musculus" -> "mmusculus_gene_ensembl"
martName = paste0(substring(speciesData, 1, 1), unlist(strsplit(speciesData,"_"))[2] ,"_gene_ensembl")
species_mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "uswest.ensembl.org", dataset = martName)
species_mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "uswest.ensembl.org", dataset = speciesData)
#for the moment the top genes change to significant genes ?
ann <- getBM(filter="ensembl_gene_id",value=rownames(signiGenes),attributes=c("ensembl_gene_id","description","transcript_length"),mart=species_mart)
ann <- getBM(filter="ensembl_gene_id",value=rownames(topGenes),attributes=c("ensembl_gene_id","description","transcript_length"),mart=species_mart)
go_ann <- getBM(filter="ensembl_gene_id",value=rownames(signiGenes),attributes=c("ensembl_gene_id","description","go_id","name_1006","namespace_1003"),mart=species_mart)
go_ann <- getBM(filter="ensembl_gene_id",value=rownames(topGenes),attributes=c("ensembl_gene_id","description","go_id","name_1006","namespace_1003"),mart=species_mart)
write.table(go_ann,paste0(snakemake@params[["outDir"]],"/TopGenes_go_ann_mqc.tsv"),sep="\t",row.names =FALSE, quote = FALSE)
},
error = function(err) {
......@@ -164,19 +199,19 @@ if (speciesData != "None") {
#test for gene enrichment with gprofiler2
library(gprofiler2)
#for the moment the top genes change to significant genes ?
organisme = gsub("_gene_ensembl", "", speciesData)
organisme = gsub("_gene_ensembl", "", martName)
#limit to GO sources
sources = c("GO:BP", "GO:MF", "GO:CC")
query = rownames(topGenes) #setNames(as.list(rownames(topGenes)), rownames(topGenes))
query = rownames(signiGenes) #setNames(as.list(rownames(topGenes)), rownames(topGenes))
gostres <- gost(query = query,
organism = organisme, ordered_query = FALSE,
multi_query = FALSE, significant = FALSE, exclude_iea = TRUE,
multi_query = FALSE, significant = TRUE, exclude_iea = TRUE,
measure_underrepresentation = FALSE, evcodes = FALSE,
user_threshold = 0.05, correction_method = "g_SCS",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = sources, as_short_link = FALSE)
if ( nrow(gostres$result) > 1) # we have some results
if ( !is.null(gostres)) # we have some results
{
png(filename = paste0(snakemake@params[["outDir"]],"/gprofiler2_plot_mqc.png"), height=800, width=800)
p <- gostplot(gostres, capped = FALSE, interactive = FALSE)
......
......@@ -5,7 +5,7 @@
website: "https://bioconductor.org/packages/release/bioc/html/DESeq2.html",
git: "git clone https://git.bioconductor.org/packages/DESeq2",
description: "Differential gene expression analysis based on the negative binomial distribution. Using normalized count data.",
version: "3.10",
version: "3.13",
documentation: "https://bioconductor.org/packages/release/bioc/manuals/DESeq2/man/DESeq2.pdf",
multiqc: "custom",
commands:
......@@ -24,9 +24,9 @@
outputs:
[
{ name: de_table, type: "tsv", file: de_table.csv, description: "Tableau d'expression différentielle" },
{ name: RData, type: "RData", file: data.RData, description: "Sauvegarde de la session R" },
{ name: raw_counts, type: "tsv", file: raw_counts.tsv, description: "Sauvegarde de la matrice des comptes non normalisés" },
{ name: PCA, type: "png", file: PCA_mqc.png, description: "PCA plot" },
{ name: DE_Table, type: "tsv", file: DE_Table_mqc.tsv, description: "Tableau d'expression différentielle avec p-value ajustée < 0.1" },
{ name: Top_genes, type: "tsv", file: Top_genes_mqc.yaml, description: "Tableau d'expression différentielle avec p-value ajustée < 0.1" },
{ name: MA_plot, type: "png", file: MA_plot_mqc.png, description: "MA-plot log2 fold changes (on the y-axis) versus the mean of normalized counts (on the x-axis)" },
{ name: Heatmap, type: "png", file: Heatmap_mqc.png, description: "Heatmap" },
],
......@@ -59,13 +59,13 @@
type: select,
choices: [
None: "None",
Mus musculus: "mmusculus_gene_ensembl",
Homo sapiens: "hsapiens_gene_ensembl",
Nile tilapia: "oniloticus_gene_ensembl",
Drosophila melanogaster: "dmelanogaster_gene_ensembl"
Mus musculus: "mus_musculus",
Homo sapiens: "homo_sapiens",
Nile tilapia: "oreochromis_niloticus",
Drosophila melanogaster: "drosophila_melanogaster"
],
value: "None",
label: "Species"
label: "Species name in Ensembl.org"
},
# {
# name: deseq2_fitType,
......@@ -86,12 +86,12 @@
install:
{
latticeExtra: ['Rscript -e ''install.packages("latticeExtra",Ncpus=8, clean=TRUE);library("latticeExtra")'''],
DESeq2: ['Rscript -e ''BiocManager::install("DESeq2", version = "3.10",Ncpus=8, clean=TRUE);library("DESeq2")'''],
apeglm: ['Rscript -e ''BiocManager::install("apeglm", version = "3.10",Ncpus=8, clean=TRUE);library("apeglm")'''],
BiocParallel: ['Rscript -e ''BiocManager::install("BiocParallel", version = "3.10",Ncpus=8, clean=TRUE);library("BiocParallel")'''],
tximport: ['Rscript -e ''BiocManager::install("tximport", version = "3.10",Ncpus=8, clean=TRUE);library("tximport")'''],