Commit 4ad900d6 authored by khalid's avatar khalid
Browse files

EdgeR and deseq2 use an expandable input

parent 56971a55
......@@ -133,8 +133,20 @@ def generate(name, path_yaml = "", path_input = DEFAULT_PATH_INPUT, path_output
if inputt["name"] == param["input_name"]:
INPUT_YAML = inputt
# Inputs de type expand
if ("expand" in INPUT_YAML and INPUT_YAML["expand"]):
if param["input_name"] not in inputs_list_from_yaml:
result += "\tinputs[\"" + param["input_name"] + "\"] = list()\n"
if "raw_" in param["origin_command"]:
#if not param["origin_command"] in raw_inputs:
# result += "\tinputs[\"" + param["input_name"] + "\"].append(" + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"]" + expand_end + ")\n"
# raw_inputs.append(param["origin_command"])
reslt += "n"
else:
result += "\tinputs[\"" + param["input_name"] + "\"] = expand(" + expand_begin + "rules." + param["origin_step"] + "__" + param["origin_command"] + ".output." + param["origin_name"] + expand_end + ", sample=SAMPLES)\n"
# Inputs de type liste
if ("list" in INPUT_YAML and INPUT_YAML["list"]):
elif ("list" in INPUT_YAML and INPUT_YAML["list"]):
if param["input_name"] not in inputs_list_from_yaml:
result += "\tinputs[\"" + param["input_name"] + "\"] = list()\n"
if "raw_" in param["origin_command"]:
......
......@@ -12,33 +12,50 @@ library(ggplot2)
register(MulticoreParam(snakemake@threads))
samples = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V1
conditions = read.table(snakemake@config[["group_file"]],stringsAsFactors=F)$V2
samples = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)$V1
conditions = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)$V2
#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132")
#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5")
files = sort(snakemake@input[["counts"]])
names(files) = samples
if (snakemake@params[["tx2gene"]] == TRUE){
# conversion transcrits vers gènes
TxDb <- makeTxDbFromGFF(file = snakemake@params[["annotations"]])
k <- keys(TxDb, keytype = "TXNAME")
tx2gene <- select(TxDb, k, "GENEID", "TXNAME")
txi <- tximport(files, type = snakemake@config[["quantification"]], tx2gene = tx2gene)
} else {
txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE)
if (snakemake@config[["quantification"]] == "htseq_count")
{
print("Quantification was done with htseq-count !")
sampleTable <- data.frame(
sampleName = samples,
fileName = files,
condition = conditions
)
# create the DESeq data object
dds <- DESeqDataSetFromHTSeqCount(
sampleTable = sampleTable,
directory = "", #paste0(snakemake@config[["results_dir"]],"/",snakemake@config[["<step_name>__deseq2_output_dir"]],"/"),
design = ~ condition
)
} else{
if (snakemake@params[["tx2gene"]] == TRUE){
# conversion transcrits vers gènes
TxDb <- makeTxDbFromGFF(file = snakemake@params[["annotations"]])
k <- keys(TxDb, keytype = "TXNAME")
tx2gene <- select(TxDb, k, "GENEID", "TXNAME")
txi <- tximport(files, type = snakemake@config[["quantification"]], tx2gene = tx2gene)
} else {
txi <- tximport(files, type = snakemake@config[["quantification"]], txOut = TRUE)
}
groups = as.factor(conditions)
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "condition"
#dds$condition <- factor(dds$condition, levels = unique(conditions))
sampleTable <- data.frame(condition = factor(conditions))
rownames(sampleTable) <- colnames(txi$counts)
dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat)
}
groups = as.factor(conditions)
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "condition"
#dds$condition <- factor(dds$condition, levels = unique(conditions))
sampleTable <- data.frame(condition = factor(conditions))
rownames(sampleTable) <- colnames(txi$counts)
dds <- DESeqDataSetFromTximport(txi, sampleTable, designMat)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
......@@ -50,14 +67,28 @@ save.image(snakemake@output[["r_data"]])
write.csv(res, snakemake@output[["de_table"]])
###### PREPARE REPORT ######
# normalized, variance-stabilized transformed counts for visualization
if (sum(keep) < 1000) {
vsd = varianceStabilizingTransformation(dds)
} else {
vsd <- vst(dds, blind=FALSE)
}
dat <- plotPCA(vsd, intgroup="condition",returnData=TRUE)
cpm_counts = cpm(txi$counts)
pca = prcomp(t(cpm_counts))
png(filename = snakemake@output[["PCA"]], height=800, width=800)
ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
p <- ggplot(dat,aes(x=PC1,y=PC2,col=group))
p + geom_point() + ggtitle("PCA on normalized, variance-stabilized transformed counts")
dev.off()
# cpm_counts = cpm(txi$counts)
# pca = prcomp(t(cpm_counts))
# png(filename = snakemake@output[["PCA"]], height=800, width=800)
# ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
# dev.off()
options(digit=4)
resSig <- subset(res, padj < 0.1)
......@@ -86,7 +117,6 @@ with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue),
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
dev.off()
cpmTop = cpm_counts[rownames(resSigdf),]
# Heatmap
library(pheatmap)
......@@ -99,6 +129,9 @@ assayNTD = assay(ntd)
png(filename = snakemake@output[["Heatmap"]], height=800, width=800)
pheatmap(assayNTD[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
#cpmTop = cpm_counts[rownames(resSigdf),]
#heatmap(cpmTop,main = "Heatmap",margins = c(10,1))
dev.off()
......
......@@ -5,7 +5,7 @@
website: "https://bioconductor.org/packages/release/bioc/html/DESeq2.html",
git: "git clone https://git.bioconductor.org/packages/DESeq2",
description: "Differential gene expression analysis based on the negative binomial distribution. Using normalized count data.",
version: "3.8",
version: "3\.10",
documentation: "https://bioconductor.org/packages/release/bioc/manuals/DESeq2/man/DESeq2.pdf",
multiqc: "custom",
commands:
......@@ -16,8 +16,11 @@
command: ~,
category: "differential_expression",
output_dir: deseq2,
inputs: [{ name: counts, type: "tsv" }
{ name: group_file, type: "popmap", file: "", description: "Path to tsv file with samples conditions"}],
inputs:
[
{ name: counts, type: "tsv" },
{ name: popmap_file, type: "popmap", file: "", description: "Path to tsv file with samples conditions"},
],
outputs:
[
{ name: de_table, type: "tsv", file: de_table.csv, description: "Tableau d'expression différentielle" },
......@@ -70,12 +73,12 @@
install:
{
latticeExtra: ['Rscript -e ''install.packages("latticeExtra",Ncpus=8, clean=TRUE);library("latticeExtra")'''],
DESeq2: ['Rscript -e ''BiocManager::install("DESeq2", version = "3.8",Ncpus=8, clean=TRUE);library("DESeq2")'''],
apeglm: ['Rscript -e ''BiocManager::install("apeglm", version = "3.8",Ncpus=8, clean=TRUE);library("apeglm")'''],
BiocParallel: ['Rscript -e ''BiocManager::install("BiocParallel", version = "3.8",Ncpus=8, clean=TRUE);library("BiocParallel")'''],
tximport: ['Rscript -e ''BiocManager::install("tximport", version = "3.8",Ncpus=8, clean=TRUE);library("tximport")'''],
rhdf5: ['Rscript -e ''BiocManager::install("rhdf5", version = "3.8",Ncpus=8, clean=TRUE);library("rhdf5")'''],
GenomicFeatures: ['Rscript -e ''BiocManager::install("GenomicFeatures", version = "3.8",Ncpus=8, clean=TRUE);library("GenomicFeatures")'''],
DESeq2: ['Rscript -e ''BiocManager::install("DESeq2", version = "3.10",Ncpus=8, clean=TRUE);library("DESeq2")'''],
apeglm: ['Rscript -e ''BiocManager::install("apeglm", version = "3.10",Ncpus=8, clean=TRUE);library("apeglm")'''],
BiocParallel: ['Rscript -e ''BiocManager::install("BiocParallel", version = "3.10",Ncpus=8, clean=TRUE);library("BiocParallel")'''],
tximport: ['Rscript -e ''BiocManager::install("tximport", version = "3.10",Ncpus=8, clean=TRUE);library("tximport")'''],
rhdf5: ['Rscript -e ''BiocManager::install("rhdf5", version = "3.10",Ncpus=8, clean=TRUE);library("rhdf5")'''],
GenomicFeatures: ['Rscript -e ''BiocManager::install("GenomicFeatures", version = "3.10",Ncpus=8, clean=TRUE);library("GenomicFeatures")'''],
pheatmap: ['Rscript -e ''install.packages("pheatmap",Ncpus=8, clean=TRUE);library("pheatmap")''']
},
script: deseq2.script.R,
......
......@@ -15,9 +15,11 @@
command: ~,
category: "differential_expression",
output_dir: edger,
inputs: [{ name: counts, type: "tsv" },
{ name: group_file, type: "popmap", file: "", description: "Path to tsv file with samples conditions"},
]
inputs:
[
{ name: counts, type: "tsv" },
{ name: group_file, type: "popmap", file: "", description: "Path to tsv file with samples conditions"},
],
outputs:
[
{ name: de_table, type: "csv", file: de_table.csv, description: "Tableau d'expression différentielle" },
......
......@@ -53,7 +53,7 @@ elif config["SeOrPe"] == "SE":
"-x {params.indexPrefix} "
"--rg-id ID:WAW --rg SM:{wildcards.sample} "
#"-S "#output in sam format
"{input.read} 2> {log} "
" -Ub{input.read} 2> {log} "
"| samtools view -b 2>> {log} "
"| samtools sort -@ {threads} > {output.bam} 2>> {log} &&"
"samtools index -@ {threads} {output.bam} 2>> {log}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment