Commit 35cebac9 authored by khalid's avatar khalid
Browse files

add a raw tsv input

parent cf84f6d1
......@@ -141,7 +141,7 @@ def generate(name, path_yaml = "", path_input = DEFAULT_PATH_INPUT, path_output
#if not param["origin_command"] in raw_inputs:
# result += "\tinputs[\"" + param["input_name"] + "\"].append(" + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"]" + expand_end + ")\n"
# raw_inputs.append(param["origin_command"])
reslt += "\n"
result += "\tinputs[\"" + param["input_name"] + "\"] = " + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"] " + expand_end + "\n"
else:
result += "\tinputs[\"" + param["input_name"] + "\"] = expand(" + expand_begin + "rules." + param["origin_step"] + "__" + param["origin_command"] + ".output." + param["origin_name"] + expand_end + ", sample=SAMPLES)\n"
# Inputs de type liste
......
......@@ -14,6 +14,7 @@ rule <step_name>__deseq2:
script = config["<step_name>__deseq2_command"],
tx2gene = config["<step_name>__deseq2_tx2gene"],
annotations = config["<step_name>__deseq2_annotations"],
outDir = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"],
biomaRtDataset = config["<step_name>__deseq2_biomaRtDataset"],
script:
config["<step_name>__deseq2_script"]
\ No newline at end of file
......@@ -3,12 +3,11 @@
#sink(log, type="message")
library(DESeq2)
library(apeglm)
library(BiocParallel)
library(tximport)
library(GenomicFeatures)
library(ggplot2)
register(MulticoreParam(snakemake@threads))
samples = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)$V1
conditions = read.table(snakemake@config[["popmap_file"]],stringsAsFactors=F)$V2
......@@ -28,11 +27,11 @@ if (snakemake@config[["quantification"]] == "htseq_count")
# create the DESeq data object
dds <- DESeqDataSetFromHTSeqCount(
sampleTable = sampleTable,
directory = "", #paste0(snakemake@config[["results_dir"]],"/",snakemake@config[["<step_name>__deseq2_output_dir"]],"/"),
directory = "",
design = ~ condition
)
} else{
} else{ # counts where generated with one of these software "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
if (snakemake@params[["tx2gene"]] == TRUE){
# conversion transcrits vers gènes
TxDb <- makeTxDbFromGFF(file = snakemake@params[["annotations"]])
......@@ -46,7 +45,6 @@ if (snakemake@config[["quantification"]] == "htseq_count")
groups = as.factor(conditions)
designMat <- model.matrix(~groups)
colnames(designMat)[2] = "condition"
#dds$condition <- factor(dds$condition, levels = unique(conditions))
sampleTable <- data.frame(condition = factor(conditions))
rownames(sampleTable) <- colnames(txi$counts)
......@@ -83,12 +81,6 @@ png(filename = snakemake@output[["PCA"]], height=800, width=800)
coord_fixed() + ggtitle("PCA on normalized, variance-stabilized transformed counts")
dev.off()
# cpm_counts = cpm(txi$counts)
# pca = prcomp(t(cpm_counts))
# png(filename = snakemake@output[["PCA"]], height=800, width=800)
# ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
# dev.off()
options(digit=4)
......@@ -110,14 +102,14 @@ cat("# id: Top_genes
write.table(topGenes,snakemake@output[["Top_genes"]],append=TRUE,sep="\t")
png(filename = snakemake@output[["MA_plot"]], height=800, width=800)
DESeq2::plotMA(res, ylim=c(-5,5))
DESeq2::plotMA(res, ylim=c(-5,5))
dev.off()
png(filename = snakemake@output[["Volcano_plot"]], height=800, width=800)
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2)))
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2)))
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
dev.off()
......@@ -131,78 +123,59 @@ colnames(df) = "Conditions"
assayNTD = assay(ntd)
png(filename = snakemake@output[["Heatmap"]], height=800, width=800)
pheatmap(assayNTD[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=TRUE, annotation_col=df)
#heatmap(cpmTop,main = "Heatmap",margins = c(10,1))
pheatmap(assayNTD[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=TRUE, annotation_col=df)
dev.off()
speciesData = snakemake@params[["biomaRtDataset"]]
print(speciesData)
if (speciesData != "None") {
# Add annotation with biomaRt
library(biomaRt)
mart <- useMart(biomart="ENSEMBL_MART_ENSEMBL", host="ensembl.org")
tryCatch({
mart <- useMart(biomart="ENSEMBL_MART_ENSEMBL", host="uswest.ensembl.org")
species_mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "ensembl.org", dataset = )
species_mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "uswest.ensembl.org", dataset = speciesData)
#for the moment the top genes change to significant genes ?
#what if we have tarnscript ID instead of gene ID ?
ann <- getBM(filter="ensembl_gene_id",value=rownames(topGenes),attributes=c("ensembl_gene_id","description","transcript_length"),mart=species_mart)
go_ann <- getBM(filter="ensembl_gene_id",value=rownames(topGenes),attributes=c("ensembl_gene_id","description","go_id","name_1006","namespace_1003"),mart=species_mart)
write.table(go_ann,paste(snakemake@output[["Top_genes"]],"_mqc.tsv",sep="\t"))
write.table(go_ann,paste0(snakemake@params[["outDir"]],"/TopGenes_go_ann_mqc.tsv"),sep="\t",row.names =FALSE, quote = FALSE)
},
error = function(err) {
print("Can't access Ensembl Biomart !")
}
)
#test for gene enrichment with gprofiler2
library(gprofiler2)
#for the moment the top genes change to significant genes ?
organisme = gsub("_gene_ensembl", "", speciesData)
gostres <- gost(query = rownames(topGenes),
#limit to GO sources
sources = c("GO:BP", "GO:MF", "GO:CC")
query = rownames(topGenes) #setNames(as.list(rownames(topGenes)), rownames(topGenes))
gostres <- gost(query = query,
organism = organisme, ordered_query = FALSE,
multi_query = FALSE, significant = FALSE, exclude_iea = FALSE,
multi_query = FALSE, significant = FALSE, exclude_iea = TRUE,
measure_underrepresentation = FALSE, evcodes = FALSE,
user_threshold = 0.05, correction_method = "g_SCS",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = NULL, as_short_link = FALSE)
numeric_ns = "", sources = sources, as_short_link = FALSE)
png(filename = paste0(snakemake@config[["results_dir"]],"/",snakemake@config[["<step_name>__deseq2_output_dir"]],"/gprofiler2_mqc.png"), height=800, width=800)
gostplot(gostres, capped = TRUE, interactive = TRUE)
png(filename = paste0(snakemake@params[["outDir"]],"/gprofiler2_plot_mqc.png"), height=800, width=800)
p <- gostplot(gostres, capped = FALSE, interactive = FALSE)
print(p + ggtitle("gprofiler2 enrichment analysis for top genes to find over-representation of terms from Gene Ontology"))
dev.off()
write.table(gostres$result, paste0(snakemake@config[["results_dir"]],"/",snakemake@config[["<step_name>__deseq2_output_dir"]],"/gprofiler2_mqc.tsv"), sep="\t")
#la colonne parents est une liste qu'il faut transformer avant write.table !!
#write.table(apply(gostres$result,2,as.character), paste0(snakemake@params[["outDir"]],"/gprofiler2_mqc.tsv"), sep="\t",row.names =FALSE, quote = FALSE)
#ou
ttable = cbind(1:nrow(gostres$result), gostres$result[,-14])
colnames(ttable)[1]="id"
write.table(ttable, paste0(snakemake@params[["outDir"]],"/gprofiler2_mqc.tsv"), sep="\t",row.names =FALSE, quote = FALSE)
}
#cpmTop = cpm_counts[rownames(resSigdf),]
#resLFC <- lfcShrink(dds, coef="groups", type="apeglm")
#resOrdered <- res[order(res$pvalue),]
#plotMA(res, ylim=c(-2,2))
#plotMA(resLFC, ylim=c(-2,2))
#
# ntd <- normTransform(dds)
#
# library("vsn")
# meanSdPlot(assay(ntd))
#
# library("pheatmap")
# select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20]
# df <- as.data.frame(colData(dds)[,c("condition")])
# pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
#
#
# sampleDists <- dist(t(assay(vsd)))
#
# library("RColorBrewer")
# sampleDistMatrix <- as.matrix(sampleDists)
# rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
# colnames(sampleDistMatrix) <- NULL
# colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
# pheatmap(sampleDistMatrix,
# clustering_distance_rows=sampleDists,
# clustering_distance_cols=sampleDists,
# col=colors)
#
# plotPCA(vsd, intgroup=c("condition"))
#sink(NULL, type = "message")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment