Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mmassaviol
WAWmassaviol
Commits
35cebac9
Commit
35cebac9
authored
Oct 08, 2021
by
khalid
Browse files
add a raw tsv input
parent
cf84f6d1
Changes
3
Hide whitespace changes
Inline
Side-by-side
generate_workflow_snakefile.py
View file @
35cebac9
...
...
@@ -141,7 +141,7 @@ def generate(name, path_yaml = "", path_input = DEFAULT_PATH_INPUT, path_output
#if not param["origin_command"] in raw_inputs:
# result += "\tinputs[\"" + param["input_name"] + "\"].append(" + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"]" + expand_end + ")\n"
# raw_inputs.append(param["origin_command"])
reslt
+=
"
\n
"
res
u
lt
+=
"
\t
inputs[
\"
"
+
param
[
"input_name"
]
+
"
\"
] = "
+
expand_begin
+
param
[
"origin_command"
]
+
"[
\"
"
+
param
[
"origin_name"
]
+
"
\"
] "
+
expand_end
+
"
\n
"
else
:
result
+=
"
\t
inputs[
\"
"
+
param
[
"input_name"
]
+
"
\"
] = expand("
+
expand_begin
+
"rules."
+
param
[
"origin_step"
]
+
"__"
+
param
[
"origin_command"
]
+
".output."
+
param
[
"origin_name"
]
+
expand_end
+
", sample=SAMPLES)
\n
"
# Inputs de type liste
...
...
tools/deseq2/deseq2.rule.snakefile
View file @
35cebac9
...
...
@@ -14,6 +14,7 @@ rule <step_name>__deseq2:
script = config["<step_name>__deseq2_command"],
tx2gene = config["<step_name>__deseq2_tx2gene"],
annotations = config["<step_name>__deseq2_annotations"],
outDir = config["results_dir"]+'/'+config["<step_name>__deseq2_output_dir"],
biomaRtDataset = config["<step_name>__deseq2_biomaRtDataset"],
script:
config["<step_name>__deseq2_script"]
\ No newline at end of file
tools/deseq2/deseq2.script.R
View file @
35cebac9
...
...
@@ -3,12 +3,11 @@
#sink(log, type="message")
library
(
DESeq2
)
library
(
apeglm
)
library
(
BiocParallel
)
library
(
tximport
)
library
(
GenomicFeatures
)
library
(
ggplot2
)
register
(
MulticoreParam
(
snakemake
@
threads
))
samples
=
read.table
(
snakemake
@
config
[[
"popmap_file"
]],
stringsAsFactors
=
F
)
$
V1
conditions
=
read.table
(
snakemake
@
config
[[
"popmap_file"
]],
stringsAsFactors
=
F
)
$
V2
...
...
@@ -28,11 +27,11 @@ if (snakemake@config[["quantification"]] == "htseq_count")
# create the DESeq data object
dds
<-
DESeqDataSetFromHTSeqCount
(
sampleTable
=
sampleTable
,
directory
=
""
,
#paste0(snakemake@config[["results_dir"]],"/",snakemake@config[["<step_name>__deseq2_output_dir"]],"/"),
directory
=
""
,
design
=
~
condition
)
}
else
{
}
else
{
# counts where generated with one of these software "salmon", "sailfish", "alevin", "kallisto", "rsem", "stringtie"
if
(
snakemake
@
params
[[
"tx2gene"
]]
==
TRUE
){
# conversion transcrits vers gènes
TxDb
<-
makeTxDbFromGFF
(
file
=
snakemake
@
params
[[
"annotations"
]])
...
...
@@ -46,7 +45,6 @@ if (snakemake@config[["quantification"]] == "htseq_count")
groups
=
as.factor
(
conditions
)
designMat
<-
model.matrix
(
~
groups
)
colnames
(
designMat
)[
2
]
=
"condition"
#dds$condition <- factor(dds$condition, levels = unique(conditions))
sampleTable
<-
data.frame
(
condition
=
factor
(
conditions
))
rownames
(
sampleTable
)
<-
colnames
(
txi
$
counts
)
...
...
@@ -83,12 +81,6 @@ png(filename = snakemake@output[["PCA"]], height=800, width=800)
coord_fixed
()
+
ggtitle
(
"PCA on normalized, variance-stabilized transformed counts"
)
dev.off
()
# cpm_counts = cpm(txi$counts)
# pca = prcomp(t(cpm_counts))
# png(filename = snakemake@output[["PCA"]], height=800, width=800)
# ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
# dev.off()
options
(
digit
=
4
)
...
...
@@ -110,14 +102,14 @@ cat("# id: Top_genes
write.table
(
topGenes
,
snakemake
@
output
[[
"Top_genes"
]],
append
=
TRUE
,
sep
=
"\t"
)
png
(
filename
=
snakemake
@
output
[[
"MA_plot"
]],
height
=
800
,
width
=
800
)
DESeq2
::
plotMA
(
res
,
ylim
=
c
(
-5
,
5
))
DESeq2
::
plotMA
(
res
,
ylim
=
c
(
-5
,
5
))
dev.off
()
png
(
filename
=
snakemake
@
output
[[
"Volcano_plot"
]],
height
=
800
,
width
=
800
)
with
(
res
,
plot
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
main
=
"Volcano plot"
,
xlim
=
c
(
-2.5
,
2
)))
with
(
subset
(
res
,
padj
<
.05
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"red"
))
with
(
subset
(
res
,
abs
(
log2FoldChange
)
>
1
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"orange"
))
with
(
subset
(
res
,
padj
<
.05
&
abs
(
log2FoldChange
)
>
1
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"green"
))
with
(
res
,
plot
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
main
=
"Volcano plot"
,
xlim
=
c
(
-2.5
,
2
)))
with
(
subset
(
res
,
padj
<
.05
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"red"
))
with
(
subset
(
res
,
abs
(
log2FoldChange
)
>
1
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"orange"
))
with
(
subset
(
res
,
padj
<
.05
&
abs
(
log2FoldChange
)
>
1
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"green"
))
dev.off
()
...
...
@@ -131,78 +123,59 @@ colnames(df) = "Conditions"
assayNTD
=
assay
(
ntd
)
png
(
filename
=
snakemake
@
output
[[
"Heatmap"
]],
height
=
800
,
width
=
800
)
pheatmap
(
assayNTD
[
select
,],
cluster_rows
=
FALSE
,
show_rownames
=
FALSE
,
cluster_cols
=
TRUE
,
annotation_col
=
df
)
#heatmap(cpmTop,main = "Heatmap",margins = c(10,1))
pheatmap
(
assayNTD
[
select
,],
cluster_rows
=
FALSE
,
show_rownames
=
FALSE
,
cluster_cols
=
TRUE
,
annotation_col
=
df
)
dev.off
()
speciesData
=
snakemake
@
params
[[
"biomaRtDataset"
]]
print
(
speciesData
)
if
(
speciesData
!=
"None"
)
{
# Add annotation with biomaRt
library
(
biomaRt
)
mart
<-
useMart
(
biomart
=
"ENSEMBL_MART_ENSEMBL"
,
host
=
"ensembl.org"
)
tryCatch
({
mart
<-
useMart
(
biomart
=
"ENSEMBL_MART_ENSEMBL"
,
host
=
"uswest.ensembl.org"
)
species_mart
<-
useMart
(
biomart
=
"ENSEMBL_MART_ENSEMBL"
,
host
=
"ensembl.org"
,
dataset
=
)
species_mart
<-
useMart
(
biomart
=
"ENSEMBL_MART_ENSEMBL"
,
host
=
"
uswest.
ensembl.org"
,
dataset
=
speciesData
)
#for the moment the top genes change to significant genes ?
#what if we have tarnscript ID instead of gene ID ?
ann
<-
getBM
(
filter
=
"ensembl_gene_id"
,
value
=
rownames
(
topGenes
),
attributes
=
c
(
"ensembl_gene_id"
,
"description"
,
"transcript_length"
),
mart
=
species_mart
)
go_ann
<-
getBM
(
filter
=
"ensembl_gene_id"
,
value
=
rownames
(
topGenes
),
attributes
=
c
(
"ensembl_gene_id"
,
"description"
,
"go_id"
,
"name_1006"
,
"namespace_1003"
),
mart
=
species_mart
)
write.table
(
go_ann
,
paste
(
snakemake
@
output
[[
"Top_genes"
]],
"_mqc.tsv"
,
sep
=
"\t"
))
write.table
(
go_ann
,
paste0
(
snakemake
@
params
[[
"outDir"
]],
"/TopGenes_go_ann_mqc.tsv"
),
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
},
error
=
function
(
err
)
{
print
(
"Can't access Ensembl Biomart !"
)
}
)
#test for gene enrichment with gprofiler2
library
(
gprofiler2
)
#for the moment the top genes change to significant genes ?
organisme
=
gsub
(
"_gene_ensembl"
,
""
,
speciesData
)
gostres
<-
gost
(
query
=
rownames
(
topGenes
),
#limit to GO sources
sources
=
c
(
"GO:BP"
,
"GO:MF"
,
"GO:CC"
)
query
=
rownames
(
topGenes
)
#setNames(as.list(rownames(topGenes)), rownames(topGenes))
gostres
<-
gost
(
query
=
query
,
organism
=
organisme
,
ordered_query
=
FALSE
,
multi_query
=
FALSE
,
significant
=
FALSE
,
exclude_iea
=
FALS
E
,
multi_query
=
FALSE
,
significant
=
FALSE
,
exclude_iea
=
TRU
E
,
measure_underrepresentation
=
FALSE
,
evcodes
=
FALSE
,
user_threshold
=
0.05
,
correction_method
=
"g_SCS"
,
domain_scope
=
"annotated"
,
custom_bg
=
NULL
,
numeric_ns
=
""
,
sources
=
NULL
,
as_short_link
=
FALSE
)
numeric_ns
=
""
,
sources
=
sources
,
as_short_link
=
FALSE
)
png
(
filename
=
paste0
(
snakemake
@
config
[[
"results_dir"
]],
"/"
,
snakemake
@
config
[[
"<step_name>__deseq2_output_dir"
]],
"/gprofiler2_mqc.png"
),
height
=
800
,
width
=
800
)
gostplot
(
gostres
,
capped
=
TRUE
,
interactive
=
TRUE
)
png
(
filename
=
paste0
(
snakemake
@
params
[[
"outDir"
]],
"/gprofiler2_plot_mqc.png"
),
height
=
800
,
width
=
800
)
p
<-
gostplot
(
gostres
,
capped
=
FALSE
,
interactive
=
FALSE
)
print
(
p
+
ggtitle
(
"gprofiler2 enrichment analysis for top genes to find over-representation of terms from Gene Ontology"
))
dev.off
()
write.table
(
gostres
$
result
,
paste0
(
snakemake
@
config
[[
"results_dir"
]],
"/"
,
snakemake
@
config
[[
"<step_name>__deseq2_output_dir"
]],
"/gprofiler2_mqc.tsv"
),
sep
=
"\t"
)
#la colonne parents est une liste qu'il faut transformer avant write.table !!
#write.table(apply(gostres$result,2,as.character), paste0(snakemake@params[["outDir"]],"/gprofiler2_mqc.tsv"), sep="\t",row.names =FALSE, quote = FALSE)
#ou
ttable
=
cbind
(
1
:
nrow
(
gostres
$
result
),
gostres
$
result
[,
-14
])
colnames
(
ttable
)[
1
]
=
"id"
write.table
(
ttable
,
paste0
(
snakemake
@
params
[[
"outDir"
]],
"/gprofiler2_mqc.tsv"
),
sep
=
"\t"
,
row.names
=
FALSE
,
quote
=
FALSE
)
}
#cpmTop = cpm_counts[rownames(resSigdf),]
#resLFC <- lfcShrink(dds, coef="groups", type="apeglm")
#resOrdered <- res[order(res$pvalue),]
#plotMA(res, ylim=c(-2,2))
#plotMA(resLFC, ylim=c(-2,2))
#
# ntd <- normTransform(dds)
#
# library("vsn")
# meanSdPlot(assay(ntd))
#
# library("pheatmap")
# select <- order(rowMeans(counts(dds,normalized=TRUE)), decreasing=TRUE)[1:20]
# df <- as.data.frame(colData(dds)[,c("condition")])
# pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=FALSE, annotation_col=df)
#
#
# sampleDists <- dist(t(assay(vsd)))
#
# library("RColorBrewer")
# sampleDistMatrix <- as.matrix(sampleDists)
# rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
# colnames(sampleDistMatrix) <- NULL
# colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
# pheatmap(sampleDistMatrix,
# clustering_distance_rows=sampleDists,
# clustering_distance_cols=sampleDists,
# col=colors)
#
# plotPCA(vsd, intgroup=c("condition"))
#sink(NULL, type = "message")
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment