Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mmassaviol
WAWmassaviol
Commits
4ad900d6
Commit
4ad900d6
authored
Oct 06, 2021
by
khalid
Browse files
EdgeR and deseq2 use an expandable input
parent
56971a55
Changes
5
Hide whitespace changes
Inline
Side-by-side
generate_workflow_snakefile.py
View file @
4ad900d6
...
...
@@ -133,8 +133,20 @@ def generate(name, path_yaml = "", path_input = DEFAULT_PATH_INPUT, path_output
if
inputt
[
"name"
]
==
param
[
"input_name"
]:
INPUT_YAML
=
inputt
# Inputs de type expand
if
(
"expand"
in
INPUT_YAML
and
INPUT_YAML
[
"expand"
]):
if
param
[
"input_name"
]
not
in
inputs_list_from_yaml
:
result
+=
"
\t
inputs[
\"
"
+
param
[
"input_name"
]
+
"
\"
] = list()
\n
"
if
"raw_"
in
param
[
"origin_command"
]:
#if not param["origin_command"] in raw_inputs:
# result += "\tinputs[\"" + param["input_name"] + "\"].append(" + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"]" + expand_end + ")\n"
# raw_inputs.append(param["origin_command"])
reslt
+=
"n"
else
:
result
+=
"
\t
inputs[
\"
"
+
param
[
"input_name"
]
+
"
\"
] = expand("
+
expand_begin
+
"rules."
+
param
[
"origin_step"
]
+
"__"
+
param
[
"origin_command"
]
+
".output."
+
param
[
"origin_name"
]
+
expand_end
+
", sample=SAMPLES)
\n
"
# Inputs de type liste
if
(
"list"
in
INPUT_YAML
and
INPUT_YAML
[
"list"
]):
el
if
(
"list"
in
INPUT_YAML
and
INPUT_YAML
[
"list"
]):
if
param
[
"input_name"
]
not
in
inputs_list_from_yaml
:
result
+=
"
\t
inputs[
\"
"
+
param
[
"input_name"
]
+
"
\"
] = list()
\n
"
if
"raw_"
in
param
[
"origin_command"
]:
...
...
tools/deseq2/deseq2.script.R
View file @
4ad900d6
...
...
@@ -12,33 +12,50 @@ library(ggplot2)
register
(
MulticoreParam
(
snakemake
@
threads
))
samples
=
read.table
(
snakemake
@
config
[[
"
grou
p_file"
]],
stringsAsFactors
=
F
)
$
V1
conditions
=
read.table
(
snakemake
@
config
[[
"
grou
p_file"
]],
stringsAsFactors
=
F
)
$
V2
samples
=
read.table
(
snakemake
@
config
[[
"
popma
p_file"
]],
stringsAsFactors
=
F
)
$
V1
conditions
=
read.table
(
snakemake
@
config
[[
"
popma
p_file"
]],
stringsAsFactors
=
F
)
$
V2
#samples = c("DRR016125","DRR016126","DRR016127","DRR016128","DRR016129","DRR016130","DRR016131","DRR016132")
#files = file.path("/home/mbb/Documents/results2/kallisto/quant",samples,"abundance.h5")
files
=
sort
(
snakemake
@
input
[[
"counts"
]])
names
(
files
)
=
samples
if
(
snakemake
@
params
[[
"tx2gene"
]]
==
TRUE
){
# conversion transcrits vers gènes
TxDb
<-
makeTxDbFromGFF
(
file
=
snakemake
@
params
[[
"annotations"
]])
k
<-
keys
(
TxDb
,
keytype
=
"TXNAME"
)
tx2gene
<-
select
(
TxDb
,
k
,
"GENEID"
,
"TXNAME"
)
txi
<-
tximport
(
files
,
type
=
snakemake
@
config
[[
"quantification"
]],
tx2gene
=
tx2gene
)
}
else
{
txi
<-
tximport
(
files
,
type
=
snakemake
@
config
[[
"quantification"
]],
txOut
=
TRUE
)
if
(
snakemake
@
config
[[
"quantification"
]]
==
"htseq_count"
)
{
print
(
"Quantification was done with htseq-count !"
)
sampleTable
<-
data.frame
(
sampleName
=
samples
,
fileName
=
files
,
condition
=
conditions
)
# create the DESeq data object
dds
<-
DESeqDataSetFromHTSeqCount
(
sampleTable
=
sampleTable
,
directory
=
""
,
#paste0(snakemake@config[["results_dir"]],"/",snakemake@config[["<step_name>__deseq2_output_dir"]],"/"),
design
=
~
condition
)
}
else
{
if
(
snakemake
@
params
[[
"tx2gene"
]]
==
TRUE
){
# conversion transcrits vers gènes
TxDb
<-
makeTxDbFromGFF
(
file
=
snakemake
@
params
[[
"annotations"
]])
k
<-
keys
(
TxDb
,
keytype
=
"TXNAME"
)
tx2gene
<-
select
(
TxDb
,
k
,
"GENEID"
,
"TXNAME"
)
txi
<-
tximport
(
files
,
type
=
snakemake
@
config
[[
"quantification"
]],
tx2gene
=
tx2gene
)
}
else
{
txi
<-
tximport
(
files
,
type
=
snakemake
@
config
[[
"quantification"
]],
txOut
=
TRUE
)
}
groups
=
as.factor
(
conditions
)
designMat
<-
model.matrix
(
~
groups
)
colnames
(
designMat
)[
2
]
=
"condition"
#dds$condition <- factor(dds$condition, levels = unique(conditions))
sampleTable
<-
data.frame
(
condition
=
factor
(
conditions
))
rownames
(
sampleTable
)
<-
colnames
(
txi
$
counts
)
dds
<-
DESeqDataSetFromTximport
(
txi
,
sampleTable
,
designMat
)
}
groups
=
as.factor
(
conditions
)
designMat
<-
model.matrix
(
~
groups
)
colnames
(
designMat
)[
2
]
=
"condition"
#dds$condition <- factor(dds$condition, levels = unique(conditions))
sampleTable
<-
data.frame
(
condition
=
factor
(
conditions
))
rownames
(
sampleTable
)
<-
colnames
(
txi
$
counts
)
dds
<-
DESeqDataSetFromTximport
(
txi
,
sampleTable
,
designMat
)
keep
<-
rowSums
(
counts
(
dds
))
>=
10
dds
<-
dds
[
keep
,]
...
...
@@ -50,14 +67,28 @@ save.image(snakemake@output[["r_data"]])
write.csv
(
res
,
snakemake
@
output
[[
"de_table"
]])
###### PREPARE REPORT ######
# normalized, variance-stabilized transformed counts for visualization
if
(
sum
(
keep
)
<
1000
)
{
vsd
=
varianceStabilizingTransformation
(
dds
)
}
else
{
vsd
<-
vst
(
dds
,
blind
=
FALSE
)
}
dat
<-
plotPCA
(
vsd
,
intgroup
=
"condition"
,
returnData
=
TRUE
)
cpm_counts
=
cpm
(
txi
$
counts
)
pca
=
prcomp
(
t
(
cpm_counts
))
png
(
filename
=
snakemake
@
output
[[
"PCA"
]],
height
=
800
,
width
=
800
)
ggplot
(
data.frame
(
pca
$
x
),
aes
(
x
=
pca
$
x
[,
1
]
,
y
=
pca
$
x
[,
2
],
col
=
conditions
))
+
geom_point
()
p
<-
ggplot
(
dat
,
aes
(
x
=
PC1
,
y
=
PC2
,
col
=
group
))
p
+
geom_point
()
+
ggtitle
(
"PCA on normalized, variance-stabilized transformed counts"
)
dev.off
()
# cpm_counts = cpm(txi$counts)
# pca = prcomp(t(cpm_counts))
# png(filename = snakemake@output[["PCA"]], height=800, width=800)
# ggplot(data.frame(pca$x),aes(x =pca$x[,1] , y = pca$x[,2],col = conditions)) + geom_point()
# dev.off()
options
(
digit
=
4
)
resSig
<-
subset
(
res
,
padj
<
0.1
)
...
...
@@ -86,7 +117,6 @@ with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue),
with
(
subset
(
res
,
padj
<
.05
&
abs
(
log2FoldChange
)
>
1
),
points
(
log2FoldChange
,
-
log10
(
pvalue
),
pch
=
20
,
col
=
"green"
))
dev.off
()
cpmTop
=
cpm_counts
[
rownames
(
resSigdf
),]
# Heatmap
library
(
pheatmap
)
...
...
@@ -99,6 +129,9 @@ assayNTD = assay(ntd)
png
(
filename
=
snakemake
@
output
[[
"Heatmap"
]],
height
=
800
,
width
=
800
)
pheatmap
(
assayNTD
[
select
,],
cluster_rows
=
FALSE
,
show_rownames
=
FALSE
,
cluster_cols
=
FALSE
,
annotation_col
=
df
)
#cpmTop = cpm_counts[rownames(resSigdf),]
#heatmap(cpmTop,main = "Heatmap",margins = c(10,1))
dev.off
()
...
...
tools/deseq2/deseq2.yaml
View file @
4ad900d6
...
...
@@ -5,7 +5,7 @@
website
:
"
https://bioconductor.org/packages/release/bioc/html/DESeq2.html"
,
git
:
"
git
clone
https://git.bioconductor.org/packages/DESeq2"
,
description
:
"
Differential
gene
expression
analysis
based
on
the
negative
binomial
distribution.
Using
normalized
count
data."
,
version
:
"
3
.8
"
,
version
:
"
3
\
.10
"
,
documentation
:
"
https://bioconductor.org/packages/release/bioc/manuals/DESeq2/man/DESeq2.pdf"
,
multiqc
:
"
custom"
,
commands
:
...
...
@@ -16,8 +16,11 @@
command
:
~
,
category
:
"
differential_expression"
,
output_dir
:
deseq2
,
inputs
:
[{
name
:
counts
,
type
:
"
tsv"
}
{
name
:
group_file
,
type
:
"
popmap"
,
file
:
"
"
,
description
:
"
Path
to
tsv
file
with
samples
conditions"
}],
inputs
:
[
{
name
:
counts
,
type
:
"
tsv"
},
{
name
:
popmap_file
,
type
:
"
popmap"
,
file
:
"
"
,
description
:
"
Path
to
tsv
file
with
samples
conditions"
},
],
outputs
:
[
{
name
:
de_table
,
type
:
"
tsv"
,
file
:
de_table.csv
,
description
:
"
Tableau
d'expression
différentielle"
},
...
...
@@ -70,12 +73,12 @@
install
:
{
latticeExtra
:
[
'
Rscript
-e
'
'
install.packages("latticeExtra",Ncpus=8,
clean=TRUE);library("latticeExtra")'
'
'
],
DESeq2
:
[
'
Rscript
-e
'
'
BiocManager::install("DESeq2",
version
=
"3.
8
",Ncpus=8,
clean=TRUE);library("DESeq2")'
'
'
],
apeglm
:
[
'
Rscript
-e
'
'
BiocManager::install("apeglm",
version
=
"3.
8
",Ncpus=8,
clean=TRUE);library("apeglm")'
'
'
],
BiocParallel
:
[
'
Rscript
-e
'
'
BiocManager::install("BiocParallel",
version
=
"3.
8
",Ncpus=8,
clean=TRUE);library("BiocParallel")'
'
'
],
tximport
:
[
'
Rscript
-e
'
'
BiocManager::install("tximport",
version
=
"3.
8
",Ncpus=8,
clean=TRUE);library("tximport")'
'
'
],
rhdf5
:
[
'
Rscript
-e
'
'
BiocManager::install("rhdf5",
version
=
"3.
8
",Ncpus=8,
clean=TRUE);library("rhdf5")'
'
'
],
GenomicFeatures
:
[
'
Rscript
-e
'
'
BiocManager::install("GenomicFeatures",
version
=
"3.
8
",Ncpus=8,
clean=TRUE);library("GenomicFeatures")'
'
'
],
DESeq2
:
[
'
Rscript
-e
'
'
BiocManager::install("DESeq2",
version
=
"3.
10
",Ncpus=8,
clean=TRUE);library("DESeq2")'
'
'
],
apeglm
:
[
'
Rscript
-e
'
'
BiocManager::install("apeglm",
version
=
"3.
10
",Ncpus=8,
clean=TRUE);library("apeglm")'
'
'
],
BiocParallel
:
[
'
Rscript
-e
'
'
BiocManager::install("BiocParallel",
version
=
"3.
10
",Ncpus=8,
clean=TRUE);library("BiocParallel")'
'
'
],
tximport
:
[
'
Rscript
-e
'
'
BiocManager::install("tximport",
version
=
"3.
10
",Ncpus=8,
clean=TRUE);library("tximport")'
'
'
],
rhdf5
:
[
'
Rscript
-e
'
'
BiocManager::install("rhdf5",
version
=
"3.
10
",Ncpus=8,
clean=TRUE);library("rhdf5")'
'
'
],
GenomicFeatures
:
[
'
Rscript
-e
'
'
BiocManager::install("GenomicFeatures",
version
=
"3.
10
",Ncpus=8,
clean=TRUE);library("GenomicFeatures")'
'
'
],
pheatmap
:
[
'
Rscript
-e
'
'
install.packages("pheatmap",Ncpus=8,
clean=TRUE);library("pheatmap")'
'
'
]
},
script
:
deseq2.script.R
,
...
...
tools/edger/edger.yaml
View file @
4ad900d6
...
...
@@ -15,9 +15,11 @@
command
:
~
,
category
:
"
differential_expression"
,
output_dir
:
edger
,
inputs
:
[{
name
:
counts
,
type
:
"
tsv"
},
{
name
:
group_file
,
type
:
"
popmap"
,
file
:
"
"
,
description
:
"
Path
to
tsv
file
with
samples
conditions"
},
]
inputs
:
[
{
name
:
counts
,
type
:
"
tsv"
},
{
name
:
group_file
,
type
:
"
popmap"
,
file
:
"
"
,
description
:
"
Path
to
tsv
file
with
samples
conditions"
},
],
outputs
:
[
{
name
:
de_table
,
type
:
"
csv"
,
file
:
de_table.csv
,
description
:
"
Tableau
d'expression
différentielle"
},
...
...
tools/hisat2/hisat2.rule.snakefile
View file @
4ad900d6
...
...
@@ -53,7 +53,7 @@ elif config["SeOrPe"] == "SE":
"-x {params.indexPrefix} "
"--rg-id ID:WAW --rg SM:{wildcards.sample} "
#"-S "#output in sam format
"{input.read} 2> {log} "
"
-Ub
{input.read} 2> {log} "
"| samtools view -b 2>> {log} "
"| samtools sort -@ {threads} > {output.bam} 2>> {log} &&"
"samtools index -@ {threads} {output.bam} 2>> {log}"
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment