Commit 18be31b9 authored by khalid's avatar khalid
Browse files

Add IBS analysis

parent 0848081d
<step_name>__IBS_gds_gds_name = os.path.basename(<step_name>__IBS_gds_inputs()["gds"]).split(".")[0]
rule <step_name>__IBS_gds:
input:
**<step_name>__IBS_gds_inputs(),
output:
ibs = config["results_dir"] + "/" + config["<step_name>__IBS_gds_output_dir"] + "/" + <step_name>__IBS_gds_gds_name + "_ibs.txt",
ibs_dendro_png = config["results_dir"] + "/" + config["<step_name>__IBS_gds_output_dir"] + "/ibs_dendro_plot_mqc.png",
ibs_heatmap_png = config["results_dir"] + "/" + config["<step_name>__IBS_gds_output_dir"] + "/ibs_heatmap_mqc.png",
ibs_mdscale_png = config["results_dir"] + "/" + config["<step_name>__IBS_gds_output_dir"] + "/ibs_mdscale_mqc.png",
params:
output_dir = config["results_dir"] + "/" + config["<step_name>__IBS_gds_output_dir"]+ "/",
command = config["<step_name>__IBS_gds_command"],
threads:
config["<step_name>__IBS_gds_threads"]
log:
config["results_dir"] + "/logs/" + config["<step_name>__IBS_gds_output_dir"] + "/IBS_gds_log.txt"
script:
config["<step_name>__IBS_gds_script"]
library(SNPRelate);
library(SeqArray);
#popmap_file = snakemake@input[["popmap_file"]]
gdsFile = snakemake@input[["gds"]]
popmap_file = snakemake@input[["popmap_file"]]
ibs_dendro_png = snakemake@output[["ibs_dendro_png"]]
ibs_heatmap_png = snakemake@output[["ibs_heatmap_png"]]
ibs_mdscale_png = snakemake@output[["ibs_mdscale_png"]]
ibsFile = snakemake@output[["ibs"]]
threads = snakemake@threads
gds = seqOpen(gdsFile);
set.seed(1000)
ibs <- snpgdsIBS(gds, autosome.only = FALSE, num.thread=threads)
ibs.hc <- snpgdsHCluster(ibs)
if (popmap_file != '')
{
pop_map= read.table(popmap_file, header=F, sep='\t') ;
colnames(pop_map)[c(1,2)]=c('sample.id', 'popcode') ;
pops = factor(pop_map$popcode)[match(ibsca$sample.id, pop_map$sample.id)]
# individulas in the same population are clustered together
pop.idx <- order(pops)
# Determine groups of individuals by population information
rv2 <- snpgdsCutTree(ibs.hc, samp.group=pops)
} else {
pop.idx = seq(1, length(ibs$sample.id))
# Determine groups of individuals automatically
rv <- snpgdsCutTree(ibs.hc)
}
png(file=ibs_heatmap_png, width=1024, height=1024);
image(ibs$ibs[pop.idx, pop.idx], col=terrain.colors(16))
dev.off()
png(file=ibs_dendro_png, width=1024, height=1024);
plot(rv$dendrogram, leaflab="none", main="cluster analysis on IBS pairwise distances")
if(popmap_file != '') legend("topright", legend=levels(pops), col=1:nlevels(pops), pch=19, ncol=4)
dev.off()
png(file=ibs_mdscale_png, width=1024, height=1024);
loc <- cmdscale(1 - ibs$ibs, k = 2)#k must be a param
x <- loc[, 1]; y <- loc[, 2]
if(popmap_file != '')
{
plot(x, y, col=pops, xlab = "", ylab = "", main = "Multidimensional Scaling Analysis (on IBS distances)")
legend("topleft", legend=levels(pops), pch="o", text.col=1:nlevels(pops))
}
else{
plot(x, y, xlab = "", ylab = "", main = "Multidimensional Scaling Analysis (on IBS distances)")
}
dev.off()
rownames(ibs$ibs) = ibs$sample.id ; colnames(ibs$ibs) = ibs$sample.id
write.table(ibs$ibs, file = ibsFile, append = FALSE, quote = F, sep = '\t', row.names = T, col.names = NA);
seqClose(gds);
{
id: IBS_gds,
name: identity-by-state IBS,
article: "",
website: "",
git: "",
description: "identity-by-state (IBS) estimation with SNPRelate of the fraction of identity by state for each pair of samples",
version: "",
documentation: "",
multiqc: "custom",
commands:
[
{
name: IBS_gds,
command: "",
category: "genet_pop",
output_dir: IBS_gds,
inputs: [{ name: gds, type: "gds", file: "",description: "gds file"} ,
{ name: popmap_file, type: "popmap", file: "", description: "Path to tsv file with samples group"}
],
outputs: [
{ name: ibs, type: "txt", file: "*_k0.txt", description: "a matrix of IBS proportion" },
{ name: ibs_dendro_png, type: "png", file: "ibs_dendro_plot_mqc.png", description: "HClust and determine groups of individuals by population information or automatically" },
{ name: ibs_heatmap_png, type: "png", file: "ibs_heatmap_mqc.png", description: "A heatmap of IBS pairwise identities" },
{ name: ibs_mdscale_png, type: "png", file: "ibs_mdscale_mqc.png", description: "Multidimensional scaling analysis on the matrix of genome-wide IBS pairwise distances" },
],
options: [
{
name: "IBS_gds_threads",
prefix: "--threads",
value: 16,
min: 1,
max: 64,
step: 1,
label: "Threads to use",
type: "numeric",
},
],
},
],
script: IBS_gds.script.R,
install:
{
SNPRelate: [
'Rscript -e ''if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager");
BiocManager::install("SNPRelate", update = TRUE, ask = FALSE)'' '
],
SeqArray: [
'Rscript -e ''if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager");
BiocManager::install("SeqArray", update = TRUE, ask = FALSE)'' '
],
},
citations:
{
SNPRelate: [
"Zheng X, Levine D, Shen J, Gogarten S, Laurie C, Weir B (2012). A High-performance Computing Toolset for Relatedness and Principal Component Analysis of SNP Data. Bioinformatics, 28(24), 3326-3328. doi: 10.1093/bioinformatics/bts606. "
],
SeqArray: [
"Zheng X, Gogarten S, Lawrence M, Stilp A, Conomos M, Weir B, Laurie C, Levine D (2017). SeqArray - A storage-efficient high-performance data format for WGS variant calls. Bioinformatics. doi: 10.1093/bioinformatics/btx145."
]
},
}
......@@ -29,11 +29,11 @@ switch(methode,
#sinon il faut transformer le seqarray en format SNPrelate
tmp = tempfile(pattern = "tmp", fileext = "snpRelate")
seqGDS2SNP(genofile, tmp, dosage=FALSE, compress.geno="LZMA_RA", compress.annotation="LZMA_RA", optimize=TRUE, verbose=TRUE)
seqGDS2SNP(gds, tmp, dosage=FALSE, compress.geno="LZMA_RA", compress.annotation="LZMA_RA", optimize=TRUE, verbose=TRUE)
#ouvrir le fichier
genofile <- snpgdsOpen(tmp)
pibd <- snpgdsIBDMLE(gds, method = "EM",snp.id=variant.id, autosome.only=FALSE,num.thread=threads)
pibd <- snpgdsIBDMLE(genofile, method = "EM",snp.id=variant.id, autosome.only=FALSE,num.thread=threads)
snpgdsClose(genofile)
unlink(tmp)
},
......@@ -46,11 +46,11 @@ switch(methode,
#sinon il faut transformer le seqarray en format SNPrelate
tmp = tempfile(pattern = "tmp", fileext = "snpRelate")
seqGDS2SNP(genofile, tmp, dosage=FALSE, compress.geno="LZMA_RA", compress.annotation="LZMA_RA", optimize=TRUE, verbose=TRUE)
seqGDS2SNP(gds, tmp, dosage=FALSE, compress.geno="LZMA_RA", compress.annotation="LZMA_RA", optimize=TRUE, verbose=TRUE)
#ouvrir le fichier
genofile <- snpgdsOpen(tmp)
pibd <- snpgdsIBDMLE(genofile, method = "downhill.simplex",snp.id=variant.id, autosome.only=FALSE,num.thread=threads )
snpgdsClose(genofile)
unlink(tmp)
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment