Commit e8cceda8 authored by khalid's avatar khalid
Browse files

add Relatedness calc

parent 61cc3b6b
......@@ -75,7 +75,6 @@ if (popmap_file != '')
}
if (nbpops < 2) { #or no popmap provided
prin("here")
# need popmap with at least 2 pops
write.table(0, file = meanfstFile, append = FALSE, quote = F, sep = '\t', row.names = T, col.names = NA);
write.table(0, file = fstFile, append = FALSE, quote = F, sep = '\t', row.names = T, col.names = NA);
......
<step_name>__Relatedness_gds_gds_name = os.path.basename(<step_name>__Relatedness_gds_inputs()["gds"]).split(".")[0]
rule <step_name>__Relatedness_gds:
input:
**<step_name>__Relatedness_gds_inputs(),
output:
k0 = config["results_dir"] + "/" + config["<step_name>__Relatedness_gds_output_dir"] + "/" + <step_name>__Relatedness_gds_gds_name + "_k0.txt",
k1 = config["results_dir"] + "/" + config["<step_name>__Relatedness_gds_output_dir"] + "/" + <step_name>__Relatedness_gds_gds_name + "_k1.txt",
k0vsk1_png = config["results_dir"] + "/" + config["<step_name>__Relatedness_gds_output_dir"] + "/K0vsK1_plot_mqc.png",
k1_heatmap_png = config["results_dir"] + "/" + config["<step_name>__Relatedness_gds_output_dir"] + "/K1_heatmap_mqc.png",
params:
method = config["<step_name>__Relatedness_gds_method"],
output_dir = config["results_dir"] + "/" + config["<step_name>__Relatedness_gds_output_dir"]+ "/",
command = config["<step_name>__Relatedness_gds_command"],
threads:
config["<step_name>__Relatedness_gds_threads"]
log:
config["results_dir"] + "/logs/" + config["<step_name>__Relatedness_gds_output_dir"] + "/Relatedness_gds_log.txt"
script:
config["<step_name>__Relatedness_gds_script"]
library(SNPRelate);
library(SeqArray);
#popmap_file = snakemake@input[["popmap_file"]]
gdsFile = snakemake@input[["gds"]]
k0vsk1_png = snakemake@output[["k0vsk1_png"]]
K1_heatmap_png = snakemake@output[["K1_heatmap_png"]]
k0File = snakemake@output[["k0"]]
k1File = snakemake@output[["k1"]]
methode = snakemake@params[["method"]]
threads = snakemake@threads
gds = seqOpen(gdsFile);
switch(methode,
MoM={
# PLINK method of moment
pibd <- snpgdsIBDMoM(gds,autosome.only=FALSE,num.thread=threads )
},
EM={
#by Maximum Likelihood Estimation. EM algorithm of searching maximum value of log likelihood function
pibd <- snpgdsIBDMLE(gds, method = "EM", autosome.only=FALSE,num.thread=threads)
},
downhill.simplex = {
#by Maximum Likelihood Estimation. downhill.simplex algorithm of searching maximum value of log likelihood function
pibd <- snpgdsIBDMLE(gds, method = "downhill.simplex", autosome.only=FALSE,num.thread=threads )
}
)
flag <- lower.tri(pibd$k0)
png(file=k0vsk1_png, width=1024, height=1024);
plot(NaN, xlim=c(0,1), ylim=c(0,1), xlab="k0", ylab="k1")
lines(c(0,1), c(1,0), col="red", lty=3)
points(pibd$k0[flag], pibd$k1[flag])
dev.off()
rownames(pibd$k0) = pibd$sample.id ; colnames(pibd$k0) = pibd$sample.id
rownames(pibd$k1) = pibd$sample.id ; colnames(pibd$k0) = pibd$sample.id
png(file=K1_heatmap_png, width=1024, height=1024);
heatmap(pibd$k1) ;
dev.off()
write.table(pibd$k0, file = k0File, append = FALSE, quote = F, sep = '\t', row.names = T, col.names = NA);
write.table(pibd$k1, file = k1File, append = FALSE, quote = F, sep = '\t', row.names = T, col.names = NA);
seqClose(gds);
{
id: Relatedness_gds,
name: identity-by-descent IBD,
article: "",
website: "",
git: "",
description: "identity-by-descent (IBD) estimation in SNPRelate either with the method of moments (MoM) (Purcell et al., 2007) or maximum likelihood estimation (MLE) (Milligan, 2003; Choi et al., 2009)",
version: "",
documentation: "",
multiqc: "custom",
commands:
[
{
name: Relatedness_gds,
command: "",
category: "genet_pop",
output_dir: Relatedness_gds,
inputs: [{ name: gds, type: "gds", file: "",description: "gds file"} ,
],
outputs: [
{ name: k0, type: "txt", file: "*_k0.txt", description: "IBD coefficient, the probability of sharing ZERO IBD" },
{ name: k1, type: "txt", file: "*_k1.txt", description: "IBD coefficient, the probability of sharing ONE IBD" },
{ name: k0vsk1_png, type: "png", file: "K0vsK1_plot_mqc.png", description: "K0 vs K1 plot" },
{ name: K1_heatmap_png, type: "png", file: "K1_heatmap_mqc.png", description: "A heatmap of K1 values" },
],
options: [
{
name: "Relatedness_gds_method",
type: "radio",
value: "MoM",
choices: [Plink Method of Moment: MoM, ML using EM algo: EM, ML using downhill.simplex algo: downhill.simplex],
label: "Method to calculate IBD coef.",
},
{
name: "Relatedness_gds_threads",
prefix: "--threads",
value: 16,
min: 1,
max: 64,
step: 1,
label: "Threads to use",
type: "numeric",
},
],
},
],
script: Relatedness_gds.script.R,
install:
{
SNPRelate: [
'Rscript -e ''if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager");
BiocManager::install("SNPRelate", update = TRUE, ask = FALSE)'' '
],
SeqArray: [
'Rscript -e ''if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager");
BiocManager::install("SeqArray", update = TRUE, ask = FALSE)'' '
],
},
citations:
{
SNPRelate: [
"Zheng X, Levine D, Shen J, Gogarten S, Laurie C, Weir B (2012). A High-performance Computing Toolset for Relatedness and Principal Component Analysis of SNP Data. Bioinformatics, 28(24), 3326-3328. doi: 10.1093/bioinformatics/bts606. "
],
SeqArray: [
"Zheng X, Gogarten S, Lawrence M, Stilp A, Conomos M, Weir B, Laurie C, Levine D (2017). SeqArray - A storage-efficient high-performance data format for WGS variant calls. Bioinformatics. doi: 10.1093/bioinformatics/btx145."
],
PLINK: [
"Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly MJ & Sham PC. 2007. PLINK: a toolset for whole-genome association and population-based linkage analysis. American Journal of Human Genetics, 81."
]
},
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment