Commit 859f1e3f authored by khalid's avatar khalid
Browse files

Add bcftools stats for a vcfFile raw files

parent 18be31b9
......@@ -25,13 +25,13 @@ ibs.hc <- snpgdsHCluster(ibs)
{
pop_map= read.table(popmap_file, header=F, sep='\t') ;
colnames(pop_map)[c(1,2)]=c('sample.id', 'popcode') ;
pops = factor(pop_map$popcode)[match(ibsca$sample.id, pop_map$sample.id)]
pops = factor(pop_map$popcode)[match(ibs$sample.id, pop_map$sample.id)]
# individuals in the same population are clustered together
pop.idx <- order(pops)
# Determine groups of individuals by population information
rv2 <- snpgdsCutTree(ibs.hc, samp.group=pops)
rv <- snpgdsCutTree(ibs.hc, samp.group=pops)
} else {
pop.idx = seq(1, length(ibs$sample.id))
......@@ -57,8 +57,7 @@ png(file=ibs_mdscale_png, width=1024, height=1024);
{
plot(x, y, col=pops, xlab = "", ylab = "", main = "Multidimensional Scaling Analysis (on IBS distances)")
legend("topleft", legend=levels(pops), pch="o", text.col=1:nlevels(pops))
}
else{
} else{
plot(x, y, xlab = "", ylab = "", main = "Multidimensional Scaling Analysis (on IBS distances)")
}
dev.off()
......
......@@ -19,7 +19,7 @@
{ name: popmap_file, type: "popmap", file: "", description: "Path to tsv file with samples group"}
],
outputs: [
{ name: ibs, type: "txt", file: "*_k0.txt", description: "a matrix of IBS proportion" },
{ name: ibs, type: "txt", file: "*_ibs.txt", description: "a matrix of IBS proportion" },
{ name: ibs_dendro_png, type: "png", file: "ibs_dendro_plot_mqc.png", description: "HClust and determine groups of individuals by population information or automatically" },
{ name: ibs_heatmap_png, type: "png", file: "ibs_heatmap_mqc.png", description: "A heatmap of IBS pairwise identities" },
{ name: ibs_mdscale_png, type: "png", file: "ibs_mdscale_mqc.png", description: "Multidimensional scaling analysis on the matrix of genome-wide IBS pairwise distances" },
......
# Merge all input VCFs into one bgzipped file, run the configured stats command on
# it, plot the statistics with plot-vcfstats and assemble a selection of the
# resulting PNGs into a single summary image.
#
# Inputs : `vcf` (used as {input.vcf}), provided by
#          <step_name>__bcftools_vcfFile_stats_inputs().
# Outputs: `stats`       - text output of the stats command
#          `stats_image` - the assembled PNG
# Note   : the merged `{params.output_dir}.vcf.gz` is written next to the outputs
#          but is not declared as an output of the rule.
rule <step_name>__bcftools_vcfFile_stats:
    input:
        **<step_name>__bcftools_vcfFile_stats_inputs(),
    output:
        stats = config["results_dir"] + "/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"] + "/Allsamples_vcf_stats.txt",
        stats_image = config["results_dir"] + "/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"] + "/Allsamples_stats_mqc.png",
    params:
        # Used both as a file prefix (merged "<output_dir>.vcf.gz") and as the
        # directory plot-vcfstats writes its PNGs into.
        output_dir = config["results_dir"] + "/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"]+ "/Allsamples",
        command = config["<step_name>__bcftools_vcfFile_stats_command"],
    log:
        config["results_dir"] + "/logs/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"] + "/Allsamples_bcftools_vcfFile_stats_log.txt"
    shell:
        # Merge every input VCF into a single bgzipped file
        "bcftools merge {input.vcf} | bgzip > {params.output_dir}.vcf.gz ; "
        # Stats on the merged file; only this command's stderr goes to the log
        "{params.command} "
        "-s - "  # per-sample stats, "-" selects all samples
        "{params.output_dir}.vcf.gz "
        "> {output.stats} "
        "2> {log}; "
        "plot-vcfstats "
        "-p {params.output_dir} "
        "-P " # no pdf
        "{output.stats}; "
        # Merge images: each parenthesised group is joined with +smush, then the
        # groups are stacked with -append. The backslashes are doubled so that
        # Python emits a literal "\(" / "\)" for the shell instead of relying on
        # an unrecognized escape sequence (a SyntaxWarning since Python 3.12).
        "convert -size 1200x1000 \\( {params.output_dir}/depth.0.png {params.output_dir}/substitutions.0.png {params.output_dir}/hwe.0.png +smush +400 \\) "
        "\\( {params.output_dir}/indels_by_sample.0.png {params.output_dir}/snps_by_sample.0.png +smush +150 \\) "
        "\\( {params.output_dir}/dp_by_sample.0.png {params.output_dir}/singletons_by_sample.0.png +smush +150 \\) "
        "\\( {params.output_dir}/hets_by_sample.0.png {params.output_dir}/tstv_by_sample.0.png +smush +150 \\) -append {output.stats_image}"
\ No newline at end of file
# Tool descriptor for the "bcftools_vcfFile_stats" step: tool metadata, the command
# definition (inputs, outputs, options), install commands and citations.
{
id: "bcftools_vcfFile_stats",
name: "BCFtools VCF Stats",
description: "BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF.",
version: "1.9",
website: "https://samtools.github.io/bcftools/",
git: "https://github.com/samtools/bcftools",
documentation: "https://samtools.github.io/bcftools/howtos/index.html",
article: "10.1093/bioinformatics/btr509",
multiqc: "custom",
commands:
[
{
name: "bcftools_vcfFile_stats",
cname: "BCFtools VCFFile Stats",
command: "bcftools stats",
category: vcf_postprocess,
output_dir: "bcftools_vcfFile_stats",
inputs:
[
{
name: "vcf",
type: "vcfFile",
file: "*_variants.vcf.gz",
description: "Variant file (vcf)",
},
],
# NOTE(review): these file names presumably have to match the ones written by
# the workflow rule for this step - verify when renaming either side.
outputs:
[
{
name: "stats",
type: "txt",
file: "Allsamples_vcf_stats.txt",
description: "VCF file stats",
},
{
name: "stats_image",
type: "png",
file: "Allsamples_stats_mqc.png",
description: "VCF file stats graphs",
}
],
# No user-configurable options are declared for this command.
options:
[
],
},
],
# Shell commands run to install the tool and its helpers, grouped by component.
install:
{
bcftools:
[
# Download, build and install bcftools 1.9 under /opt/biotools.
"cd /opt/biotools",
"wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2",
"tar -xvjf bcftools-1.9.tar.bz2",
"cd bcftools-1.9",
"./configure --prefix=/opt/biotools",
"make -j 10",
"make install",
# NOTE(review): `make install` with this prefix presumably already places the
# binary in /opt/biotools/bin - confirm this extra mv is needed.
"mv bcftools /opt/biotools/bin/",
# Remove the downloaded archive and the build directory.
"cd .. && rm -r bcftools-1.9.tar.bz2 bcftools-1.9",
# In-place patches of the installed plot-vcfstats script.
# Replaces every occurrence of "python" with "python3" (global substitution).
"sed -i 's/python/python3/g' /opt/biotools/bin/plot-vcfstats",
# Rewrites 11/2.54 -> 20/2.54 and 10/2.54 -> 14/2.54 (first match per line);
# presumably figure dimensions converted from cm to inches - TODO confirm.
"sed -i 's|11/2.54|20/2.54|' /opt/biotools/bin/plot-vcfstats",
"sed -i 's|10/2.54|14/2.54|' /opt/biotools/bin/plot-vcfstats",
# Rewrites window_len/2 -> window_len//2; presumably to keep integer division
# once the script runs under python3 - TODO confirm.
"sed -i 's|window_len/2|window_len//2|g' /opt/biotools/bin/plot-vcfstats"
],
tabix:
[
"apt -y update && apt install -y tabix"
],
matplotlib:
[
"pip3 install matplotlib"
],
imagemagick:
[
"apt -y update && apt install -y imagemagick"
],
# NOTE(review): the package name suggests a Python 2 backport, while
# plot-vcfstats is patched to python3 above - confirm it is still required.
bcftools_dependencies: [
"apt -y update && apt install -y python-backports.functools-lru-cache"
]
},
# Bibliographic references, keyed by component.
citations: {
bcftools: [
"Heng Li, A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data, Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987-2993, https://doi.org/10.1093/bioinformatics/btr509"
],
tabix: [
"Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718-719, https://doi.org/10.1093/bioinformatics/btq671"
],
matplotlib: [
"J. D. Hunter, 'Matplotlib: A 2D Graphics Environment,' in Computing in Science & Engineering, vol. 9, no. 3, pp. 90-95, May-June 2007, doi: 10.1109/MCSE.2007.55."
],
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment