Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mmassaviol
WAWmassaviol
Commits
859f1e3f
Commit
859f1e3f
authored
Sep 15, 2021
by
khalid
Browse files
Add bcftools stats for a vcfFile raw files
parent
18be31b9
Changes
4
Hide whitespace changes
Inline
Side-by-side
tools/IBS_gds/IBS_gds.script.R
View file @
859f1e3f
...
...
@@ -25,13 +25,13 @@ ibs.hc <- snpgdsHCluster(ibs)
{
pop_map
=
read.table
(
popmap_file
,
header
=
F
,
sep
=
'\t'
)
;
colnames
(
pop_map
)[
c
(
1
,
2
)]
=
c
(
'sample.id'
,
'popcode'
)
;
pops
=
factor
(
pop_map
$
popcode
)[
match
(
ibs
ca
$
sample.id
,
pop_map
$
sample.id
)]
pops
=
factor
(
pop_map
$
popcode
)[
match
(
ibs
$
sample.id
,
pop_map
$
sample.id
)]
# individuals in the same population are clustered together
pop.idx
<-
order
(
pops
)
# Determine groups of individuals by population information
rv
2
<-
snpgdsCutTree
(
ibs.hc
,
samp.group
=
pops
)
rv
<-
snpgdsCutTree
(
ibs.hc
,
samp.group
=
pops
)
}
else
{
pop.idx
=
seq
(
1
,
length
(
ibs
$
sample.id
))
...
...
@@ -57,8 +57,7 @@ png(file=ibs_mdscale_png, width=1024, height=1024);
{
plot
(
x
,
y
,
col
=
pops
,
xlab
=
""
,
ylab
=
""
,
main
=
"Multidimensional Scaling Analysis (on IBS distances)"
)
legend
(
"topleft"
,
legend
=
levels
(
pops
),
pch
=
"o"
,
text.col
=
1
:
nlevels
(
pops
))
}
else
{
}
else
{
plot
(
x
,
y
,
xlab
=
""
,
ylab
=
""
,
main
=
"Multidimensional Scaling Analysis (on IBS distances)"
)
}
dev.off
()
...
...
tools/IBS_gds/IBS_gds.yaml
View file @
859f1e3f
...
...
@@ -19,7 +19,7 @@
{
name
:
popmap_file
,
type
:
"
popmap"
,
file
:
"
"
,
description
:
"
Path
to
tsv
file
with
samples
group"
}
],
outputs
:
[
{
name
:
ibs
,
type
:
"
txt"
,
file
:
"
*_
k0
.txt"
,
description
:
"
a
matrix
of
IBS
proportion"
},
{
name
:
ibs
,
type
:
"
txt"
,
file
:
"
*_
ibs
.txt"
,
description
:
"
a
matrix
of
IBS
proportion"
},
{
name
:
ibs_dendro_png
,
type
:
"
png"
,
file
:
"
ibs_dendro_plot_mqc.png"
,
description
:
"
HClust
and
determine
groups
of
individuals
by
population
information
or
automatically"
},
{
name
:
ibs_heatmap_png
,
type
:
"
png"
,
file
:
"
ibs_heatmap_mqc.png"
,
description
:
"
A
heatmap
of
IBS
pairwise
identities"
},
{
name
:
ibs_mdscale_png
,
type
:
"
png"
,
file
:
"
ibs_mdscale_mqc.png"
,
description
:
"
Multidimensional
scaling
analysis
on
the
matrix
of
genome-wide
IBS
pairwise
distances"
},
...
...
tools/bcftools_vcfFile_stats/bcftools_vcfFile_stats.rule.snakefile
0 → 100755
View file @
859f1e3f
# Merge the input VCFs into one bgzip-compressed VCF, run the configured stats
# command on it, plot the result with plot-vcfstats, and tile the per-metric
# PNGs into a single summary image.
# NOTE(review): `<step_name>` looks like a template placeholder that is
# substituted before the workflow is parsed -- confirm against the generator.
rule <step_name>__bcftools_vcfFile_stats:
    input:
        # Named inputs are supplied by a helper defined outside this file;
        # the shell command below reads `input.vcf` from it.
        **<step_name>__bcftools_vcfFile_stats_inputs(),
    output:
        # Text report written by the stats command.
        stats = config["results_dir"] + "/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"] + "/Allsamples_vcf_stats.txt",
        # Single PNG assembled from the individual plot-vcfstats images.
        stats_image = config["results_dir"] + "/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"] + "/Allsamples_stats_mqc.png",
    params:
        # Used both as a file prefix (`<prefix>.vcf.gz`) and as the
        # plot-vcfstats output directory (`<prefix>/*.png`).
        output_dir = config["results_dir"] + "/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"]+ "/Allsamples",
        # Stats executable and sub-command, read from the workflow configuration.
        command = config["<step_name>__bcftools_vcfFile_stats_command"],
    log:
        config["results_dir"] + "/logs/" + config["<step_name>__bcftools_vcfFile_stats_output_dir"] + "/Allsamples_bcftools_vcfFile_stats_log.txt"
    shell:
        # Step 1: merge every input VCF and compress the result.
        "bcftools merge {input.vcf} | bgzip > {params.output_dir}.vcf.gz ; "
        # Step 2: compute statistics on the merged file; only this command's
        # stderr is redirected to the log.
        "{params.command} "
        "-s - "
        "{params.output_dir}.vcf.gz "
        "> {output.stats} "
        "2> {log}; "
        # Step 3: render the plots into the {params.output_dir} directory.
        "plot-vcfstats "
        "-p {params.output_dir} "
        "-P " # no pdf
        "{output.stats}; "
        # Step 4: merge images. The parentheses must reach the shell as `\(`
        # and `\)`, so the backslashes are escaped here (`\\(`) instead of
        # relying on Python's deprecated handling of the invalid escape `\(`.
        "convert -size 1200x1000 \\( {params.output_dir}/depth.0.png {params.output_dir}/substitutions.0.png {params.output_dir}/hwe.0.png +smush +400 \\) "
        "\\( {params.output_dir}/indels_by_sample.0.png {params.output_dir}/snps_by_sample.0.png +smush +150 \\) "
        "\\( {params.output_dir}/dp_by_sample.0.png {params.output_dir}/singletons_by_sample.0.png +smush +150 \\) "
        "\\( {params.output_dir}/hets_by_sample.0.png {params.output_dir}/tstv_by_sample.0.png +smush +150 \\) -append {output.stats_image}"
\ No newline at end of file
tools/bcftools_vcfFile_stats/bcftools_vcfFile_stats.yaml
0 → 100755
View file @
859f1e3f
{
id
:
"
bcftools_vcfFile_stats"
,
name
:
"
BCFtools
VCF
Stats"
,
description
:
"
BCFtools
is
a
set
of
utilities
that
manipulate
variant
calls
in
the
Variant
Call
Format
(VCF)
and
its
binary
counterpart
BCF."
,
version
:
"
1.9"
,
website
:
"
https://samtools.github.io/bcftools/"
,
git
:
"
https://github.com/samtools/bcftools"
,
documentation
:
"
https://samtools.github.io/bcftools/howtos/index.html"
,
article
:
"
10.1093/bioinformatics/btr509"
,
multiqc
:
"
custom"
,
commands
:
[
{
name
:
"
bcftools_vcfFile_stats"
,
cname
:
"
BCFtools
VCFFile
Stats"
,
command
:
"
bcftools
stats"
,
category
:
vcf_postprocess
,
output_dir
:
"
bcftools_vcfFile_stats"
,
inputs
:
[
{
name
:
"
vcf"
,
type
:
"
vcfFile"
,
file
:
"
*_variants.vcf.gz"
,
description
:
"
Variant
file
(vcf)"
,
},
],
outputs
:
[
{
name
:
"
stats"
,
type
:
"
txt"
,
file
:
"
Allsamples_vcf_stats.txt"
,
description
:
"
VCF
file
stats"
,
},
{
name
:
"
stats_image"
,
type
:
"
png"
,
file
:
"
Allsamples_stats_mqc.png"
,
description
:
"
VCF
file
stats
graphs"
,
}
],
options
:
[
],
},
],
install
:
{
bcftools
:
[
"
cd
/opt/biotools"
,
"
wget
https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2"
,
"
tar
-xvjf
bcftools-1.9.tar.bz2"
,
"
cd
bcftools-1.9"
,
"
./configure
--prefix=/opt/biotools"
,
"
make
-j
10"
,
"
make
install"
,
"
mv
bcftools
/opt/biotools/bin/"
,
"
cd
..
&&
rm
-r
bcftools-1.9.tar.bz2
bcftools-1.9"
,
"
sed
-i
's/python/python3/g'
/opt/biotools/bin/plot-vcfstats"
,
"
sed
-i
's|11/2.54|20/2.54|'
/opt/biotools/bin/plot-vcfstats"
,
"
sed
-i
's|10/2.54|14/2.54|'
/opt/biotools/bin/plot-vcfstats"
,
"
sed
-i
's|window_len/2|window_len//2|g'
/opt/biotools/bin/plot-vcfstats"
],
tabix
:
[
"
apt
-y
update
&&
apt
install
-y
tabix"
],
matplotlib
:
[
"
pip3
install
matplotlib"
],
imagemagick
:
[
"
apt
-y
update
&&
apt
install
-y
imagemagick"
],
bcftools_dependencies
:
[
"
apt
-y
update
&&
apt
install
-y
python-backports.functools-lru-cache"
]
},
citations
:
{
bcftools
:
[
"
Heng
Li,
A
statistical
framework
for
SNP
calling,
mutation
discovery,
association
mapping
and
population
genetical
parameter
estimation
from
sequencing
data,
Bioinformatics,
Volume
27,
Issue
21,
1
November
2011,
Pages
2987-2993,
https://doi.org/10.1093/bioinformatics/btr509"
],
tabix
:
[
"
Heng
Li,
Tabix:
fast
retrieval
of
sequence
features
from
generic
TAB-delimited
files,
Bioinformatics,
Volume
27,
Issue
5,
1
March
2011,
Pages
718-719,
https://doi.org/10.1093/bioinformatics/btq671"
],
matplotlib
:
[
"
J.
D.
Hunter,
'Matplotlib:
A
2D
Graphics
Environment,'
in
Computing
in
Science
&
Engineering,
vol.
9,
no.
3,
pp.
90-95,
May-June
2007,
doi:
10.1109/MCSE.2007.55."
],
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment