Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
khalid
radsex
Commits
7fa90400
Commit
7fa90400
authored
Aug 29, 2017
by
Romain Feron
Browse files
Implemented visualization module structure + heatmap visualization
parent
9e6df59e
Changes
14
Hide whitespace changes
Inline
Side-by-side
radseq_analysis/modules/__init__.py
View file @
7fa90400
...
...
@@ -2,4 +2,5 @@ from .sex_linked_haplotypes import analysis as sex_linked_haplotypes
from
.loci_matrix
import
analysis
as
loci_matrix
from
.stacks_privacy
import
analysis
as
stacks_privacy
from
.rescue
import
analysis
as
rescue
from
.visualize
import
analysis
as
visualization
from
.analysis
import
analysis
radseq_analysis/modules/analysis.py
View file @
7fa90400
...
...
@@ -4,11 +4,13 @@ from radseq_analysis.modules import sex_linked_haplotypes
from
radseq_analysis.modules
import
loci_matrix
from
radseq_analysis.modules
import
stacks_privacy
from
radseq_analysis.modules
import
rescue
from
radseq_analysis.modules
import
visualization
from
radseq_analysis.file_handler
import
load_popmap
from
radseq_analysis.file_handler
import
load_positions_list
def
analysis
(
input_dir
=
None
,
input_file_path
=
None
,
popmap_file_path
=
None
,
output_file_path
=
None
,
positions_file_path
=
None
,
...
...
@@ -30,11 +32,12 @@ def analysis(input_dir=None,
load_positions_list
(
positions_file_path
,
parameters
)
# TODO: support more batch numbers + move this inside analyses?
haplotypes_file_path
=
os
.
path
.
join
(
input_dir
,
'batch_0.haplotypes.tsv'
)
catalog_file_path
=
os
.
path
.
join
(
input_dir
,
'batch_0.catalog.tags.tsv.gz'
)
individual_files_paths
=
[
os
.
path
.
join
(
input_dir
,
f
)
for
f
in
os
.
listdir
(
input_dir
)
if
'tags'
in
f
and
'catalog'
not
in
f
]
if
input_dir
:
haplotypes_file_path
=
os
.
path
.
join
(
input_dir
,
'batch_0.haplotypes.tsv'
)
catalog_file_path
=
os
.
path
.
join
(
input_dir
,
'batch_0.catalog.tags.tsv.gz'
)
individual_files_paths
=
[
os
.
path
.
join
(
input_dir
,
f
)
for
f
in
os
.
listdir
(
input_dir
)
if
'tags'
in
f
and
'catalog'
not
in
f
]
if
analysis
==
'heatmap'
:
loci_matrix
(
haplotypes_file_path
,
parameters
)
...
...
@@ -44,3 +47,5 @@ def analysis(input_dir=None,
stacks_privacy
(
catalog_file_path
,
parameters
)
elif
analysis
==
'rescue'
:
rescue
(
sequences_file_path
,
catalog_file_path
,
individual_files_paths
,
coverage_file_path
,
parameters
)
elif
analysis
==
'visualize'
:
visualization
(
input_file_path
,
output_file_path
,
parameters
)
radseq_analysis/modules/visualize.py
0 → 100644
View file @
7fa90400
from
radseq_analysis
import
visualization
def
detect_plot_type
(
input_file
):
return
'haplotypes'
def
analysis
(
input_file
,
output_file
,
parameters
):
plot_type
=
detect_plot_type
(
input_file
)
if
plot_type
==
'haplotypes'
:
visualization
.
haplotypes
(
input_file
,
output_file
,
parameters
.
species
)
radseq_analysis/parameters/parser.py
View file @
7fa90400
...
...
@@ -15,6 +15,7 @@ Command: heatmap\tGenerates a matrix of haplotypes sex distribution
\t
haplotypes
\t
Extract haplotypes present in a given number of males and females
\t
frequencies
\t
Calculate haplotypes frequencies distribution in the population
\t
rescue
\t
Regroup stacks into alleles after analysis
\t
visualize
\t
Visualize analyses results using R
'''
)
parser
.
add_argument
(
'command'
,
help
=
'Command to run'
,
nargs
=
'?'
)
...
...
@@ -134,6 +135,8 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
Options: -i
\t
--input-folder
\t
Path to a folder containing the output of denovo_map
\t
-s
\t
--sequences
\t
Path to sequences file (result of haplotypes analysis)
\t
-m
\t
--popmap
\t
Path to population map
\t
-c
\t
--coverage-file
\t
Path to a coverage file (result of coverage analysis)
\t
-o
\t
--output-file
\t
Path to output file (default: extracted_alleles.tsv)
'''
)
parser
.
add_argument
(
'--input-folder'
,
'-i'
,
...
...
@@ -169,3 +172,40 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
output_file_path
=
args
.
output_file
,
coverage_file_path
=
args
.
coverage_file
,
analysis
=
'rescue'
)
def
visualize
(
self
):
parser
=
argparse
.
ArgumentParser
(
description
=
'Visualize analyses results using R'
,
usage
=
'''python3 radseq_analysis.py visualize -i input_file -o output_file -m popmap
Options: -i
\t
--input-file
\t
Path to a file generated by this pipeline (haplotype_matrix, frequencies, ...)
\t
-o
\t
--output-file
\t
Path to output file
\t
-m
\t
--popmap
\t
Path to population map
'''
)
parser
.
add_argument
(
'--input-file'
,
'-i'
,
help
=
'Path to a file generated by this pipeline'
)
parser
.
add_argument
(
'--output-file'
,
'-o'
,
help
=
'Path to output file'
)
parser
.
add_argument
(
'--popmap'
,
'-m'
,
help
=
'Path to a popmap file'
)
args
=
parser
.
parse_args
(
sys
.
argv
[
2
:])
if
not
args
.
input_file
or
not
os
.
path
.
isfile
(
args
.
input_file
):
print
(
'
\n
Error: no valid input file specified
\n
'
)
parser
.
print_usage
()
print
()
exit
(
1
)
if
not
args
.
output_file
:
print
(
'
\n
Error: no valid output file specified
\n
'
)
parser
.
print_usage
()
print
()
exit
(
1
)
if
not
args
.
popmap
or
not
os
.
path
.
isfile
(
args
.
popmap
):
print
(
'
\n
Error: no valid popmap file specified
\n
'
)
parser
.
print_usage
()
print
()
exit
(
1
)
analysis
(
input_file_path
=
args
.
input_file
,
popmap_file_path
=
args
.
popmap
,
output_file_path
=
args
.
output_file
,
analysis
=
'visualize'
)
radseq_analysis/shared/parameters.py
View file @
7fa90400
...
...
@@ -13,7 +13,8 @@ class Parameters:
popmap
=
{},
n_males
=
0
,
n_females
=
0
,
species
=
None
species
=
None
,
m_value
=
None
):
self
.
files_dir
=
files_dir
...
...
@@ -27,3 +28,4 @@ class Parameters:
self
.
n_males
=
n_males
self
.
n_females
=
n_females
self
.
species
=
species
self
.
m_value
=
m_value
radseq_analysis/visualisation/__init__.py
deleted
100644 → 0
View file @
9e6df59e
from
.visualisation
import
run
radseq_analysis/visualization/__init__.py
0 → 100644
View file @
7fa90400
from
radseq_analysis.visualization.haplotypes
import
haplotypes
radseq_analysis/visualization/haplotypes.py
0 → 100644
View file @
7fa90400
import
os
def
haplotypes
(
input_file
,
output_file
,
species_name
):
scripts_d
=
''
.
join
(
os
.
path
.
split
(
os
.
path
.
realpath
(
__file__
))[:
-
1
])
cmd
=
(
'Rscript '
+
os
.
path
.
join
(
scripts_d
,
'r_scripts'
,
'heatmap.R'
)
+
' '
+
input_file
+
' '
+
output_file
+
' '
+
species_name
)
os
.
system
(
cmd
)
radseq_analysis/visuali
s
ation/r_plots.py
→
radseq_analysis/visuali
z
ation/r_plots.py
View file @
7fa90400
File moved
radseq_analysis/visuali
s
ation/r_scripts/clustering.R
→
radseq_analysis/visuali
z
ation/r_scripts/clustering.R
View file @
7fa90400
File moved
radseq_analysis/visuali
s
ation/r_scripts/frequencies.R
→
radseq_analysis/visuali
z
ation/r_scripts/frequencies.R
View file @
7fa90400
File moved
radseq_analysis/visuali
s
ation/r_scripts/haplotypes.R
→
radseq_analysis/visuali
z
ation/r_scripts/haplotypes.R
View file @
7fa90400
File moved
radseq_analysis/visuali
s
ation/r_scripts/heatmap.R
→
radseq_analysis/visuali
z
ation/r_scripts/heatmap.R
View file @
7fa90400
library
(
readr
)
library
(
ggplot2
)
library
(
grid
)
library
(
gridExtra
)
#!/usr/bin/env Rscript
args
=
commandArgs
(
trailingOnly
=
TRUE
)
# test if there is at least one argument: if not, return an error
if
(
length
(
args
)
!=
3
){
stop
(
"Usage: R frequencies.R input_file.tsv output_file.png species_name"
)
}
suppressMessages
(
require
(
readr
))
suppressMessages
(
require
(
ggplot2
))
input_file_path
=
args
[
1
]
output_file_path
=
args
[
2
]
species_name
=
args
[
3
]
g_legend
<-
function
(
a.gplot
){
tmp
<-
ggplot_gtable
(
ggplot_build
(
a.gplot
))
...
...
@@ -9,7 +19,7 @@ g_legend<-function(a.gplot){
legend
<-
tmp
$
grobs
[[
leg
]]
return
(
legend
)}
sex_heatmap
=
function
(
file_path
,
m_value
){
sex_heatmap
=
function
(
file_path
,
species_name
=
"none"
){
data
=
suppressMessages
(
read_delim
(
file_path
,
"\t"
,
escape_double
=
FALSE
,
col_names
=
FALSE
,
trim_ws
=
TRUE
))
...
...
@@ -37,9 +47,14 @@ sex_heatmap = function(file_path, m_value){
}
d
$
Bin
=
factor
(
group
)
title
=
""
if
(
species_name
!=
"none"
)
{
title
=
paste
(
title
,
species_name
,
sep
=
""
)
}
g
=
ggplot
(
d
,
aes
(
x
=
Males
,
y
=
Females
))
+
geom_tile
(
aes
(
fill
=
Bin
),
color
=
"grey"
)
+
ggtitle
(
paste
(
"m="
,
m_value
,
sep
=
""
)
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
))
+
ggtitle
(
title
)
+
theme
(
plot.title
=
element_text
(
hjust
=
0.5
))
+
scale_fill_manual
(
name
=
"Number of\nhaplotypes"
,
values
=
palette
,
labels
=
bin_labels
)
+
scale_x_continuous
(
breaks
=
seq
(
0
,
max
(
d
$
Males
),
5
),
minor_breaks
=
seq
(
0
,
max
(
d
$
Males
),
1
))
+
scale_y_continuous
(
breaks
=
seq
(
0
,
max
(
d
$
Females
),
5
),
minor_breaks
=
seq
(
0
,
max
(
d
$
Females
),
1
))
...
...
@@ -47,53 +62,15 @@ sex_heatmap = function(file_path, m_value){
return
(
g
)
}
folder
=
"/home/rferon/work/analyses/radseq_all/other_species/astatotilapia/"
file_2
=
paste
(
folder
,
"m_2.tsv"
,
sep
=
""
)
if
(
file.exists
(
file_2
)){
plot_2
=
sex_heatmap
(
file_2
,
2
)
}
else
{
print
(
file_2
)
}
heatmap
=
sex_heatmap
(
input_file_path
,
species_name
)
file_3
=
paste
(
folder
,
"m_3.tsv"
,
sep
=
""
)
if
(
file.exists
(
file_3
)){
plot_3
=
sex_heatmap
(
file_3
,
3
)
}
else
{
print
(
file_3
)
}
file_5
=
paste
(
folder
,
"m_5.tsv"
,
sep
=
""
)
if
(
file.exists
(
file_5
))
{
plot_5
=
sex_heatmap
(
file_5
,
5
)
}
else
{
print
(
file_5
)
}
file_10
=
paste
(
folder
,
"m_10.tsv"
,
sep
=
""
)
if
(
file.exists
(
file_10
)){
plot_10
=
sex_heatmap
(
file_10
,
10
)
}
else
{
print
(
file_10
)
}
n_males
=
max
(
ggplot_build
(
plot_5
)
$
data
[[
1
]]
$
x
)
n_females
=
max
(
ggplot_build
(
plot_5
)
$
data
[[
1
]]
$
y
)
n_males
=
max
(
ggplot_build
(
heatmap
)
$
data
[[
1
]]
$
x
)
n_females
=
max
(
ggplot_build
(
heatmap
)
$
data
[[
1
]]
$
y
)
ratio
=
n_females
/
n_males
n_plots
=
0
if
(
file.exists
(
file_2
))
{
n_plots
=
n_plots
+
1
}
if
(
file.exists
(
file_3
))
{
n_plots
=
n_plots
+
1
}
if
(
file.exists
(
file_5
))
{
n_plots
=
n_plots
+
1
}
if
(
file.exists
(
file_10
))
{
n_plots
=
n_plots
+
1
}
width_c
=
1600
png
(
paste
(
folder
,
"heatmap.png"
,
sep
=
""
),
width
=
width_c
,
height
=
width_c
*
ratio
,
res
=
100
)
species_name
=
"Astatotilapia"
grid.arrange
(
plot_2
+
theme
(
legend.position
=
"none"
),
plot_3
+
theme
(
legend.position
=
"none"
),
plot_5
+
theme
(
legend.position
=
"none"
),
plot_10
+
theme
(
legend.position
=
"none"
),
ncol
=
2
,
nrow
=
2
,
top
=
species_name
,
right
=
g_legend
(
plot_2
),
widths
=
c
(
n_males
,
n_males
),
heights
=
c
(
n_females
,
n_females
))
width_c
=
1200
dev.off
()
png
(
output_file_path
,
width
=
width_c
,
height
=
width_c
*
ratio
,
res
=
130
)
print
(
heatmap
)
x
=
dev.off
()
radseq_analysis/visuali
s
ation/visualisation.py
→
radseq_analysis/visuali
z
ation/visualisation.py
View file @
7fa90400
File moved
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment