Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
khalid
radsex
Commits
dfb00de3
Commit
dfb00de3
authored
Oct 26, 2017
by
Romain Feron
Browse files
Implemented individual coverage module
parent
74e9d630
Changes
8
Hide whitespace changes
Inline
Side-by-side
radseq_analysis/file_handler/__init__.py
View file @
dfb00de3
...
...
@@ -4,4 +4,5 @@ from radseq_analysis.file_handler.individual_files import get_individual_sequenc
from
radseq_analysis.file_handler.popmap
import
load_popmap
from
radseq_analysis.file_handler.positions
import
load_positions_list
from
radseq_analysis.file_handler.sequences
import
get_sequences
from
radseq_analysis.file_handler.markers
import
get_markers
from
radseq_analysis.file_handler.coverage
import
get_coverage
radseq_analysis/file_handler/markers.py
0 → 100644
View file @
dfb00de3
def get_markers(markers_file_path):
    '''
    Extract information from a markers file (list of markers ID).

    The file is read line by line; each non-empty line yields one marker ID.
    Only the line terminator is removed, so a final line that lacks a
    trailing newline is returned intact. Empty lines are skipped.

    Returns the marker IDs as a list of strings, in file order.
    '''
    # The context manager guarantees the file is closed, even on error.
    with open(markers_file_path) as markers_file:
        stripped_lines = (line.rstrip('\n') for line in markers_file)
        return [marker_id for marker_id in stripped_lines if marker_id]
radseq_analysis/modules/__init__.py
View file @
dfb00de3
...
...
@@ -2,5 +2,6 @@ from radseq_analysis.modules.sex_linked_haplotypes import analysis as sex_linked
from
radseq_analysis.modules.loci_matrix
import
analysis
as
loci_matrix
from
radseq_analysis.modules.stacks_privacy
import
analysis
as
stacks_privacy
from
radseq_analysis.modules.rescue
import
analysis
as
rescue
from
radseq_analysis.modules.individual_coverage
import
analysis
as
individual_coverage
from
radseq_analysis.modules.visualize
import
analysis
as
visualization
from
radseq_analysis.modules.analysis
import
analysis
radseq_analysis/modules/analysis.py
View file @
dfb00de3
...
...
@@ -4,6 +4,7 @@ from radseq_analysis.modules import sex_linked_haplotypes
from
radseq_analysis.modules
import
loci_matrix
from
radseq_analysis.modules
import
stacks_privacy
from
radseq_analysis.modules
import
rescue
from
radseq_analysis.modules
import
individual_coverage
from
radseq_analysis.modules
import
visualization
from
radseq_analysis.file_handler
import
load_popmap
from
radseq_analysis.file_handler
import
load_positions_list
...
...
@@ -16,6 +17,7 @@ def analysis(input_dir=None,
positions_file_path
=
None
,
sequences_file_path
=
None
,
coverage_file_path
=
None
,
markers_file_path
=
None
,
analysis
=
None
):
parameters
=
Parameters
(
files_dir
=
input_dir
,
...
...
@@ -50,5 +52,7 @@ def analysis(input_dir=None,
stacks_privacy
(
catalog_file_path
,
parameters
)
elif
analysis
==
'rescue'
:
rescue
(
sequences_file_path
,
catalog_file_path
,
individual_files_paths
,
coverage_file_path
,
parameters
)
elif
analysis
==
'coverage'
:
individual_coverage
(
markers_file_path
,
catalog_file_path
,
individual_files_paths
,
coverage_file_path
,
parameters
)
elif
analysis
==
'visualize'
:
visualization
(
input_file_path
,
popmap_file_path
,
output_file_path
,
parameters
)
radseq_analysis/modules/individual_coverage.py
0 → 100644
View file @
dfb00de3
from
radseq_analysis
import
file_handler
from
radseq_analysis
import
output
from
radseq_analysis.shared
import
Stack
import
os
from
collections
import
defaultdict
def get_individual_names(individual_files_paths):
    '''
    Derive one name per individual file path.

    The name is the last path component, truncated at its first dot.
    Returns the names as a list, in the same order as the input paths.
    '''
    names = []
    for path in individual_files_paths:
        file_name = os.path.basename(path)
        names.append(file_name.partition('.')[0])
    return names
def initialize_markers(markers_list):
    '''
    Create one Stack per marker ID.

    Each Stack is created empty and add_haplotype() is then called on it
    with the marker ID. Returns a dictionary mapping marker ID -> Stack.
    '''
    def new_stack(marker_id):
        stack = Stack()
        stack.add_haplotype(marker_id)
        return stack

    return {marker_id: new_stack(marker_id) for marker_id in markers_list}
def get_individual_data(individual_files_paths, correspondance, bar=False):
    '''
    Load the data of every individual file.

    Each file is read with file_handler.get_individual_sequences(path,
    correspondance) and the result is stored under the file name truncated
    at its first dot.

    NOTE: the value passed for `bar` is not used. It is overwritten
    depending on whether the optional `progress` package can be imported:
    if so a progress bar is shown, otherwise a plain message is printed.

    Returns a dictionary mapping individual name -> loaded data.
    '''
    try:
        from progress.bar import Bar
    except ImportError:
        bar = False
    else:
        bar = True

    progress_bar = None
    if bar:
        progress_bar = Bar(' - Extracting individual data :',
                           max=len(individual_files_paths))
    else:
        print(' - Extracting individual data ...')

    individual_data = {}
    for path in individual_files_paths:
        if progress_bar is not None:
            progress_bar.next()
        file_name = os.path.basename(path)
        individual_name = file_name.partition('.')[0]
        individual_data[individual_name] = file_handler.get_individual_sequences(
            path, correspondance)

    if progress_bar is not None:
        # Move to a new line once the progress bar has been drawn
        print()

    return individual_data
def fill_individual_data(markers, individual_data, coverage):
    '''
    Attach per-individual values to every haplotype of every stack.

    For each haplotype, a defaultdict(int) is built with one entry per
    individual in individual_data:
        - 0 when the haplotype ID is absent from the individual's data;
        - int(int(value) / coverage[name]) when coverage is truthy;
        - the stored value, unchanged, otherwise.
    The mapping is then assigned to the haplotype's `individuals` attribute.

    The stacks in `markers` are modified in place; nothing is returned.
    '''
    for stack in markers.values():
        for haplotype_id, haplotype in stack.haplotypes.items():
            per_individual = defaultdict(int)
            for name, data in individual_data.items():
                if haplotype_id not in data:
                    per_individual[name] = 0
                elif coverage:
                    per_individual[name] = int(int(data[haplotype_id]) / coverage[name])
                else:
                    per_individual[name] = data[haplotype_id]
            haplotype.individuals = per_individual
def analysis(markers_file_path,
             catalog_file_path,
             individual_files_paths,
             coverage_file_path,
             global_parameters):
    '''
    Run the individual coverage analysis for a list of markers.

    Steps, in order:
        1. load the coverage file (only if a path was given), the markers
           list and the catalog information restricted to these markers;
        2. create one stack per marker and load the individual files;
        3. merge the individual data into the stacks;
        4. write the result to global_parameters.output_file_path.
    '''
    print(' - Loading extracted markers and catalog data ...')
    # Coverage is optional: it stays None when no coverage file was given
    coverage = (file_handler.get_coverage(coverage_file_path)
                if coverage_file_path else None)
    marker_ids = file_handler.get_markers(markers_file_path)
    correspondance = file_handler.get_info_from_catalog(
        catalog_file_path,
        loci_list=marker_ids,
        consensus=False,
        correspondance=True)
    names = get_individual_names(individual_files_paths)

    print(' - Creating stacks ...')
    stacks = initialize_markers(marker_ids)
    loaded_data = get_individual_data(individual_files_paths, correspondance)

    print(' - Merging individual data in stacks ...')
    fill_individual_data(stacks, loaded_data, coverage)

    output.markers(global_parameters.output_file_path, stacks, names)
radseq_analysis/output/__init__.py
View file @
dfb00de3
...
...
@@ -2,3 +2,4 @@ from radseq_analysis.output.loci_matrix import loci_matrix
from
radseq_analysis.output.sex_linked_haplotypes
import
sex_linked_haplotypes
from
radseq_analysis.output.stacks_privacy
import
stacks_privacy
from
radseq_analysis.output.stacks
import
stacks
from
radseq_analysis.output.markers
import
markers
radseq_analysis/output/markers.py
0 → 100644
View file @
dfb00de3
# TODO: sort output by marker and individuals ?
def markers(output_file_path, markers_data, individual_names):
    '''
    Output markers data in the following format (tab-separated):

    Marker_ID    <individual 1>    <individual 2>    ...
    <marker id>  <value>           <value>           ...

    One row is written per entry of markers_data. The value for each
    individual is read from marker.haplotypes[marker_id].individuals[name]
    and converted with str(). Columns follow the order of individual_names.
    '''
    # The context manager guarantees the output is flushed and closed,
    # even if writing a row raises.
    with open(output_file_path, 'w') as output_file:
        output_file.write('Marker_ID' + '\t' + '\t'.join(individual_names) + '\n')
        for marker_id, marker in markers_data.items():
            individuals = marker.haplotypes[marker_id].individuals
            values = [str(individuals[name]) for name in individual_names]
            output_file.write(marker_id + '\t' + '\t'.join(values) + '\n')
radseq_analysis/parameters/parser.py
View file @
dfb00de3
...
...
@@ -15,6 +15,7 @@ Command: heatmap\tGenerates a matrix of haplotypes sex distribution
\t
haplotypes
\t
Extract haplotypes present in a given number of males and females
\t
frequencies
\t
Calculate haplotypes frequencies distribution in the population
\t
rescue
\t
Regroup stacks into alleles after analysis
\t
coverage
\t
Extract individual coverage for a set of markers
\t
visualize
\t
Visualize analyses results using R
'''
)
...
...
@@ -169,6 +170,43 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
coverage_file_path
=
args
.
coverage_file
,
analysis
=
'rescue'
)
def coverage(self):
    '''
    Parse the arguments of the "coverage" command and run the analysis.

    Arguments are read from sys.argv[2:]. The input folder must be an
    existing directory and the markers file an existing file; otherwise an
    error and the usage are printed and the program exits with status 1.
    '''
    # The usage text documents the real default of --output-file
    # ('extracted_alleles.tsv', see add_argument below).
    parser = argparse.ArgumentParser(
        description='Extract individual coverage for a set of markers',
        usage='''python3 radseq_analysis.py coverage -i input_folder -a markers_file [-c coverage_file -o output_file]
Options: -i\t--input-folder\tPath to a folder containing the output of denovo_map
\t-a\t--markers\tPath to markers file (list of markers to extract)
\t-c\t--coverage-file\tPath to a coverage file (result of coverage analysis)
\t-o\t--output-file\tPath to output file (default: extracted_alleles.tsv)
''')
    parser.add_argument('--input-folder', '-i',
                        help='Path to a folder containing the output of denovo_map')
    parser.add_argument('--markers', '-a',
                        help='Path to markers file')
    parser.add_argument('--coverage-file', '-c',
                        help='Path to coverage file', nargs='?')
    parser.add_argument('--output-file', '-o',
                        help='Path to output file', nargs='?',
                        default='extracted_alleles.tsv')
    args = parser.parse_args(sys.argv[2:])

    if not args.input_folder or not os.path.isdir(args.input_folder):
        print('\nError: no valid input folder specified\n')
        parser.print_usage()
        print()
        # sys.exit is always available, unlike the site-provided exit()
        sys.exit(1)

    if not args.markers or not os.path.isfile(args.markers):
        print('\nError: no valid markers file specified\n')
        parser.print_usage()
        print()
        sys.exit(1)

    analysis(input_dir=args.input_folder,
             markers_file_path=args.markers,
             output_file_path=args.output_file,
             coverage_file_path=args.coverage_file,
             analysis='coverage')
def
visualize
(
self
):
parser
=
argparse
.
ArgumentParser
(
description
=
'Visualize analyses results using R'
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment