Commit 19189b44 authored by Romain Feron
Browse files

Implemented filtering using the new catalog parsing

parent 69d04ac4
from radseq_analysis.shared import *
from radseq_analysis import file_handler
from radseq_analysis import output
def get_majority_haplotypes(data):
    '''
    Return the majority haplotype label for males and for females.

    For each sex the 'consensus' count is compared with the '-' count found
    in data; 'consensus' wins only when it is strictly greater, so a tie
    resolves to '-'.

    Returns a (male_majority, female_majority) tuple of labels.
    '''

    def majority_for(sex):
        # Strict comparison: equal counts fall through to '-'.
        if data['consensus'][sex] > data['-'][sex]:
            return 'consensus'
        return '-'

    return majority_for(MALES), majority_for(FEMALES)
def locus_in_position_list(tags, positions_list):
    '''
    Tell whether a locus matches one of the requested positions.

    tags is the per-haplotype mapping of a locus. Only loci whose haplotype
    labels are limited to 'consensus' and '-' are considered; any other
    locus is printed and rejected.

    positions_list is an iterable of indexable items where item[0] is
    compared with the male 'consensus' count and item[1] with the female
    'consensus' count.

    Returns True when at least one position matches both counts, False
    otherwise (including when the locus has no 'consensus' entry at all).
    '''
    if not set(tags).issubset(('consensus', '-')):
        # Trace of rejected loci, kept from the original implementation.
        print(tags)
        return False

    # A locus without any 'consensus' entry (empty mapping or only '-')
    # cannot match a position; bail out instead of raising KeyError.
    if 'consensus' not in tags:
        return False

    consensus = tags['consensus']
    return any(consensus[FEMALES] == position[1] and
               consensus[MALES] == position[0]
               for position in positions_list)
def filter(parameters):
    '''
    Select the loci found at one of the requested positions.

    Loads the haplotypes through file_handler.get_haplotypes(parameters),
    keeps every locus accepted by locus_in_position_list() for
    parameters.positions_list, and builds a Locus for each of them holding
    the male / female 'consensus' counts and, per sex, the set of
    individuals whose haplotype differs from that sex's majority haplotype.

    Returns a dict mapping locus id to its Locus object.

    NOTE: the name shadows the builtin filter(); it is kept because callers
    use it.
    '''
    print(' - Loading haplotypes file ...')
    haplotypes, numbers = file_handler.get_haplotypes(parameters)

    print(' - Filtering loci ...')
    loci_to_extract = {}
    for locus_id, data in numbers.items():
        if not locus_in_position_list(data, parameters.positions_list):
            continue

        locus = Locus()
        locus.n_males = data['consensus'][MALES]
        locus.n_females = data['consensus'][FEMALES]

        male_majority, female_majority = get_majority_haplotypes(data)
        individuals = haplotypes[locus_id]
        # Sex codes are compared with ==, not 'is': identity of equal
        # strings is an interning detail and must not be relied upon.
        locus.outliers[MALES] = {
            i for i, v in individuals.items()
            if v != male_majority and parameters.popmap[i] == 'M'
        }
        locus.outliers[FEMALES] = {
            i for i, v in individuals.items()
            if v != female_majority and parameters.popmap[i] == 'F'
        }
        loci_to_extract[locus_id] = locus

    return loci_to_extract
def analysis(parameters):
    '''
    Run the sex-linked haplotypes analysis and write its output file.

    Filters the loci with filter(parameters), attaches to each selected
    locus the sequence returned by file_handler.get_info_from_catalog() for
    parameters.catalog_file_path, then writes the result to
    parameters.output_file_path with output.sex_linked_haplotypes().

    Returns the dict of selected loci (locus id -> Locus).
    '''
    loci_to_extract = filter(parameters)

    consensus = file_handler.get_info_from_catalog(parameters.catalog_file_path,
                                                   loci_to_extract)
    for locus_id, sequence in consensus.items():
        loci_to_extract[locus_id].sequence = sequence

    output.sex_linked_haplotypes(parameters.output_file_path,
                                 loci_to_extract)
    return loci_to_extract
from radseq_analysis.shared import *
def sex_linked_haplotypes(output_file_path, loci_data):
    '''
    Output information on sex-linked haplotypes in the following format:

    Locus  Males  Females  Sequence  Male_outliers  Female_outliers

    The file is tab-separated, starts with the header line above and holds
    one line per entry of loci_data (locus id -> locus object). For each
    locus the n_males, n_females and sequence attributes are written,
    followed by the male and female outliers as comma-separated lists.

    output_file_path is opened in 'w' mode, so an existing file is
    overwritten.
    '''
    header = ('Locus', 'Males', 'Females', 'Sequence',
              'Male_outliers', 'Female_outliers')

    with open(output_file_path, 'w') as output_file:
        output_file.write('\t'.join(header) + '\n')
        for locus, data in loci_data.items():
            # Outlier ids are sorted (as strings) so the output does not
            # depend on the iteration order of the underlying collection.
            male_outliers = sorted(str(i) for i in data.outliers[MALES])
            female_outliers = sorted(str(i) for i in data.outliers[FEMALES])
            fields = (str(locus),
                      str(data.n_males),
                      str(data.n_females),
                      data.sequence,
                      ','.join(male_outliers),
                      ','.join(female_outliers))
            output_file.write('\t'.join(fields) + '\n')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment