Commit 147ce822 authored by Romain Feron
Browse files

Added faster catalog parsing when only individual IDs are needed

parent f635d662
......@@ -67,7 +67,7 @@ def get_info_from_catalog(catalog_path,
return frequencies_data
def get_haplotypes(parameters):
def get_haplotypes(parameters, individuals_only=False):
'''
Extract haplotypes information, sorted by sex, from a catalog file
......@@ -87,11 +87,14 @@ def get_haplotypes(parameters):
tabs = line.split('\t')
locus_id = tabs[2]
indiv_ids = tabs[8].split(',')
haplotypes_data[locus_id] = {}
haplotypes_data[locus_id]['sequence'] = tabs[9]
haplotypes_data[locus_id]['individuals'] = {}
for individual in indiv_ids:
haplotypes_data[locus_id]['individuals'][individual.split('_')[0]] = individual.split('_')[1]
if not individuals_only:
haplotypes_data[locus_id] = {}
haplotypes_data[locus_id]['sequence'] = tabs[9]
haplotypes_data[locus_id]['individuals'] = {}
for individual in indiv_ids:
haplotypes_data[locus_id]['individuals'][individual.split('_')[0]] = individual.split('_')[1]
else:
haplotypes_data[locus_id] = [individual.split('_')[0] for individual in indiv_ids]
catalog.close()
......
......@@ -5,7 +5,7 @@ from radseq_analysis import output
def fill_loci_matrix(parameters):
print(' - Loading haplotypes from file ...')
haplotypes = file_handler.get_haplotypes(parameters)
haplotypes = file_handler.get_haplotypes(parameters, individuals_only=True)
loci_matrix = [[0 for x in range(int(parameters.n_males) + 1)] for
y in range(int(parameters.n_females) + 1)]
......@@ -13,10 +13,8 @@ def fill_loci_matrix(parameters):
print(' - Generating loci matrix ...')
for locus_id, data in haplotypes.items():
n_males = len({i for i in data['individuals'].keys() if
parameters.popmap[parameters.order[i]] is 'M'})
n_females = len({i for i in data['individuals'].keys() if
parameters.popmap[parameters.order[i]] is 'F'})
n_males = len({i for i in data if parameters.popmap[parameters.order[i]] is 'M'})
n_females = len({i for i in data if parameters.popmap[parameters.order[i]] is 'F'})
loci_matrix[n_females][n_males] += 1
print(' - Generating output ...')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment