Commit f558f42e authored by Romain Feron's avatar Romain Feron
Browse files

Merge branch 'master' of github.com:INRA-LPGP/radseq_analyses_pipeline

parents 545dee1c 74e9d630
......@@ -6,11 +6,12 @@ def load_positions_list(positions_file_path, global_parameters):
with open(positions_file_path) as positions_file:
for line in positions_file:
try:
fields = line[:-1].split('\t')
positions.append(tuple([int(fields[0]), int(fields[1])]))
except IndexError:
print('Error: cannot read positions file')
exit(1)
if line[:-1]:
try:
fields = line[:-1].split('\t')
positions.append(tuple([int(fields[0]), int(fields[1])]))
except IndexError:
print('Error: cannot read positions file')
exit(1)
global_parameters.positions_list = positions
......@@ -13,7 +13,11 @@ TODO: better system calls (check for errors)
def create_temp_seq_file(sequences, species):
temp_sequences_name = species + '_sequences.temp'
read_lengths = {len(data.sequence) for locus, data in sequences.items()}
if len(read_lengths) == 1:
read_length = list(read_lengths)[0]
else:
exit('Error: input sequences have variable lengths')
with open(temp_sequences_name, 'w') as o:
for locus_id, locus in sequences.items():
o.write('>' +
......@@ -21,6 +25,7 @@ def create_temp_seq_file(sequences, species):
str(locus.n_males) + '_' +
str(locus.n_females) + '\n')
o.write(locus.sequence + '\n')
return read_length
def create_temp_catalog_file(catalog_data, species):
......@@ -75,24 +80,23 @@ def cleanup_temp_files(species):
os.remove(temp_db_logs_name)
def filter_blast_output(species):
def filter_blast_output(species, read_length, n_mismatches=4):
stacks = defaultdict(lambda: dict())
temp_blast_results_name = species + '_blast_results.temp'
results = open(temp_blast_results_name)
for line in results:
fields = line[:-1].split('\t')
if fields[2] == '94' and int(fields[3]) >= 90:
if fields[2] == str(read_length) and int(fields[3]) >= read_length - n_mismatches:
stacks[fields[0]][fields[1]] = [fields[3], fields[4], fields[5]]
return stacks
def get_matching_sequences(sequences, catalog_data, species):
create_temp_seq_file(sequences, species)
read_length = create_temp_seq_file(sequences, species)
create_temp_catalog_file(catalog_data, species)
create_blast_db(species)
run_blast(species)
stacks = filter_blast_output(species)
stacks = filter_blast_output(species, read_length=read_length, n_mismatches=4)
cleanup_temp_files(species)
return stacks
......@@ -131,7 +131,7 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
def rescue(self):
parser = argparse.ArgumentParser(
description='Regroup stacks into alleles after analysis',
usage='''python3 radseq_analysis.py rescue -i input_folder -s sequences_file [-c coverage_file -o output_file]
usage='''python3 radseq_analysis.py rescue -i input_folder -m popmap -s sequences_file [-c coverage_file -o output_file]
Options: -i\t--input-folder\tPath to a folder containing the output of denovo_map
\t -s\t--sequences\tPath to sequences file (result of haplotypes analysis)
......@@ -161,11 +161,7 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
parser.print_usage()
print()
exit(1)
if not args.popmap or not os.path.isfile(args.popmap):
print('\nError: no valid popmap file specified\n')
parser.print_usage()
print()
exit(1)
analysis(input_dir=args.input_folder,
sequences_file_path=args.sequences,
popmap_file_path=args.popmap,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment