Commit dfb00de3 authored by Romain Feron's avatar Romain Feron
Browse files

Implemented individual coverage module

parent 74e9d630
...@@ -4,4 +4,5 @@ from radseq_analysis.file_handler.individual_files import get_individual_sequenc ...@@ -4,4 +4,5 @@ from radseq_analysis.file_handler.individual_files import get_individual_sequenc
from radseq_analysis.file_handler.popmap import load_popmap from radseq_analysis.file_handler.popmap import load_popmap
from radseq_analysis.file_handler.positions import load_positions_list from radseq_analysis.file_handler.positions import load_positions_list
from radseq_analysis.file_handler.sequences import get_sequences from radseq_analysis.file_handler.sequences import get_sequences
from radseq_analysis.file_handler.markers import get_markers
from radseq_analysis.file_handler.coverage import get_coverage from radseq_analysis.file_handler.coverage import get_coverage
def get_markers(markers_file_path):
'''
Extract information from a markers file (list of markers ID)
'''
markers_file = open(markers_file_path)
markers = [line[:-1] for line in markers_file if line[:-1]]
return markers
...@@ -2,5 +2,6 @@ from radseq_analysis.modules.sex_linked_haplotypes import analysis as sex_linked ...@@ -2,5 +2,6 @@ from radseq_analysis.modules.sex_linked_haplotypes import analysis as sex_linked
from radseq_analysis.modules.loci_matrix import analysis as loci_matrix from radseq_analysis.modules.loci_matrix import analysis as loci_matrix
from radseq_analysis.modules.stacks_privacy import analysis as stacks_privacy from radseq_analysis.modules.stacks_privacy import analysis as stacks_privacy
from radseq_analysis.modules.rescue import analysis as rescue from radseq_analysis.modules.rescue import analysis as rescue
from radseq_analysis.modules.individual_coverage import analysis as individual_coverage
from radseq_analysis.modules.visualize import analysis as visualization from radseq_analysis.modules.visualize import analysis as visualization
from radseq_analysis.modules.analysis import analysis from radseq_analysis.modules.analysis import analysis
...@@ -4,6 +4,7 @@ from radseq_analysis.modules import sex_linked_haplotypes ...@@ -4,6 +4,7 @@ from radseq_analysis.modules import sex_linked_haplotypes
from radseq_analysis.modules import loci_matrix from radseq_analysis.modules import loci_matrix
from radseq_analysis.modules import stacks_privacy from radseq_analysis.modules import stacks_privacy
from radseq_analysis.modules import rescue from radseq_analysis.modules import rescue
from radseq_analysis.modules import individual_coverage
from radseq_analysis.modules import visualization from radseq_analysis.modules import visualization
from radseq_analysis.file_handler import load_popmap from radseq_analysis.file_handler import load_popmap
from radseq_analysis.file_handler import load_positions_list from radseq_analysis.file_handler import load_positions_list
...@@ -16,6 +17,7 @@ def analysis(input_dir=None, ...@@ -16,6 +17,7 @@ def analysis(input_dir=None,
positions_file_path=None, positions_file_path=None,
sequences_file_path=None, sequences_file_path=None,
coverage_file_path=None, coverage_file_path=None,
markers_file_path=None,
analysis=None): analysis=None):
parameters = Parameters(files_dir=input_dir, parameters = Parameters(files_dir=input_dir,
...@@ -50,5 +52,7 @@ def analysis(input_dir=None, ...@@ -50,5 +52,7 @@ def analysis(input_dir=None,
stacks_privacy(catalog_file_path, parameters) stacks_privacy(catalog_file_path, parameters)
elif analysis == 'rescue': elif analysis == 'rescue':
rescue(sequences_file_path, catalog_file_path, individual_files_paths, coverage_file_path, parameters) rescue(sequences_file_path, catalog_file_path, individual_files_paths, coverage_file_path, parameters)
elif analysis == 'coverage':
individual_coverage(markers_file_path, catalog_file_path, individual_files_paths, coverage_file_path, parameters)
elif analysis == 'visualize': elif analysis == 'visualize':
visualization(input_file_path, popmap_file_path, output_file_path, parameters) visualization(input_file_path, popmap_file_path, output_file_path, parameters)
from radseq_analysis import file_handler
from radseq_analysis import output
from radseq_analysis.shared import Stack
import os
from collections import defaultdict
def get_individual_names(individual_files_paths):
return [os.path.split(file)[1].split('.')[0] for file in individual_files_paths]
def initialize_markers(markers_list):
markers = {}
for marker_id in markers_list:
s = Stack()
s.add_haplotype(marker_id)
markers[marker_id] = s
return markers
def get_individual_data(individual_files_paths, correspondance, bar=False):
individual_data = {}
try:
from progress.bar import Bar
bar = True
except ImportError:
bar = False
if bar:
progress_bar = Bar(' - Extracting individual data :', max=len(individual_files_paths))
else:
print(' - Extracting individual data ...')
for individual_file_path in individual_files_paths:
if bar:
progress_bar.next()
name = os.path.split(individual_file_path)[1].split('.')[0]
data = file_handler.get_individual_sequences(individual_file_path,
correspondance)
individual_data[name] = data
if bar:
print()
return individual_data
def fill_individual_data(markers, individual_data, coverage):
for stack_id, stack in markers.items():
for haplotype_id, haplotype in stack.haplotypes.items():
temp = defaultdict(int)
for name, data in individual_data.items():
if haplotype_id in data.keys():
if coverage:
temp[name] = int(int(data[haplotype_id]) / coverage[name])
else:
temp[name] = data[haplotype_id]
else:
temp[name] = 0
markers[stack_id].haplotypes[haplotype_id].individuals = temp
def analysis(markers_file_path, catalog_file_path,
individual_files_paths, coverage_file_path, global_parameters):
print(' - Loading extracted markers and catalog data ...')
coverage = None
if coverage_file_path:
coverage = file_handler.get_coverage(coverage_file_path)
markers_list = file_handler.get_markers(markers_file_path)
correspondance = file_handler.get_info_from_catalog(catalog_file_path,
loci_list=markers_list,
consensus=False,
correspondance=True)
individual_names = get_individual_names(individual_files_paths)
print(' - Creating stacks ...')
markers = initialize_markers(markers_list)
individual_data = get_individual_data(individual_files_paths, correspondance)
print(' - Merging individual data in stacks ...')
fill_individual_data(markers, individual_data, coverage)
output.markers(global_parameters.output_file_path, markers, individual_names)
...@@ -2,3 +2,4 @@ from radseq_analysis.output.loci_matrix import loci_matrix ...@@ -2,3 +2,4 @@ from radseq_analysis.output.loci_matrix import loci_matrix
from radseq_analysis.output.sex_linked_haplotypes import sex_linked_haplotypes from radseq_analysis.output.sex_linked_haplotypes import sex_linked_haplotypes
from radseq_analysis.output.stacks_privacy import stacks_privacy from radseq_analysis.output.stacks_privacy import stacks_privacy
from radseq_analysis.output.stacks import stacks from radseq_analysis.output.stacks import stacks
from radseq_analysis.output.markers import markers
# TODO: sort output by marker and individuals ?
def markers(output_file_path, markers_data, individual_names):
'''
Output markers data in the following format:
TODO
'''
output_file = open(output_file_path, 'w')
output_file.write('Marker_ID' + '\t')
output_file.write('\t'.join(individual_names) + '\n')
for marker_id, marker in markers_data.items():
output_file.write(marker_id + '\t')
output_file.write('\t'.join([str(marker.haplotypes[marker_id].individuals[name]) for
name in individual_names]) + '\n')
...@@ -15,6 +15,7 @@ Command: heatmap\tGenerates a matrix of haplotypes sex distribution ...@@ -15,6 +15,7 @@ Command: heatmap\tGenerates a matrix of haplotypes sex distribution
\t haplotypes\tExtract haplotypes present in a given number of males and females \t haplotypes\tExtract haplotypes present in a given number of males and females
\t frequencies\tCalculate haplotypes frequencies distribution in the population \t frequencies\tCalculate haplotypes frequencies distribution in the population
\t rescue\tRegroup stacks into alleles after analysis \t rescue\tRegroup stacks into alleles after analysis
\t coverage\tExtract individual coverage for a set of markers
\t visualize\tVisualize analyses results using R \t visualize\tVisualize analyses results using R
''' '''
) )
...@@ -169,6 +170,43 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m ...@@ -169,6 +170,43 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
coverage_file_path=args.coverage_file, coverage_file_path=args.coverage_file,
analysis='rescue') analysis='rescue')
def coverage(self):
parser = argparse.ArgumentParser(
description='Extract individual coverage for a set of markers',
usage='''python3 radseq_analysis.py coverage -i input_folder -a markers_file [-c coverage_file -o output_file]
Options: -i\t--input-folder\tPath to a folder containing the output of denovo_map
\t -a\t--markers\tPath to markers file (list of markers to extract)
\t -c\t--coverage-file\tPath to a coverage file (result of coverage analysis)
\t -o\t--output-file\tPath to output file (default: markers_coverage.tsv)
''')
parser.add_argument('--input-folder', '-i',
help='Path to a folder containing the output of denovo_map')
parser.add_argument('--markers', '-a',
help='Path to markers file')
parser.add_argument('--coverage-file', '-c',
help='Path to coverage file', nargs='?')
parser.add_argument('--output-file', '-o',
help='Path to output file', nargs='?',
default='extracted_alleles.tsv')
args = parser.parse_args(sys.argv[2:])
if not args.input_folder or not os.path.isdir(args.input_folder):
print('\nError: no valid input folder specified\n')
parser.print_usage()
print()
exit(1)
if not args.markers or not os.path.isfile(args.markers):
print('\nError: no valid markers file specified\n')
parser.print_usage()
print()
exit(1)
analysis(input_dir=args.input_folder,
markers_file_path=args.markers,
output_file_path=args.output_file,
coverage_file_path=args.coverage_file,
analysis='coverage')
def visualize(self): def visualize(self):
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Visualize analyses results using R', description='Visualize analyses results using R',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment