Commit 2cb2085b authored by Romain Feron's avatar Romain Feron
Browse files

Removed association module

Statistics are now done in R when plotting the heatmap.
Test will be added again in this pipeline in a module to generate fasta file for significant sequences
parent 56c79066
......@@ -2,6 +2,5 @@ from radseq_analysis.modules.sex_linked_haplotypes import analysis as sex_linked
from radseq_analysis.modules.loci_matrix import analysis as loci_matrix
from radseq_analysis.modules.stacks_privacy import analysis as stacks_privacy
from radseq_analysis.modules.rescue import analysis as rescue
from radseq_analysis.modules.association import analysis as association
from radseq_analysis.modules.visualize import analysis as visualization
from radseq_analysis.modules.analysis import analysis
......@@ -4,7 +4,6 @@ from radseq_analysis.modules import sex_linked_haplotypes
from radseq_analysis.modules import loci_matrix
from radseq_analysis.modules import stacks_privacy
from radseq_analysis.modules import rescue
from radseq_analysis.modules import association
from radseq_analysis.modules import visualization
from radseq_analysis.file_handler import load_popmap
from radseq_analysis.file_handler import load_positions_list
......@@ -53,5 +52,3 @@ def analysis(input_dir=None,
rescue(sequences_file_path, catalog_file_path, individual_files_paths, coverage_file_path, parameters)
elif analysis == 'visualize':
visualization(input_file_path, popmap_file_path, output_file_path, parameters)
elif analysis == 'association':
association(haplotypes_file_path, parameters)
from collections import defaultdict
from radseq_analysis import file_handler
from radseq_analysis import output
def analysis(haplotypes_file_path, global_parameters):
haplotypes, numbers = file_handler.get_haplotypes(haplotypes_file_path, global_parameters,
haplotypes=True, numbers=True)
output.plink_map_file(global_parameters.output_file_path,
numbers,
global_parameters.n_males,
global_parameters.n_females)
individuals = defaultdict(lambda: defaultdict(str))
for haplotype in sorted(haplotypes.keys(), key=int):
for individual in haplotypes[haplotype]:
if haplotypes[haplotype][individual] == 'consensus':
individuals[individual][haplotype] = 'A'
else:
individuals[individual][haplotype] = 'G'
output.plink_ped_file(global_parameters.output_file_path,
global_parameters.popmap,
individuals)
......@@ -2,5 +2,3 @@ from radseq_analysis.output.loci_matrix import loci_matrix
from radseq_analysis.output.sex_linked_haplotypes import sex_linked_haplotypes
from radseq_analysis.output.stacks_privacy import stacks_privacy
from radseq_analysis.output.stacks import stacks
from radseq_analysis.output.plink_map_file import plink_map_file
from radseq_analysis.output.plink_ped_file import plink_ped_file
def plink_map_file(output_file_root, haplotypes, n_males, n_females):
'''
Output a fake plink map file with all loci on chromosome 1 separated by 100 bp
'''
output_file_path = output_file_root + '.map'
with open(output_file_path, 'w') as output_file:
for i, haplotype in enumerate(sorted(haplotypes.keys(), key=int)):
id_n = (haplotype + '_' +
str(haplotypes[haplotype]['consensus']['males']) + '_' +
str(haplotypes[haplotype]['consensus']['females']))
output_file.write('\t'.join(['1', id_n, '0', str(100 * i)]) + '\n')
def plink_ped_file(output_file_root, popmap, haplotypes):
'''
Output a plink ped file based on haplotypes distribution
'''
output_file_path = output_file_root + '.ped'
with open(output_file_path, 'w') as output_file:
for individual, data in haplotypes.items():
if popmap[individual] == 'M':
output_file.write('1' + '\t')
output_file.write(individual + '\t')
output_file.write('0' + '\t')
output_file.write('0' + '\t')
output_file.write('1' + '\t')
output_file.write('1' + '\t')
else:
output_file.write('1' + '\t')
output_file.write(individual + '\t')
output_file.write('0' + '\t')
output_file.write('0' + '\t')
output_file.write('2' + '\t')
output_file.write('2' + '\t')
output_file.write('\t'.join([data[haplotype] + '\t' + data[haplotype] for
haplotype in sorted(data.keys(), key=int)]) + '\n')
......@@ -15,7 +15,6 @@ Command: heatmap\tGenerates a matrix of haplotypes sex distribution
\t haplotypes\tExtract haplotypes present in a given number of males and females
\t frequencies\tCalculate haplotypes frequencies distribution in the population
\t rescue\tRegroup stacks into alleles after analysis
\t association\tGenerates files for plink association analysis
\t visualize\tVisualize analyses results using R
'''
)
......@@ -129,38 +128,6 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
output_file_path=args.output_file,
analysis='frequencies')
def association(self):
parser = argparse.ArgumentParser(
description='Generates files for plink association analysis',
usage='''python3 radseq_analysis.py association -i input_folder -m popmap [-o output_file]
Options: -i\t--input-folder\tPath to a folder containing the output of denovo_map
\t -m\t--popmap\tPath to population map
\t -o\t--output-file\tPath to output file (default: association.[ped/map])
''')
parser.add_argument('--input-folder', '-i',
help='Path to a folder containing the output of denovo_map')
parser.add_argument('--output-file', '-o',
help='Path to output file', nargs='?',
default='association')
parser.add_argument('--popmap', '-m',
help='Path to a popmap file')
args = parser.parse_args(sys.argv[2:])
if not args.input_folder or not os.path.isdir(args.input_folder):
print('\nError: no valid input folder specified\n')
parser.print_usage()
print()
exit(1)
if not args.popmap or not os.path.isfile(args.popmap):
print('\nError: no valid popmap file specified\n')
parser.print_usage()
print()
exit(1)
analysis(input_dir=args.input_folder,
output_file_path=args.output_file,
popmap_file_path=args.popmap,
analysis='association')
def rescue(self):
parser = argparse.ArgumentParser(
description='Regroup stacks into alleles after analysis',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment