Commit 7e2d349d authored by RomainFeron's avatar RomainFeron
Browse files

Implemented map function using table parser

parent b05cbfd0
...@@ -96,7 +96,7 @@ void table_parser(Parameters& parameters, const Popmap& popmap, MarkersQueue& ma ...@@ -96,7 +96,7 @@ void table_parser(Parameters& parameters, const Popmap& popmap, MarkersQueue& ma
queue_mutex.unlock(); queue_mutex.unlock();
tmp_queue_real_size = 0; tmp_queue_real_size = 0;
} }
if (marker_n % (10 * marker_processed_tick) == 0) std::cerr << "Processed " << marker_n << " markers (" << marker_n / (marker_processed_tick) << " %)" << std::endl; if (marker_n % (10 * marker_processed_tick) == 0) std::cerr << "Loaded " << marker_n << " markers (" << marker_n / (marker_processed_tick) << " %)" << std::endl;
// Reset variables // Reset variables
temp = ""; temp = "";
field_n = 0; field_n = 0;
......
...@@ -47,7 +47,7 @@ inline Parameters parse_args(int& argc, char** argv) { ...@@ -47,7 +47,7 @@ inline Parameters parse_args(int& argc, char** argv) {
// Map subcommand parser // Map subcommand parser
CLI::App* map = parser.add_subcommand("map", "Align markers to a genome and compute metrics for each aligned marker"); CLI::App* map = parser.add_subcommand("map", "Align markers to a genome and compute metrics for each aligned marker");
map->add_option("-s,--markers-file", parameters.subset_file_path, "Path to a set of markers to align, either a depth table from \"process\", \"signif\", or \"subset\" or a fasta file from \"subset\" or \"signif\"")->required()->check(CLI::ExistingFile); map->add_option("-s,--markers-file", parameters.markers_table_path, "Path to a set of markers to align, either a depth table from \"process\", \"signif\", or \"subset\" or a fasta file from \"subset\" or \"signif\"")->required()->check(CLI::ExistingFile);
map->add_option("-o,--output-file", parameters.output_file_path, "Path to the output file (mapping position, group bias, and probability of association with group for all aligned markers)")->required(); map->add_option("-o,--output-file", parameters.output_file_path, "Path to the output file (mapping position, group bias, and probability of association with group for all aligned markers)")->required();
map->add_option("-p,--popmap", parameters.popmap_file_path, "Path to a tabulated file specifying groups for all individuals (population map)")->required()->check(CLI::ExistingFile); map->add_option("-p,--popmap", parameters.popmap_file_path, "Path to a tabulated file specifying groups for all individuals (population map)")->required()->check(CLI::ExistingFile);
map->add_option("-g,--genome-file", parameters.genome_file_path, "Path to the genome file in fasta format")->required()->check(CLI::ExistingFile); map->add_option("-g,--genome-file", parameters.genome_file_path, "Path to the genome file in fasta format")->required()->check(CLI::ExistingFile);
......
...@@ -31,8 +31,11 @@ void map(Parameters& parameters) { ...@@ -31,8 +31,11 @@ void map(Parameters& parameters) {
std::unordered_map<std::string, uint64_t> contig_lengths = get_contig_lengths(parameters.genome_file_path); std::unordered_map<std::string, uint64_t> contig_lengths = get_contig_lengths(parameters.genome_file_path);
std::thread parsing_thread(table_parser, std::ref(parameters), std::ref(popmap), std::ref(markers_queue), std::ref(queue_mutex), std::ref(header), std::ref(parsing_ended), true, false); // Check if bwa index files exist for the genome and build index if it's missing
std::thread processing_thread(processor, std::ref(markers_queue), std::ref(parameters), std::ref(popmap), std::ref(queue_mutex), std::ref(aligned_markers), std::ref(parsing_ended), 100); build_bwa_index(parameters);
std::thread parsing_thread(table_parser, std::ref(parameters), std::ref(popmap), std::ref(markers_queue), std::ref(queue_mutex), std::ref(header), std::ref(parsing_ended), false, false);
std::thread processing_thread(processor, std::ref(markers_queue), std::ref(parameters), std::ref(popmap), std::ref(queue_mutex), std::ref(aligned_markers), std::ref(parsing_ended), 1000);
parsing_thread.join(); parsing_thread.join();
processing_thread.join(); processing_thread.join();
...@@ -69,8 +72,13 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm ...@@ -69,8 +72,13 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
// Filtering // Filtering
uint16_t min_individuals = static_cast<uint16_t>(popmap.n_individuals * parameters.map_min_frequency); uint16_t min_individuals = static_cast<uint16_t>(popmap.n_individuals * parameters.map_min_frequency);
// Load bwa index for the genome // Loading bwa index
bwaidx_t* index = load_bwa_index(parameters); bwaidx_t* index = bwa_idx_load(parameters.genome_file_path.c_str(), BWA_IDX_ALL);
if (index == nullptr) {
std::cerr << "**Error: failed to load index for reference \"" + parameters.genome_file_path + "\"." << std::endl;
exit(1);
}
// BWA mem parameters // BWA mem parameters
mem_opt_t *opt; mem_opt_t *opt;
...@@ -82,21 +90,24 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm ...@@ -82,21 +90,24 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
int best_alignment[3] {0, -1, 0}; // Info about best alignment: index, score, count int best_alignment[3] {0, -1, 0}; // Info about best alignment: index, score, count
AlignedMarker seq; AlignedMarker seq;
// For logging uint x[4] = {0, 0, 0, 0};
uint32_t markers_stats[2] {0, 0}; // Total markers, retained markers
while (keep_going) { while (keep_going) {
// Get a batch of markers from the queue // Get a batch of markers from the queue
batch = get_batch(markers_queue, queue_mutex, batch_size); batch = get_batch(markers_queue, queue_mutex, batch_size);
++x[2];
if (batch.size() > 0) { // Batch not empty if (batch.size() > 0) { // Batch not empty
for (auto marker: batch) { ++x[3];
for (auto& marker: batch) {
++x[0];
++markers_stats[0]; // Increment total markers count if (marker.n_individuals >= min_individuals) {
if (marker.n_individuals > min_individuals) { ++x[1];
ar = mem_align1(opt, index->bwt, index->bns, index->pac, marker.sequence.size(), marker.sequence.c_str()); // Align the marker to the reference ar = mem_align1(opt, index->bwt, index->bns, index->pac, marker.sequence.size(), marker.sequence.c_str()); // Align the marker to the reference
...@@ -130,20 +141,17 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm ...@@ -130,20 +141,17 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
seq.contig = index->bns->anns[best.rid].name; seq.contig = index->bns->anns[best.rid].name;
seq.position = best.pos; seq.position = best.pos;
aligned_markers.push_back(seq); aligned_markers.push_back(seq);
++markers_stats[1]; // Increment retained marlers count
} }
free(best.cigar); // Deallocate cigar string for best hit free(best.cigar); // Deallocate cigar string for best hit
free(ar.a); // Deallocate the hit list free(ar.a); // Deallocate the hit list
} }
if (markers_stats[0] % 100000 == 0) std::cout << "**Info: processed " << markers_stats[0] / 1000 << " K. markers and retained " << markers_stats[1] / 1000 << " K." << std::endl;
// Reset variables // Reset variables
best_alignment[0] = 0; best_alignment[0] = 0;
best_alignment[1] = -1; best_alignment[1] = -1;
best_alignment[2] = 0; best_alignment[2] = 0;
break;
} }
...@@ -196,7 +204,7 @@ std::unordered_map<std::string, uint64_t> get_contig_lengths(const std::string& ...@@ -196,7 +204,7 @@ std::unordered_map<std::string, uint64_t> get_contig_lengths(const std::string&
bwaidx_t* load_bwa_index(Parameters& parameters) { void build_bwa_index(Parameters& parameters) {
// Generate BWA index if it does not exist // Generate BWA index if it does not exist
std::ifstream bwa_index_temp; std::ifstream bwa_index_temp;
...@@ -217,12 +225,4 @@ bwaidx_t* load_bwa_index(Parameters& parameters) { ...@@ -217,12 +225,4 @@ bwaidx_t* load_bwa_index(Parameters& parameters) {
} }
std::cerr << "**Info: loading BWA index file" << std::endl; std::cerr << "**Info: loading BWA index file" << std::endl;
bwaidx_t* index = bwa_idx_load(parameters.genome_file_path.c_str(), BWA_IDX_ALL); // load the BWA index
if (index == nullptr) {
std::cerr << "**Error: failed to load index for reference \"" + parameters.genome_file_path + "\"." << std::endl;
exit(1);
}
return index;
} }
...@@ -31,4 +31,4 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm ...@@ -31,4 +31,4 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
std::unordered_map<std::string, uint64_t> get_contig_lengths(const std::string& genome_file_path); std::unordered_map<std::string, uint64_t> get_contig_lengths(const std::string& genome_file_path);
// //
bwaidx_t* load_bwa_index(Parameters& parameters); void build_bwa_index(Parameters& parameters);
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment