Commit 7e2d349d authored by RomainFeron's avatar RomainFeron
Browse files

Implemented map function using table parser

parent b05cbfd0
......@@ -96,7 +96,7 @@ void table_parser(Parameters& parameters, const Popmap& popmap, MarkersQueue& ma
queue_mutex.unlock();
tmp_queue_real_size = 0;
}
if (marker_n % (10 * marker_processed_tick) == 0) std::cerr << "Processed " << marker_n << " markers (" << marker_n / (marker_processed_tick) << " %)" << std::endl;
if (marker_n % (10 * marker_processed_tick) == 0) std::cerr << "Loaded " << marker_n << " markers (" << marker_n / (marker_processed_tick) << " %)" << std::endl;
// Reset variables
temp = "";
field_n = 0;
......
......@@ -47,7 +47,7 @@ inline Parameters parse_args(int& argc, char** argv) {
// Map subcommand parser
CLI::App* map = parser.add_subcommand("map", "Align markers to a genome and compute metrics for each aligned marker");
map->add_option("-s,--markers-file", parameters.subset_file_path, "Path to a set of markers to align, either a depth table from \"process\", \"signif\", or \"subset\" or a fasta file from \"subset\" or \"signif\"")->required()->check(CLI::ExistingFile);
map->add_option("-s,--markers-file", parameters.markers_table_path, "Path to a set of markers to align, either a depth table from \"process\", \"signif\", or \"subset\" or a fasta file from \"subset\" or \"signif\"")->required()->check(CLI::ExistingFile);
map->add_option("-o,--output-file", parameters.output_file_path, "Path to the output file (mapping position, group bias, and probability of association with group for all aligned markers)")->required();
map->add_option("-p,--popmap", parameters.popmap_file_path, "Path to a tabulated file specifying groups for all individuals (population map)")->required()->check(CLI::ExistingFile);
map->add_option("-g,--genome-file", parameters.genome_file_path, "Path to the genome file in fasta format")->required()->check(CLI::ExistingFile);
......
......@@ -31,8 +31,11 @@ void map(Parameters& parameters) {
std::unordered_map<std::string, uint64_t> contig_lengths = get_contig_lengths(parameters.genome_file_path);
std::thread parsing_thread(table_parser, std::ref(parameters), std::ref(popmap), std::ref(markers_queue), std::ref(queue_mutex), std::ref(header), std::ref(parsing_ended), true, false);
std::thread processing_thread(processor, std::ref(markers_queue), std::ref(parameters), std::ref(popmap), std::ref(queue_mutex), std::ref(aligned_markers), std::ref(parsing_ended), 100);
// Check if bwa index files exist for the genome and build index if it's missing
build_bwa_index(parameters);
std::thread parsing_thread(table_parser, std::ref(parameters), std::ref(popmap), std::ref(markers_queue), std::ref(queue_mutex), std::ref(header), std::ref(parsing_ended), false, false);
std::thread processing_thread(processor, std::ref(markers_queue), std::ref(parameters), std::ref(popmap), std::ref(queue_mutex), std::ref(aligned_markers), std::ref(parsing_ended), 1000);
parsing_thread.join();
processing_thread.join();
......@@ -69,8 +72,13 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
// Filtering
uint16_t min_individuals = static_cast<uint16_t>(popmap.n_individuals * parameters.map_min_frequency);
// Load bwa index for the genome
bwaidx_t* index = load_bwa_index(parameters);
// Loading bwa index
bwaidx_t* index = bwa_idx_load(parameters.genome_file_path.c_str(), BWA_IDX_ALL);
if (index == nullptr) {
std::cerr << "**Error: failed to load index for reference \"" + parameters.genome_file_path + "\"." << std::endl;
exit(1);
}
// BWA mem parameters
mem_opt_t *opt;
......@@ -82,21 +90,24 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
int best_alignment[3] {0, -1, 0}; // Info about best alignment: index, score, count
AlignedMarker seq;
// For logging
uint32_t markers_stats[2] {0, 0}; // Total markers, retained markers
uint x[4] = {0, 0, 0, 0};
while (keep_going) {
// Get a batch of markers from the queue
batch = get_batch(markers_queue, queue_mutex, batch_size);
++x[2];
if (batch.size() > 0) { // Batch not empty
for (auto marker: batch) {
++x[3];
for (auto& marker: batch) {
++x[0];
++markers_stats[0]; // Increment total markers count
if (marker.n_individuals >= min_individuals) {
if (marker.n_individuals > min_individuals) {
++x[1];
ar = mem_align1(opt, index->bwt, index->bns, index->pac, marker.sequence.size(), marker.sequence.c_str()); // Align the marker to the reference
......@@ -130,20 +141,17 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
seq.contig = index->bns->anns[best.rid].name;
seq.position = best.pos;
aligned_markers.push_back(seq);
++markers_stats[1]; // Increment retained marlers count
}
free(best.cigar); // Deallocate cigar string for best hit
free(ar.a); // Deallocate the hit list
}
if (markers_stats[0] % 100000 == 0) std::cout << "**Info: processed " << markers_stats[0] / 1000 << " K. markers and retained " << markers_stats[1] / 1000 << " K." << std::endl;
// Reset variables
best_alignment[0] = 0;
best_alignment[1] = -1;
best_alignment[2] = 0;
break;
}
......@@ -196,7 +204,7 @@ std::unordered_map<std::string, uint64_t> get_contig_lengths(const std::string&
bwaidx_t* load_bwa_index(Parameters& parameters) {
void build_bwa_index(Parameters& parameters) {
// Generate BWA index if it does not exist
std::ifstream bwa_index_temp;
......@@ -217,12 +225,4 @@ bwaidx_t* load_bwa_index(Parameters& parameters) {
}
std::cerr << "**Info: loading BWA index file" << std::endl;
bwaidx_t* index = bwa_idx_load(parameters.genome_file_path.c_str(), BWA_IDX_ALL); // load the BWA index
if (index == nullptr) {
std::cerr << "**Error: failed to load index for reference \"" + parameters.genome_file_path + "\"." << std::endl;
exit(1);
}
return index;
}
......@@ -31,4 +31,4 @@ void processor(MarkersQueue& markers_queue, Parameters& parameters, Popmap& popm
std::unordered_map<std::string, uint64_t> get_contig_lengths(const std::string& genome_file_path);
//
bwaidx_t* load_bwa_index(Parameters& parameters);
void build_bwa_index(Parameters& parameters);
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment