Commit 1677244e authored by RomainFeron's avatar RomainFeron
Browse files

Refactoring complete + full header documentation for Doxygen

parent 327a5ce4
......@@ -13,20 +13,32 @@ HEADERS += \
src/analysis.h \
src/arg_parser.h \
src/depth.h \
src/distrib.h \
src/freq.h \
src/map.h \
src/marker.h \
src/markers_table.h \
src/parameters.h \
src/popmap.h \
src/process.h \
src/signif.h \
src/stats.h \
src/subset.h \
src/utils.h
SOURCES += \
src/analysis.cpp \
src/depth.cpp \
src/distrib.cpp \
src/freq.cpp \
src/main.cpp \
src/map.cpp \
src/marker.cpp \
src/markers_table.cpp \
src/popmap.cpp \
src/process.cpp \
src/signif.cpp \
src/stats.cpp \
include/kfun/kfun.cpp
include/kfun/kfun.cpp \
src/subset.cpp
......@@ -78,8 +78,7 @@ rebuild-all:
$(MAKE) -j $(JOBS)
# Linking
# $(BIN)/radsex: $(OBJS) $(INCLUDE)/kfun/kfun.o
$(BIN)/radsex: build/stats.o build/main.o build/analysis.o build/marker.o build/popmap.o build/markers_table.o build/depth.o $(INCLUDE)/kfun/kfun.o
$(BIN)/radsex: $(OBJS) $(INCLUDE)/kfun/kfun.o
$(CC) $(CFLAGS) -I $(INCLUDE) -o $(BIN)/radsex $^ $(LDFLAGS)
# Build a single object file. Added libs as dependency so they are built before object files
......
/*
* Copyright (C) 2020 Romain Feron
* This file is part of RADSex.
* RADSex is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* RADSex is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with RADSex. If not, see <https://www.gnu.org/licenses/>.
*/
/*!
* @file analysis.cpp
* @brief Implements the Analysis base class.
*/
#include "analysis.h"
Analysis::Analysis(Parameters& parameters, bool compare_groups, bool store_groups, bool store_sequence) : markers_table(&parameters) {
this->parameters = parameters;
......@@ -10,12 +34,15 @@ Analysis::Analysis(Parameters& parameters, bool compare_groups, bool store_group
this->t_begin = std::chrono::steady_clock::now(); // Record starting time
(parameters.popmap_file_path != "") ? this->popmap = Popmap(parameters, compare_groups) : this->popmap = Popmap(); // Load popmap if required by the analysis
(parameters.popmap_file_path != "") ? this->popmap = Popmap(this->parameters, this->compare_groups) : this->popmap = Popmap(); // Load popmap if required by the analysis
this->markers_table.set_popmap(&this->popmap);
}
void Analysis::print_starting_log() {
log("RADSex <" + this->parameters.command + "> started");
......@@ -25,6 +52,8 @@ void Analysis::print_starting_log() {
void Analysis::print_groups() {
log("Comparing groups \"" + this->parameters.group1 + "\" and \"" + this->parameters.group2 + "\"");
......@@ -33,6 +62,8 @@ void Analysis::print_groups() {
void Analysis::print_ending_log() {
log("RADSex <" + this->parameters.command + "> ended (total runtime: " + get_runtime(t_begin) + ")");
......@@ -41,17 +72,19 @@ void Analysis::print_ending_log() {
void Analysis::run() {
this->print_starting_log();
this->extra_setup();
// Execute parsing and processing threads
this->parsing_thread = std::thread(&MarkersTable::start_parser, std::ref(this->markers_table), store_groups, store_sequence); // Initialize parsing thread
this->parsing_thread = std::thread(&MarkersTable::start_parser, std::ref(this->markers_table), this->store_sequence, this->store_groups); // Initialize parsing thread
this->processing_thread = std::thread(&Analysis::processor, this); // Initialize processing thread with the processor method of the current instance
this->parsing_thread.join();
// this->processing_thread.join();
this->processing_thread.join();
this->generate_output();
this->print_ending_log();
......@@ -59,6 +92,8 @@ void Analysis::run() {
void Analysis::processor() {
std::vector<Marker> batch; // Store a batch of markers
......
/*
* Copyright (C) 2020 Romain Feron
* This file is part of RADSex.
* RADSex is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* RADSex is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with RADSex. If not, see <https://www.gnu.org/licenses/>.
*/
/*!
* @file analysis.h
* @brief Defines the Analysis base class.
*/
#pragma once
#include "markers_table.h"
#include "parameters.h"
#include "popmap.h"
#include "stats.h"
#include "markers_table.h"
#include "utils.h"
/*!
* \brief Base class for analyses subclasses.
*
* This class implements methods and attributes shared by all analyses:
*
* - Load parameters, load popmap, and initialize markers table parser in the constructor
* - Start markers table parsing thread and marker processing thread in run()
* - Read batches of markers from the markers queue in processor()
*
* Three virtual methods are defined to implement analyses-specific behaviour:
*
* - extra_setup(): called at the beginning of run(), implement analysis-specific variables setup (e.g. BWA index in map function)
* - process_marker(): called for each marker obtained from the markers queue, implement what to do with a marker
* - generate_output(): called at the end of run(), implement generating output file when needed
*
*/
class Analysis {
public:
std::chrono::steady_clock::time_point t_begin;
std::chrono::steady_clock::time_point t_begin; ///< Record analysis starting time
Parameters parameters; ///< Store RADSex parameters
bool compare_groups; ///< Flag indicating whether groups should be checked when loading the popmap
bool store_groups; ///< Flag indicating whether group info should be computed for each marker when parsing the markers table
bool store_sequence; ///< Flag indicating whether marker sequences should be stored for each marker when parsing the markers table
Parameters parameters;
bool compare_groups;
bool store_groups;
bool store_sequence;
Popmap popmap; ///< Popmap instance storing group for each individual
MarkersTable markers_table; ///< Markers table instance implementing the markers table parser and markers queue
Popmap popmap;
MarkersTable markers_table;
std::thread parsing_thread; ///< Markers table parsing thread
std::thread processing_thread; ///< Markers processing thread
std::thread parsing_thread;
std::thread processing_thread;
Analysis();
Analysis(Parameters& parameters, bool check_groups, bool store_groups, bool store_sequence);
/*!
* \brief Default Analysis constructor
*/
Analysis() {};
/*!
* \brief Standard Analysis constructor
*
* Initialize MarkersTable instance, load parameters and popmap
*
* \param parameters Parameters object storing the value of all RADSex parameters
* \param compare_groups If true, groups are checked when loading the popmap
* \param store_groups If true, group info is computed for each marker when parsing the markers table
* \param store_sequence If true, marker sequences are stored for each marker when parsing the markers table
*/
Analysis(Parameters& parameters, bool compare_groups, bool store_groups, bool store_sequence);
/*!
* \brief Print analysis starting log
*
* Print starting log message with the name of the analysis and group counts for analyses that make use of groups
*/
void print_starting_log();
/*!
* \brief Print group counts
*
* Print log message with group counts for groups compared by an analysis
*/
void print_groups();
/*!
* \brief Print analysis ending log
*
* Print ending log message with total runtime
*/
void print_ending_log();
/*!
* \brief Process batches of markers
*
* Get a batch of markers from the markers queue and call process_marker() on each marker. \n
* Wait if queue is empty and update progress bar with processed markers.
*/
void processor();
/*!
* \brief Run the analysis
*
* Start the parsing and processing threads and print logs at start and end. Call extra_setup() and the start and generate_output() at the end.
*/
void run();
/*!
* \brief Extra setup steps
*
* Function to be overloaded to implement extra setup steps required by an analysis, called at the beginning of run().
*/
virtual void extra_setup() {};
virtual void process_marker(Marker& marker) {};
/*!
* \brief Process a single marker
*
* Function to be overloaded to implement processing of a single marker in an analysis, called on each marker by processor().
*/
virtual void process_marker(Marker&) {};
/*!
* \brief Generate output
*
* Function to be overloaded to implement output file generation when required by an analysis, called at the end of run().
*/
virtual void generate_output() {};
};
......@@ -16,7 +16,10 @@
* along with RADSex. If not, see <https://www.gnu.org/licenses/>.
*/
/** @file */
/*!
* @file arg_parser.h
* @brief Implements CLI11 overriding classes for CLI parsing.
*/
#pragma once
#include <iostream>
......@@ -25,6 +28,7 @@
#include "parameters.h"
#include "utils.h"
/*!
* \brief Print CLI parsing failure message
*
......@@ -60,7 +64,6 @@ inline std::string failure_message(const CLI::App* parser, const CLI::Error& err
/*!
* \brief Custom CLI11 Formatter
*
......@@ -74,6 +77,7 @@ class CustomFormatter : public CLI::Formatter {
uint column_widths[3] {0, 0, 0}; ///< Maximum width of each column, in order: flags, type, description
uint border_width = 4; ///< Define number of spaces between two columns
/*!
* \brief Format help message for an option
*
......@@ -124,13 +128,11 @@ class CustomFormatter : public CLI::Formatter {
/*!
* \brief Format command description
*
* Remove command description in CLI help message (not useful).
*
* \param app Pointer to a CLI::App instance
* Remove command description in CLI help message.
* \return An empty string
*/
virtual std::string make_description(const CLI::App *app) const {
virtual std::string make_description(const CLI::App*) const {
return "";
}
......
......@@ -16,10 +16,15 @@
* along with RADSex. If not, see <https://www.gnu.org/licenses/>.
*/
/*!
* @file depth.cpp
* @brief Implements the Depth class.
*/
#include "depth.h"
Depth::Depth(Parameters& parameters, bool check_groups, bool store_groups, bool store_sequence) : Analysis(parameters, check_groups, store_groups, store_sequence) {
Depth::Depth(Parameters& parameters, bool compare_groups, bool store_groups, bool store_sequence) : Analysis(parameters, compare_groups, store_groups, store_sequence) {
this->results = DepthResults(this->markers_table.header.n_individuals);
......@@ -27,6 +32,23 @@ Depth::Depth(Parameters& parameters, bool check_groups, bool store_groups, bool
void Depth::process_marker(Marker& marker) {
for (uint i = 0; i < marker.individual_depths.size(); ++i) {
if (marker.n_individuals >= 0.75 * this->markers_table.header.n_individuals) this->results.depths[i].push_back(marker.individual_depths[i]); // Only consider markers present in at least 75% of individuals
if (marker.individual_depths[i] > 0) ++this->results.individual_markers_count[i]; // Increment total number of markers for this individual
}
}
void Depth::generate_output() {
// Generate output file
......@@ -46,8 +68,7 @@ void Depth::generate_output() {
const uint16_t max_depth = *(end - 1); // Sorted vector: maximum depth is the last element
const uint64_t total_depth = static_cast<uint64_t>(std::accumulate(start, end, 0));
// uint16_t median_depth = find_median(this->results.depths[i]);
uint16_t median_depth = 0;
const uint16_t median_depth = find_median(this->results.depths[i]);
// Output metrics
output_file << this->markers_table.header.header[i+2] << "\t" << this->popmap.get_group(this->markers_table.header.header[i + 2]) << "\t" << this->results.individual_markers_count[i] << "\t"
......@@ -56,16 +77,3 @@ void Depth::generate_output() {
}
}
void Depth::process_marker(Marker& marker) {
for (uint i = 0; i < marker.individual_depths.size(); ++i) {
if (marker.n_individuals >= 0.75 * this->markers_table.header.n_individuals) this->results.depths[i].push_back(marker.individual_depths[i]); // Only consider markers present in at least 75% of individuals
if (marker.individual_depths[i] > 0) ++this->results.individual_markers_count[i]; // Increment total number of markers for this individual
}
}
......@@ -16,7 +16,10 @@
* along with RADSex. If not, see <https://www.gnu.org/licenses/>.
*/
/** @file */
/*!
* @file depth.h
* @brief Defines the Depth class implementing the "depth" analysis.
*/
#pragma once
#include <numeric>
......@@ -29,15 +32,16 @@
/*!
* \brief DepthsResults struct
* \brief DepthResults struct
*
* Store results for the depth function.
* Store results for the "depth" analysis.
*/
struct DepthResults {
std::vector<std::vector<uint16_t>> depths; ///< Vector of size n_individuals storing the depth of each marker in each individual
std::vector<uint32_t> individual_markers_count; ///< Vector of size individuals storing the number of markers retained in each individual
std::vector<uint32_t> individual_markers_count; ///< Vector of size n_individuals storing the number of markers retained in each individual
/*!
* \brief DepthsResults default constructor
......@@ -47,7 +51,7 @@ struct DepthResults {
/*!
* \brief DepthsResults constructor
* \brief DepthsResults standard constructor
*
* Initialize the size of #depths and #individual_markers_count to n_individuals.
*
......@@ -66,12 +70,10 @@ struct DepthResults {
/*!
* \brief Class Depth
* \brief Implements the "depth" analysis
*
* Compute the minimum, maximum, median, and average marker depth for each individual. \n
* This function creates a parsing thread which reads a markers table file and stores markers into a queue,
* and a processing thread which reads batches of markers from the queue and compute metrics. \n
* After all markers are processed, the function generates an tabulated output file with columns: \n
* After all markers are processed, generate a tabulated output file with columns: \n
* Individual | Group | Markers | Retained | Min_depth | Max_depth | Median_depth | Average_depth
*
*/
......@@ -80,18 +82,49 @@ class Depth: public Analysis {
public:
/*!
* \brief Default Depth constructor
*/
Depth();
Depth(Parameters& parameters, bool check_groups, bool store_groups, bool store_sequence);
/*!
* \brief Standard Depth constructor
*
* Initialize the DepthResults instance.
*
* \param parameters Parameters object storing the value of all RADSex parameters
* \param compare_groups If true, groups are checked when loading the popmap
* \param store_groups If true, group info is computed for each marker when parsing the markers table
* \param store_sequence If true, marker sequences are stored for each marker when parsing the markers table
*/
Depth(Parameters& parameters, bool compare_groups = false, bool store_groups = false, bool store_sequence = false);
/*!
* \brief Process a single marker
*
* If a marker is present in more than 75% of individuals, store individual depths in results.
*
* \param marker Current marker info stored in a Marker instance.
*/
void process_marker(Marker& marker);
/*!
* \brief generate_output
* \brief Generate the output file
*
* Compute minimum, maximum, median, and average depth for all markers retained in process_marker. \n
* Generate a tabulated output file with columns: \n
* Individual | Group | Markers | Retained | Min_depth | Max_depth | Median_depth | Average_depth
*/
void generate_output();
private:
DepthResults results; ///< Structure to store analysis results
DepthResults results; ///< DepthResults instance to store individual marker depths and total number of retained markers
};
......@@ -16,107 +16,53 @@
* along with RADSex. If not, see <https://www.gnu.org/licenses/>.
*/
#include "distrib.h"
void distrib(Parameters& parameters) {
// Collect info
const std::chrono::steady_clock::time_point t_begin = std::chrono::steady_clock::now();
const Popmap popmap = load_popmap(parameters, false);
const Header header = get_header(parameters.markers_table_path);
log("RADSex distrib started");
log("Comparing groups \"" + parameters.group1 + "\" and \"" + parameters.group2 + "\"");
// Initialize structure to store results
DistribResults results;
// Initialize parsing parameters
MarkersQueue markers_queue;
std::mutex queue_mutex;
bool parsing_ended = false;
// Execute parsing and processing threads
std::thread parsing_thread(table_parser, std::ref(parameters), std::ref(popmap), std::ref(markers_queue), std::ref(parsing_ended), true, false);
std::thread processing_thread(processor, std::ref(markers_queue), std::ref(parameters), std::ref(queue_mutex), std::ref(results), std::ref(parsing_ended), BATCH_SIZE);
parsing_thread.join();
processing_thread.join();
double chi_squared = 0;
// Compute p-values of association with group
for (uint g = 0; g <= popmap.group_counts.at(parameters.group1); ++g) {
for (uint h = 0; h <= popmap.group_counts.at(parameters.group2); ++h) {
if (g + h != 0) { // Exclude markers absent from all individuals
chi_squared = get_chi_squared(g, h, popmap.group_counts.at(parameters.group1), popmap.group_counts.at(parameters.group2));
results.distribution[g][h].second = std::min(1.0, get_chi_squared_p(chi_squared)); // p-value corrected with Bonferroni, with max value of 1
}
}
}
if (not parameters.disable_correction) parameters.signif_threshold /= results.n_markers; // Bonferroni correction: divide threshold by total number of tests (i.e. number of retained markers)
/*!
* @file distrib.cpp
* @brief Implements the Distrib class.
*/
// Generate the output file
std::ofstream output_file(parameters.output_file_path);
output_file << parameters.group1 << "\t" << parameters.group2 << "\t" << "Markers" << "\t" << "P" << "\t" << "Signif" << "\t" << "Bias" << "\n";
#include "distrib.h"
for (uint g = 0; g <= popmap.group_counts.at(parameters.group1); ++g) {
for (uint h = 0; h <= popmap.group_counts.at(parameters.group2); ++h) {
void Distrib::process_marker(Marker& marker) {
if (g + h != 0) { // Exclude markers absent from all individuals
if (marker.n_individuals > 0) {
output_file << g << "\t" << h << "\t" << results.distribution[g][h].first << "\t" << results.distribution[g][h].second << "\t"
<< (static_cast<float>(results.distribution[g][h].second) < parameters.signif_threshold ? "True" : "False") << "\t"
<< static_cast<float>(g) / static_cast<float>(popmap.group_counts.at(parameters.group1)) - (static_cast<float>(h) / static_cast<float>(popmap.group_counts.at(parameters.group2))) << "\n";
++this->results.distribution[marker.group_counts[parameters.group1]][marker.group_counts[parameters.group2]].first;
++this->results.n_markers;
}
}
}
log("RADSex distrib ended (total runtime: " + get_runtime(t_begin) + ")");
}
void processor(MarkersTable& markers_queue, Parameters& parameters, DistribResults& results, bool& parsing_ended) {
std::vector<Marker> batch; // Store a batch of markers
bool keep_going = true;
const uint marker_processed_tick = static_cast<uint>(markers_queue.n_markers / 100); // Number of markers for 1% of total markers, used in logging
uint64_t n_processed_markers = 0;
while (keep_going) {
void Distrib::generate_output() {
// Get a batch of markers from the queue
batch = get_batch(markers_queue);