Commit 3a7340ca authored by RomainFeron's avatar RomainFeron
Browse files

Added corrected p-values output to subset

parent f74a72d0
......@@ -50,7 +50,7 @@ void Signif::generate_output() {
if (not this->parameters.output_fasta) {
output_file << "#source:signif;min_depth:" << parameters.min_depth << ";signif_threshold:" << parameters.signif_threshold <<
";bonferroni:" << std::boolalpha << (not parameters.disable_correction);
";bonferroni:" << std::boolalpha << (not parameters.disable_correction) << "\n";
output_file << print_list(this->markers_table.header.header, "\t") << "\n";
}
......
......@@ -24,37 +24,65 @@
#include "subset.h"
void Subset::extra_setup() {
void Subset::process_marker(Marker& marker) {
if (marker.n_individuals > 0) ++this->results.n_markers; // Increment total number of markers for Bonferroni correction
if (marker.group_counts[parameters.group1] >= this->parameters.subset_min_group1 and
marker.group_counts[parameters.group1] <= this->parameters.subset_max_group1 and
marker.group_counts[parameters.group2] >= this->parameters.subset_min_group2 and
marker.group_counts[parameters.group2] <= this->parameters.subset_max_group2 and
marker.n_individuals >= this->parameters.subset_min_individuals and
marker.n_individuals <= this->parameters.subset_max_individuals) {
marker.p = get_p_association(marker.group_counts[parameters.group1], marker.group_counts[parameters.group2], this->popmap.get_count(parameters.group1), this->popmap.get_count(parameters.group2));
this->results.markers.push_back(marker);
}
}
this->output_file = open_output(parameters.output_file_path);
if (not parameters.output_fasta) {
void Subset::generate_output() {
std::ofstream output_file = open_output(this->parameters.output_file_path);
if (not this->parameters.output_fasta) {
output_file << "#source:subset;min_depth:" << parameters.min_depth << ";filters:[" <<
parameters.subset_min_group1 << "," << parameters.subset_max_group1 << "," <<
parameters.subset_min_group2 << "," << parameters.subset_max_group2 << "," <<
parameters.subset_min_individuals << "," << parameters.subset_max_individuals <<
"];signif_threshold:" << parameters.signif_threshold << ";bonferroni:" << std::boolalpha << (not parameters.disable_correction);
this->output_file << print_list(this->markers_table.header.header, "\t") << "\n";
}
}
if (not this->parameters.disable_correction) {
// Bonferroni correction: divide threshold by total number of tests (i.e. number of retained markers)
this->parameters.signif_threshold /= this->results.n_markers;
} else {
// Set number of markers to 1 so corrected p-values are the same as original p-values
this->results.n_markers = 1;
void Subset::process_marker(Marker& marker) {
}
if (marker.group_counts[parameters.group1] >= this->parameters.subset_min_group1 and marker.group_counts[parameters.group1] <= this->parameters.subset_max_group1 and
marker.group_counts[parameters.group2] >= this->parameters.subset_min_group2 and marker.group_counts[parameters.group2] <= this->parameters.subset_max_group2 and
marker.n_individuals >= this->parameters.subset_min_individuals and marker.n_individuals <= this->parameters.subset_max_individuals) {
// Second pass: filter markers with threshold corrected p < 0.05
for (auto marker: this->results.markers) {
marker.p = get_p_association(marker.group_counts[parameters.group1], marker.group_counts[parameters.group2], this->popmap.get_count(parameters.group1), this->popmap.get_count(parameters.group2));
marker.p_corr = std::min(1.0, marker.p * static_cast<double>(results.n_markers));
this->parameters.output_fasta ? marker.output_as_fasta(this->output_file, this->parameters.min_depth) : marker.output_as_table(this->output_file);
this->parameters.output_fasta ? marker.output_as_fasta(output_file, this->parameters.min_depth) : marker.output_as_table(output_file);
}
output_file.close();
}
......@@ -34,6 +34,20 @@
#include "utils.h"
/*!
* \brief SubsetResults struct
*
* Store results for the "subset" analysis.
*/
struct SubsetResults {
std::vector<Marker> markers; ///< Vector of markers retained after filtering
uint64_t n_markers = 0; ///< Number of markers present in at least one individual with depth higher than min_depth (for Bonferroni correction)
};
/*!
* \brief Implements the "subset" analysis
*
......@@ -70,23 +84,28 @@ class Subset: public Analysis {
/*!
* \brief Process a single marker
*
* Output markers verifying conditions defined with CLI arguments --min-group1, --max-group1, --min-group2, --max-group1, --min-individuals, and --max-individuals.
* Store markers verifying conditions defined with CLI arguments
* --min-group1, --max-group1, --min-group2, --max-group1, --min-individuals, and --max-individuals.
*
* \param marker Current marker info stored in a Marker instance.
*/
void process_marker(Marker& marker);
/*!
* \brief Extra setup steps
* \brief Generate the output file
*
* Open output file and print header if not a FASTA file.
* Apply Bonferroni correction (if not disabled) to retained markers p-values. \n
* Export markers verifying the conditions defined with CLI arguments, either: \n
* - a markers depth table, i.e. tabulated file for columns "ID | Sequence | Depth in individual 1 | .... | Depth in individual N"
* - a FASTA file with headers formatted as
* "<id>_<group1:count>_<group2:count>_p:<p-value of association with group>_pcorr:<corrected p-value>_mindepth:<minimum depth to consider a marker present in the analysis>"
*/
void extra_setup();
void generate_output();
private:
SubsetResults results;
std::ofstream output_file; ///< Output file stream
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment