Commit 053f9f20 authored by RomainFeron's avatar RomainFeron
Browse files

Added corrected p-values in output

parent f93730e3
...@@ -41,11 +41,22 @@ void Distrib::process_marker(Marker& marker) { ...@@ -41,11 +41,22 @@ void Distrib::process_marker(Marker& marker) {
void Distrib::generate_output() { void Distrib::generate_output() {
if (not this->parameters.disable_correction) this->parameters.signif_threshold /= this->results.n_markers; // Bonferroni correction: divide threshold by total number of tests (i.e. number of retained markers) if (not this->parameters.disable_correction) {
// Bonferroni correction: divide threshold by total number of tests (i.e. number of retained markers)
this->parameters.signif_threshold /= this->results.n_markers;
} else {
// Set number of markers to 1 so corrected p-values are the same as original p-values
this->results.n_markers = 1;
}
// Generate the output file // Generate the output file
std::ofstream output_file = open_output(this->parameters.output_file_path); std::ofstream output_file = open_output(this->parameters.output_file_path);
output_file << this->parameters.group1 << "\t" << this->parameters.group2 << "\t" << "Markers" << "\t" << "P" << "\t" << "Signif" << "\t" << "Bias" << "\n"; output_file << this->parameters.group1 << "\t" << this->parameters.group2 << "\t" << "Markers" << "\t" <<
"P" << "\t" << "CorrectedP" << "\t" << "Signif" << "\t" << "Bias" << "\n";
for (uint g = 0; g <= this->popmap.get_count(this->parameters.group1); ++g) { for (uint g = 0; g <= this->popmap.get_count(this->parameters.group1); ++g) {
...@@ -56,6 +67,7 @@ void Distrib::generate_output() { ...@@ -56,6 +67,7 @@ void Distrib::generate_output() {
this->results.distribution[g][h].second = get_p_association(g, h, this->popmap.get_count(parameters.group1), this->popmap.get_count(parameters.group2)); this->results.distribution[g][h].second = get_p_association(g, h, this->popmap.get_count(parameters.group1), this->popmap.get_count(parameters.group2));
output_file << g << "\t" << h << "\t" << this->results.distribution[g][h].first << "\t" << this->results.distribution[g][h].second << "\t" output_file << g << "\t" << h << "\t" << this->results.distribution[g][h].first << "\t" << this->results.distribution[g][h].second << "\t"
<< std::min(1.0, this->results.distribution[g][h].second * this->results.n_markers) << "\t"
<< (static_cast<float>(this->results.distribution[g][h].second) < this->parameters.signif_threshold ? "True" : "False") << "\t" << (static_cast<float>(this->results.distribution[g][h].second) < this->parameters.signif_threshold ? "True" : "False") << "\t"
<< static_cast<float>(g) / static_cast<float>(this->popmap.get_count(this->parameters.group1)) - (static_cast<float>(h) / static_cast<float>(this->popmap.get_count(this->parameters.group2))) << "\n"; << static_cast<float>(g) / static_cast<float>(this->popmap.get_count(this->parameters.group1)) - (static_cast<float>(h) / static_cast<float>(this->popmap.get_count(this->parameters.group2))) << "\n";
......
...@@ -38,10 +38,6 @@ void Map::extra_setup() { ...@@ -38,10 +38,6 @@ void Map::extra_setup() {
// Compute contig lengths from genome file // Compute contig lengths from genome file
this->load_contig_lengths(); this->load_contig_lengths();
// Open output file and output the header
this->output_file = open_output(this->parameters.output_file_path);
this->output_file << "Contig\tPosition\tLength\tMarker_id\tBias\tP\tSignif\n";
} }
...@@ -105,12 +101,27 @@ void Map::process_marker(Marker& marker) { ...@@ -105,12 +101,27 @@ void Map::process_marker(Marker& marker) {
void Map::generate_output() { void Map::generate_output() {
if (not this->parameters.disable_correction) this->parameters.signif_threshold /= this->results.n_markers; // Open output file and output the header
this->output_file = open_output(this->parameters.output_file_path);
this->output_file << "Contig\tPosition\tLength\tMarker_id\tBias\tP\tPcorr\tSignif\n";
if (not this->parameters.disable_correction) {
// Bonferroni correction: divide threshold by total number of tests (i.e. number of retained markers)
this->parameters.signif_threshold /= this->results.n_markers;
} else {
// Set number of markers to 1 so corrected p-values are the same as original p-values
this->results.n_markers = 1;
}
for (auto marker: this->results.aligned_markers) { for (auto marker: this->results.aligned_markers) {
output_file << marker.contig << "\t" << marker.position << "\t" << this->contig_lengths[marker.contig] << "\t" << marker.id << "\t" output_file << marker.contig << "\t" << marker.position << "\t" << this->contig_lengths[marker.contig] << "\t" << marker.id << "\t"
<< marker.bias << "\t" << marker.p << "\t" << (static_cast<float>(marker.p) < this->parameters.signif_threshold ? "True" : "False") << "\n"; << marker.bias << "\t" << marker.p << "\t" << std::min(1.0, marker.p * this->results.n_markers) << "\t"
<< (static_cast<float>(marker.p) < this->parameters.signif_threshold ? "True" : "False") << "\n";
} }
......
...@@ -69,6 +69,6 @@ void Marker::output_as_fasta(std::ofstream& output_file, const uint min_depth) c ...@@ -69,6 +69,6 @@ void Marker::output_as_fasta(std::ofstream& output_file, const uint min_depth) c
output_file << ">" << this->id; output_file << ">" << this->id;
for (auto group: this->group_counts) output_file << "_" << group.first << ":" << group.second; for (auto group: this->group_counts) output_file << "_" << group.first << ":" << group.second;
output_file << "_p:" << this->p << "_mindepth:" << min_depth << "\n" << this->sequence << "\n"; output_file << "_p:" << this->p << "_pcorr:" << this->p_corr << "_mindepth:" << min_depth << "\n" << this->sequence << "\n";
} }
...@@ -44,6 +44,7 @@ class Marker { ...@@ -44,6 +44,7 @@ class Marker {
std::unordered_map<std::string, uint> group_counts; ///< Map storing group counts for the marker {group -> number of individuals in which the marker is present for this group} std::unordered_map<std::string, uint> group_counts; ///< Map storing group counts for the marker {group -> number of individuals in which the marker is present for this group}
uint n_individuals = 0; ///< Total number of individuals in which the marker is present uint n_individuals = 0; ///< Total number of individuals in which the marker is present
float p = 0; ///< P-value of association with group float p = 0; ///< P-value of association with group
float p_corr = 0; ///< P-value of association with group after Bonferroni correction
/*! /*!
* \brief Default Marker constructor * \brief Default Marker constructor
...@@ -90,6 +91,7 @@ class Marker { ...@@ -90,6 +91,7 @@ class Marker {
* *
* \param output_file Output file stream * \param output_file Output file stream
* \param min_depth Minimum depth to consider a marker present in the current analysis * \param min_depth Minimum depth to consider a marker present in the current analysis
* \param n_markers Number of markers retained from the dataset to compute corrected p-value
*/ */
void output_as_fasta(std::ofstream& output_file, const uint min_depth) const; void output_as_fasta(std::ofstream& output_file, const uint min_depth) const;
......
...@@ -47,15 +47,33 @@ void Signif::generate_output() { ...@@ -47,15 +47,33 @@ void Signif::generate_output() {
std::ofstream output_file = open_output(this->parameters.output_file_path); std::ofstream output_file = open_output(this->parameters.output_file_path);
if (not this->parameters.output_fasta) output_file << print_list(this->markers_table.header.header, "\t") << "\n"; if (not this->parameters.output_fasta) {
if (not this->parameters.disable_correction) this->parameters.signif_threshold /= this->results.n_markers; // Bonferroni correction: divide threshold by number of tests output_file << "#source:signif;min_depth:" << parameters.min_depth << ";signif_threshold:" << parameters.signif_threshold <<
";bonferroni:" << std::boolalpha << (not parameters.disable_correction);
output_file << print_list(this->markers_table.header.header, "\t") << "\n";
}
if (not this->parameters.disable_correction) {
// Bonferroni correction: divide threshold by total number of tests (i.e. number of retained markers)
this->parameters.signif_threshold /= this->results.n_markers;
} else {
// Set number of markers to 1 so corrected p-values are the same as original p-values
this->results.n_markers = 1;
}
// Second pass: filter markers with threshold corrected p < 0.05 // Second pass: filter markers with threshold corrected p < 0.05
for (auto marker: this->results.candidate_markers) { for (auto marker: this->results.candidate_markers) {
if (static_cast<float>(marker.p) < this->parameters.signif_threshold) { if (static_cast<float>(marker.p) < this->parameters.signif_threshold) {
marker.p_corr = std::min(1.0, marker.p * static_cast<double>(results.n_markers));
this->parameters.output_fasta ? marker.output_as_fasta(output_file, this->parameters.min_depth) : marker.output_as_table(output_file); this->parameters.output_fasta ? marker.output_as_fasta(output_file, this->parameters.min_depth) : marker.output_as_table(output_file);
} }
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment