Commit f3ac4bdb authored by Romain Feron's avatar Romain Feron
Browse files

Implemented parameters to control significance threshold and to disable bonferroni correction

parent cc72651f
...@@ -86,9 +86,9 @@ void distrib(Parameters& parameters) { ...@@ -86,9 +86,9 @@ void distrib(Parameters& parameters) {
// Generate the output file // Generate the output file
if (!parameters.output_matrix) { if (!parameters.output_matrix) {
output_sex_distribution(parameters.output_file_path, results, n_males, n_females); output_distrib(parameters.output_file_path, results, n_males, n_females, parameters.signif_threshold, parameters.disable_correction);
} else { } else {
output_sex_distribution_matrix(parameters.output_file_path, results, n_males, n_females); output_distrib_matrix(parameters.output_file_path, results, n_males, n_females);
} }
} }
} }
...@@ -175,7 +175,7 @@ void map(Parameters& parameters) { ...@@ -175,7 +175,7 @@ void map(Parameters& parameters) {
} while (input_file); } while (input_file);
// Generate the output file // Generate the output file
output_mapping(parameters.output_file_path, sequences); output_map(parameters.output_file_path, sequences, parameters.signif_threshold, parameters.disable_correction);
input_file.close(); input_file.close();
free(opt); free(opt);
......
#include "output.h" #include "output.h"
void output_process_reads(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth) { void output_process(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth) {
/* Input: /* Input:
* - Path to an output file * - Path to an output file
...@@ -48,7 +48,7 @@ void output_process_reads(std::string& output_file_path, std::vector<std::string ...@@ -48,7 +48,7 @@ void output_process_reads(std::string& output_file_path, std::vector<std::string
void output_sex_distribution_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females) { void output_distrib_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females) {
/* Input: /* Input:
* - Path to an output file * - Path to an output file
...@@ -73,14 +73,14 @@ void output_sex_distribution_matrix(std::string& output_file_path, sd_table& res ...@@ -73,14 +73,14 @@ void output_sex_distribution_matrix(std::string& output_file_path, sd_table& res
void output_sex_distribution(std::string& output_file_path, sd_table& results, uint n_males, uint n_females) { void output_distrib(std::string& output_file_path, sd_table& results, uint n_males, uint n_females, float signif_threshold, bool disable_correction) {
/* Input: /* Input:
* - Path to an output file * - Path to an output file
* - A matrix of counts [Males: [Females: Count]] * - A matrix of counts [Males: [Females: Count]]
* Output: a table with the following structure: * Output: a table with the following structure:
* Number of males | Number of females | Number of sequences | P-value * Number of males | Number of females | Number of markers | P-value | Significance
* <int> | <int> | <int> | <float> * <int> | <int> | <int> | <float> | <bool>
*/ */
std::ofstream output_file; std::ofstream output_file;
...@@ -90,7 +90,6 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u ...@@ -90,7 +90,6 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u
output_file << "Males" << "\t" << "Females" << "\t" << "Markers" << "\t" << "P" << "\t" << "Signif" << "\n"; output_file << "Males" << "\t" << "Females" << "\t" << "Markers" << "\t" << "P" << "\t" << "Signif" << "\n";
uint n_markers = 0; uint n_markers = 0;
double threshold = 0.05;
// Determine the total number of markers // Determine the total number of markers
for (uint f=0; f <= n_females; ++f) { for (uint f=0; f <= n_females; ++f) {
...@@ -99,13 +98,14 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u ...@@ -99,13 +98,14 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u
} }
} }
threshold /= n_markers; if (not disable_correction) signif_threshold /= n_markers;
// Generate output file // Generate output file
for (uint m=0; m <= n_males; ++m) { for (uint m=0; m <= n_males; ++m) {
for (uint f=0; f <= n_females; ++f) { for (uint f=0; f <= n_females; ++f) {
if (f + m != 0) { if (f + m != 0) {
output_file << m << "\t" << f << "\t" << results[m][f].first << "\t" << results[m][f].second << "\t" << (results[m][f].second < threshold ? "True" : "False") << "\n"; output_file << m << "\t" << f << "\t" << results[m][f].first << "\t" << results[m][f].second << "\t" <<
(static_cast<float>(results[m][f].second) < signif_threshold ? "True" : "False") << "\n";
} }
} }
} }
...@@ -113,7 +113,7 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u ...@@ -113,7 +113,7 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u
void output_group_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header) { void output_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header) {
/* Input: /* Input:
* - Path to an output file * - Path to an output file
...@@ -171,18 +171,18 @@ void output_group_loci(std::string& output_file_path, std::unordered_map<std::st ...@@ -171,18 +171,18 @@ void output_group_loci(std::string& output_file_path, std::unordered_map<std::st
// Create output file for the mapping analysis // Create output file for the mapping analysis
void output_mapping(std::string& output_file_path, std::vector<MappedSequence> sequences) { void output_map(std::string& output_file_path, std::vector<MappedSequence> sequences, float signif_threshold, bool disable_correction) {
std::ofstream output_file; std::ofstream output_file;
output_file.open(output_file_path); output_file.open(output_file_path);
output_file << "Sequence" << "\t" << "Contig" << "\t" << "Position" << "\t" << "SexBias" << "\t" << "P" << "\t" << "Signif" << "\n"; output_file << "Sequence" << "\t" << "Contig" << "\t" << "Position" << "\t" << "SexBias" << "\t" << "P" << "\t" << "Signif" << "\n";
double threshold = 0.05 / sequences.size(); if (not disable_correction) signif_threshold /= sequences.size();
for (auto s: sequences) { for (auto s: sequences) {
output_file << s.id << "\t" << s.contig << "\t" << s.position << "\t" << s.sex_bias << "\t" << s.p << "\t" << output_file << s.id << "\t" << s.contig << "\t" << s.position << "\t" << s.sex_bias << "\t" << s.p << "\t" <<
(s.p < threshold ? "True" : "False") << "\n"; (static_cast<float>(s.p) < signif_threshold ? "True" : "False") << "\n";
} }
output_file.close(); output_file.close();
......
...@@ -8,16 +8,16 @@ ...@@ -8,16 +8,16 @@
#include "utils.h" #include "utils.h"
// Generate an output file for "process" // Generate an output file for "process"
void output_process_reads(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth); void output_process(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth);
// Generate an output file for "distrib" in matrix format // Generate an output file for "distrib" in matrix format
void output_sex_distribution_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females); void output_distrib_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females);
// Generate an output file for "distrib" in table format // Generate an output file for "distrib" in table format
void output_sex_distribution(std::string& output_file_path, sd_table& results, uint n_males, uint n_females); void output_distrib(std::string& output_file_path, sd_table& results, uint n_males, uint n_females, float signif_threshold, bool disable_correction);
// Generate an output file for "loci" // Generate an output file for "loci"
void output_group_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header); void output_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header);
// Generate an output file for "map" // Generate an output file for "map"
void output_mapping(std::string& output_file_path, std::vector<MappedSequence> sequences); void output_map(std::string& output_file_path, std::vector<MappedSequence> sequences, float signif_threshold, bool disable_correction);
...@@ -19,7 +19,7 @@ struct Parameters { ...@@ -19,7 +19,7 @@ struct Parameters {
uint n_threads = 1; uint n_threads = 1;
uint min_depth = 1; uint min_depth = 1;
float signif_threshold = static_cast<float>(0.05); float signif_threshold = static_cast<float>(0.05);
bool bonferroni = true; bool disable_correction = true;
// "loci" specific parameters // "loci" specific parameters
uint loci_max_distance = 1; uint loci_max_distance = 1;
......
...@@ -90,7 +90,7 @@ void process(Parameters& parameters) { ...@@ -90,7 +90,7 @@ void process(Parameters& parameters) {
// Generate the output file // Generate the output file
output_process_reads(parameters.output_file_path, individuals, results, parameters.min_depth); output_process(parameters.output_file_path, individuals, results, parameters.min_depth);
} }
......
...@@ -79,6 +79,8 @@ void RADSex::setup_distrib_parser() { ...@@ -79,6 +79,8 @@ void RADSex::setup_distrib_parser() {
this->add_popmap(subparser); this->add_popmap(subparser);
this->add_output_file(subparser); this->add_output_file(subparser);
this->add_min_depth(subparser); this->add_min_depth(subparser);
this->add_signif_threshold(subparser);
this->add_disable_correction(subparser);
this->add_output_matrix(subparser); this->add_output_matrix(subparser);
} }
...@@ -115,6 +117,8 @@ void RADSex::setup_map_parser() { ...@@ -115,6 +117,8 @@ void RADSex::setup_map_parser() {
this->add_min_depth(subparser); this->add_min_depth(subparser);
this->add_map_min_quality(subparser); this->add_map_min_quality(subparser);
this->add_map_min_frequency(subparser); this->add_map_min_frequency(subparser);
this->add_signif_threshold(subparser);
this->add_disable_correction(subparser);
} }
...@@ -134,6 +138,8 @@ void RADSex::setup_signif_parser() { ...@@ -134,6 +138,8 @@ void RADSex::setup_signif_parser() {
this->add_popmap(subparser); this->add_popmap(subparser);
this->add_min_depth(subparser); this->add_min_depth(subparser);
this->add_output_fasta(subparser); this->add_output_fasta(subparser);
this->add_signif_threshold(subparser);
this->add_disable_correction(subparser);
} }
...@@ -208,12 +214,12 @@ void RADSex::add_min_depth(CLI::App* subparser) { ...@@ -208,12 +214,12 @@ void RADSex::add_min_depth(CLI::App* subparser) {
} }
void RADSex::add_signif_threshold(CLI::App* subparser) { void RADSex::add_signif_threshold(CLI::App* subparser) {
CLI::Option* option = subparser->add_option("-S,--signif-threshold", this->parameters.min_depth, "Significance threshold when assessing significance", true); CLI::Option* option = subparser->add_option("-S,--signif-threshold", this->parameters.signif_threshold, "Significance threshold when assessing significance", true);
option->check(CLI::Range(0, 1)); option->check(CLI::Range(0.0, 1.0));
} }
void RADSex::add_bonferroni(CLI::App* subparser) { void RADSex::add_disable_correction(CLI::App* subparser) {
subparser->add_flag("-b,--bonferroni-correction", this->parameters.min_depth, "If set, Bonferroni correction will be used when assessing significance"); subparser->add_flag("-C,--disable-correction", this->parameters.disable_correction, "If set, Bonferroni correction will NOT be used when assessing significance");
} }
void RADSex::add_subset_min_males(CLI::App* subparser) { void RADSex::add_subset_min_males(CLI::App* subparser) {
......
...@@ -69,7 +69,7 @@ class RADSex { ...@@ -69,7 +69,7 @@ class RADSex {
void add_n_threads(CLI::App* subparser); void add_n_threads(CLI::App* subparser);
void add_min_depth(CLI::App* subparser); void add_min_depth(CLI::App* subparser);
void add_signif_threshold(CLI::App* subparser); void add_signif_threshold(CLI::App* subparser);
void add_bonferroni(CLI::App* subparser); void add_disable_correction(CLI::App* subparser);
// "loci" specific parameters // "loci" specific parameters
void add_loci_max_distance(CLI::App* subparser); void add_loci_max_distance(CLI::App* subparser);
......
...@@ -64,7 +64,7 @@ void signif(Parameters& parameters) { ...@@ -64,7 +64,7 @@ void signif(Parameters& parameters) {
++seq_count; ++seq_count;
chi_squared = get_chi_squared(sex_count[0], sex_count[1], total_males, total_females); chi_squared = get_chi_squared(sex_count[0], sex_count[1], total_males, total_females);
p = get_chi_squared_p(chi_squared); p = get_chi_squared_p(chi_squared);
if (p < 0.05) { // First pass: we filter sequences with at least one male or one female and non-corrected p < 0.05 if (static_cast<float>(p) < parameters.signif_threshold) { // First pass: we filter sequences with at least one male or one female and non-corrected p < 0.05
candidate_sequences[temp_line][0] = p; candidate_sequences[temp_line][0] = p;
candidate_sequences[temp_line][1] = sex_count[0]; candidate_sequences[temp_line][1] = sex_count[0];
candidate_sequences[temp_line][2] = sex_count[1]; candidate_sequences[temp_line][2] = sex_count[1];
...@@ -87,11 +87,11 @@ void signif(Parameters& parameters) { ...@@ -87,11 +87,11 @@ void signif(Parameters& parameters) {
} while(input_file); } while(input_file);
double significance_threshold = 0.05 / seq_count; // Bonferroni correction: divide threshold by number of tests if (not parameters.disable_correction) parameters.signif_threshold /= seq_count; // Bonferroni correction: divide threshold by number of tests
// Second pass: filter with bonferroni // Second pass: filter with bonferroni
for (auto sequence: candidate_sequences) { for (auto sequence: candidate_sequences) {
if (sequence.second[0] < significance_threshold) { if (static_cast<float>(sequence.second[0]) < parameters.signif_threshold) {
if (parameters.output_fasta) { if (parameters.output_fasta) {
line = split(sequence.first, "\t"); line = split(sequence.first, "\t");
output_file << ">" << line[0] << "_" << int(sequence.second[1]) << "M_" << int(sequence.second[2]) << "F_cov:" << parameters.min_depth << "_p:" << sequence.second[0] << "\n" << line[1] << "\n"; output_file << ">" << line[0] << "_" << int(sequence.second[1]) << "M_" << int(sequence.second[2]) << "F_cov:" << parameters.min_depth << "_p:" << sequence.second[0] << "\n" << line[1] << "\n";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment