Commit f3ac4bdb authored by Romain Feron's avatar Romain Feron
Browse files

Implemented parameters to control significance threshold and to disable bonferroni correction

parent cc72651f
......@@ -86,9 +86,9 @@ void distrib(Parameters& parameters) {
// Generate the output file
if (!parameters.output_matrix) {
output_sex_distribution(parameters.output_file_path, results, n_males, n_females);
output_distrib(parameters.output_file_path, results, n_males, n_females, parameters.signif_threshold, parameters.disable_correction);
} else {
output_sex_distribution_matrix(parameters.output_file_path, results, n_males, n_females);
output_distrib_matrix(parameters.output_file_path, results, n_males, n_females);
}
}
}
......@@ -175,7 +175,7 @@ void map(Parameters& parameters) {
} while (input_file);
// Generate the output file
output_mapping(parameters.output_file_path, sequences);
output_map(parameters.output_file_path, sequences, parameters.signif_threshold, parameters.disable_correction);
input_file.close();
free(opt);
......
#include "output.h"
void output_process_reads(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth) {
void output_process(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth) {
/* Input:
* - Path to an output file
......@@ -48,7 +48,7 @@ void output_process_reads(std::string& output_file_path, std::vector<std::string
void output_sex_distribution_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females) {
void output_distrib_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females) {
/* Input:
* - Path to an output file
......@@ -73,14 +73,14 @@ void output_sex_distribution_matrix(std::string& output_file_path, sd_table& res
void output_sex_distribution(std::string& output_file_path, sd_table& results, uint n_males, uint n_females) {
void output_distrib(std::string& output_file_path, sd_table& results, uint n_males, uint n_females, float signif_threshold, bool disable_correction) {
/* Input:
* - Path to an output file
* - A matrix of counts [Males: [Females: Count]]
* Output: a table with the following structure:
* Number of males | Number of females | Number of sequences | P-value
* <int> | <int> | <int> | <float>
* Number of males | Number of females | Number of markers | P-value | Significance
* <int> | <int> | <int> | <float> | <bool>
*/
std::ofstream output_file;
......@@ -90,7 +90,6 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u
output_file << "Males" << "\t" << "Females" << "\t" << "Markers" << "\t" << "P" << "\t" << "Signif" << "\n";
uint n_markers = 0;
double threshold = 0.05;
// Determine the total number of markers
for (uint f=0; f <= n_females; ++f) {
......@@ -99,13 +98,14 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u
}
}
threshold /= n_markers;
if (not disable_correction) signif_threshold /= n_markers;
// Generate output file
for (uint m=0; m <= n_males; ++m) {
for (uint f=0; f <= n_females; ++f) {
if (f + m != 0) {
output_file << m << "\t" << f << "\t" << results[m][f].first << "\t" << results[m][f].second << "\t" << (results[m][f].second < threshold ? "True" : "False") << "\n";
output_file << m << "\t" << f << "\t" << results[m][f].first << "\t" << results[m][f].second << "\t" <<
(static_cast<float>(results[m][f].second) < signif_threshold ? "True" : "False") << "\n";
}
}
}
......@@ -113,7 +113,7 @@ void output_sex_distribution(std::string& output_file_path, sd_table& results, u
void output_group_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header) {
void output_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header) {
/* Input:
* - Path to an output file
......@@ -171,18 +171,18 @@ void output_group_loci(std::string& output_file_path, std::unordered_map<std::st
// Create output file for the mapping analysis
void output_mapping(std::string& output_file_path, std::vector<MappedSequence> sequences) {
void output_map(std::string& output_file_path, std::vector<MappedSequence> sequences, float signif_threshold, bool disable_correction) {
std::ofstream output_file;
output_file.open(output_file_path);
output_file << "Sequence" << "\t" << "Contig" << "\t" << "Position" << "\t" << "SexBias" << "\t" << "P" << "\t" << "Signif" << "\n";
double threshold = 0.05 / sequences.size();
if (not disable_correction) signif_threshold /= sequences.size();
for (auto s: sequences) {
output_file << s.id << "\t" << s.contig << "\t" << s.position << "\t" << s.sex_bias << "\t" << s.p << "\t" <<
(s.p < threshold ? "True" : "False") << "\n";
(static_cast<float>(s.p) < signif_threshold ? "True" : "False") << "\n";
}
output_file.close();
......
......@@ -8,16 +8,16 @@
#include "utils.h"
// Generate an output file for "process"
void output_process_reads(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth);
void output_process(std::string& output_file_path, std::vector<std::string>& individuals, std::unordered_map<std::string, std::unordered_map<std::string, uint16_t>>& results, uint min_depth);
// Generate an output file for "distrib" in matrix format
void output_sex_distribution_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females);
void output_distrib_matrix(std::string& output_file_path, sd_table& results, uint n_males, uint n_females);
// Generate an output file for "distrib" in table format
void output_sex_distribution(std::string& output_file_path, sd_table& results, uint n_males, uint n_females);
void output_distrib(std::string& output_file_path, sd_table& results, uint n_males, uint n_females, float signif_threshold, bool disable_correction);
// Generate an output file for "loci"
void output_group_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header);
void output_loci(std::string& output_file_path, std::unordered_map<std::string, std::vector<Locus>>& results, std::vector<std::string>& header);
// Generate an output file for "map"
void output_mapping(std::string& output_file_path, std::vector<MappedSequence> sequences);
void output_map(std::string& output_file_path, std::vector<MappedSequence> sequences, float signif_threshold, bool disable_correction);
......@@ -19,7 +19,7 @@ struct Parameters {
uint n_threads = 1;
uint min_depth = 1;
float signif_threshold = static_cast<float>(0.05);
bool bonferroni = true;
bool disable_correction = true;
// "loci" specific parameters
uint loci_max_distance = 1;
......
......@@ -90,7 +90,7 @@ void process(Parameters& parameters) {
// Generate the output file
output_process_reads(parameters.output_file_path, individuals, results, parameters.min_depth);
output_process(parameters.output_file_path, individuals, results, parameters.min_depth);
}
......
......@@ -79,6 +79,8 @@ void RADSex::setup_distrib_parser() {
this->add_popmap(subparser);
this->add_output_file(subparser);
this->add_min_depth(subparser);
this->add_signif_threshold(subparser);
this->add_disable_correction(subparser);
this->add_output_matrix(subparser);
}
......@@ -115,6 +117,8 @@ void RADSex::setup_map_parser() {
this->add_min_depth(subparser);
this->add_map_min_quality(subparser);
this->add_map_min_frequency(subparser);
this->add_signif_threshold(subparser);
this->add_disable_correction(subparser);
}
......@@ -134,6 +138,8 @@ void RADSex::setup_signif_parser() {
this->add_popmap(subparser);
this->add_min_depth(subparser);
this->add_output_fasta(subparser);
this->add_signif_threshold(subparser);
this->add_disable_correction(subparser);
}
......@@ -208,12 +214,12 @@ void RADSex::add_min_depth(CLI::App* subparser) {
}
void RADSex::add_signif_threshold(CLI::App* subparser) {
CLI::Option* option = subparser->add_option("-S,--signif-threshold", this->parameters.min_depth, "Significance threshold when assessing significance", true);
option->check(CLI::Range(0, 1));
CLI::Option* option = subparser->add_option("-S,--signif-threshold", this->parameters.signif_threshold, "Significance threshold when assessing significance", true);
option->check(CLI::Range(0.0, 1.0));
}
void RADSex::add_bonferroni(CLI::App* subparser) {
subparser->add_flag("-b,--bonferroni-correction", this->parameters.min_depth, "If set, Bonferroni correction will be used when assessing significance");
void RADSex::add_disable_correction(CLI::App* subparser) {
subparser->add_flag("-C,--disable-correction", this->parameters.disable_correction, "If set, Bonferroni correction will NOT be used when assessing significance");
}
void RADSex::add_subset_min_males(CLI::App* subparser) {
......
......@@ -69,7 +69,7 @@ class RADSex {
void add_n_threads(CLI::App* subparser);
void add_min_depth(CLI::App* subparser);
void add_signif_threshold(CLI::App* subparser);
void add_bonferroni(CLI::App* subparser);
void add_disable_correction(CLI::App* subparser);
// "loci" specific parameters
void add_loci_max_distance(CLI::App* subparser);
......
......@@ -64,7 +64,7 @@ void signif(Parameters& parameters) {
++seq_count;
chi_squared = get_chi_squared(sex_count[0], sex_count[1], total_males, total_females);
p = get_chi_squared_p(chi_squared);
if (p < 0.05) { // First pass: we filter sequences with at least one male or one female and non-corrected p < 0.05
if (static_cast<float>(p) < parameters.signif_threshold) { // First pass: we filter sequences with at least one male or one female and non-corrected p < 0.05
candidate_sequences[temp_line][0] = p;
candidate_sequences[temp_line][1] = sex_count[0];
candidate_sequences[temp_line][2] = sex_count[1];
......@@ -87,11 +87,11 @@ void signif(Parameters& parameters) {
} while(input_file);
double significance_threshold = 0.05 / seq_count; // Bonferroni correction: divide threshold by number of tests
if (not parameters.disable_correction) parameters.signif_threshold /= seq_count; // Bonferroni correction: divide threshold by number of tests
// Second pass: filter with bonferroni
for (auto sequence: candidate_sequences) {
if (sequence.second[0] < significance_threshold) {
if (static_cast<float>(sequence.second[0]) < parameters.signif_threshold) {
if (parameters.output_fasta) {
line = split(sequence.first, "\t");
output_file << ">" << line[0] << "_" << int(sequence.second[1]) << "M_" << int(sequence.second[2]) << "F_cov:" << parameters.min_depth << "_p:" << sequence.second[0] << "\n" << line[1] << "\n";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment