Commit 949a1144 by Romain Feron

### Refactored chi squared calculation into a separate file

parent 4cbeeac0
 ... ... @@ -116,34 +116,3 @@ void significant_sequences(Parameters& parameters) { input_file.close(); } } double get_chi_squared_p(double chi_squared) { /* p is given by 1 - P(chi_squared, df) where P is the Cumulative Distribution Function of the Chi-squared distribution. * P is also the regularized gamma function. Here we use samtool's implementation of the regularized gamma function by Hen Li. * Source: https://en.wikipedia.org/wiki/Chi-squared_distribution#Cumulative_distribution_function * DF is always 1 in our case */ return 1 - kf_gammap(0.5, chi_squared/2); } double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females) { /* Chi squared is computed from the number of males and females with the sequence, as well as total number of males and females in the population. * Yates correction is applied and the shortcut formula for 2x2 table is used. * Source: https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity */ uint N = total_males + total_females; uint Ns = total_males, Nf = total_females; uint Na = n_males + n_females, Nb = total_males + total_females - n_males - n_females; int temp = (n_males * total_females) - (n_females * total_males); temp = std::abs(temp); double temp2 = std::max(0.0, double(temp) - N/2); temp2 *= temp2; return N * temp2 / Ns / Nf / Na / Nb; }
 ... ... @@ -7,17 +7,8 @@ #include "parameters.h" #include "popmap_file.h" #include "output.h" #include "kfun/kfun.h" #include "stats.h" // Main function implementing the analysis void significant_sequences(Parameters& parameters); // Compute Chi-square from the number of males and females with a sequence as well as total males and total females double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females); // Compute the p-value for a Chi-square double get_chi_squared_p(double chi_squared);
src/stats.cpp 0 → 100644
 #include "stats.h" double get_chi_squared_p(double chi_squared) { /* p is given by 1 - P(chi_squared, df) where P is the Cumulative Distribution Function of the Chi-squared distribution. * P is also the regularized gamma function. Here we use samtool's implementation of the regularized gamma function by Heng Li. * Source: https://en.wikipedia.org/wiki/Chi-squared_distribution#Cumulative_distribution_function * DF is always 1 in our case */ return 1 - kf_gammap(0.5, chi_squared/2); } double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females) { /* Chi squared is computed from the number of males and females with the sequence, as well as total number of males and females in the population. * Yates correction is applied and the shortcut formula for 2x2 table is used. * Source: https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity */ uint N = total_males + total_females; uint Ns = total_males, Nf = total_females; uint Na = n_males + n_females, Nb = total_males + total_females - n_males - n_females; int temp = (n_males * total_females) - (n_females * total_males); temp = std::abs(temp); double temp2 = std::max(0.0, double(temp) - N/2); temp2 *= temp2; return N * temp2 / Ns / Nf / Na / Nb; }
src/stats.h 0 → 100644
 #pragma once #include "utils.h" #include "kfun/kfun.h" // Compute Chi-square from the number of males and females with a sequence as well as total males and total females double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females); // Compute the p-value for a Chi-square double get_chi_squared_p(double chi_squared);
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment