Commit 949a1144 authored by Romain Feron's avatar Romain Feron
Browse files

Refactored chi squared calculation into a separate file

parent 4cbeeac0
......@@ -116,34 +116,3 @@ void significant_sequences(Parameters& parameters) {
input_file.close();
}
}
double get_chi_squared_p(double chi_squared) {
/* p is given by 1 - P(chi_squared, df) where P is the Cumulative Distribution Function of the Chi-squared distribution.
* P is also the regularized gamma function. Here we use samtool's implementation of the regularized gamma function by Hen Li.
* Source: https://en.wikipedia.org/wiki/Chi-squared_distribution#Cumulative_distribution_function
* DF is always 1 in our case
*/
return 1 - kf_gammap(0.5, chi_squared/2);
}
double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females) {
/* Chi squared is computed from the number of males and females with the sequence, as well as total number of males and females in the population.
* Yates correction is applied and the shortcut formula for 2x2 table is used.
* Source: https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity
*/
uint N = total_males + total_females;
uint Ns = total_males, Nf = total_females;
uint Na = n_males + n_females, Nb = total_males + total_females - n_males - n_females;
int temp = (n_males * total_females) - (n_females * total_males);
temp = std::abs(temp);
double temp2 = std::max(0.0, double(temp) - N/2);
temp2 *= temp2;
return N * temp2 / Ns / Nf / Na / Nb;
}
......@@ -7,17 +7,8 @@
#include "parameters.h"
#include "popmap_file.h"
#include "output.h"
#include "kfun/kfun.h"
#include "stats.h"
// Main function implementing the analysis
void significant_sequences(Parameters& parameters);
// Compute Chi-square from the number of males and females with a sequence as well as total males and total females
double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females);
// Compute the p-value for a Chi-square
double get_chi_squared_p(double chi_squared);
#include "stats.h"
double get_chi_squared_p(double chi_squared) {
/* p is given by 1 - P(chi_squared, df) where P is the Cumulative Distribution Function of the Chi-squared distribution.
* P is also the regularized gamma function. Here we use samtool's implementation of the regularized gamma function by Heng Li.
* Source: https://en.wikipedia.org/wiki/Chi-squared_distribution#Cumulative_distribution_function
* DF is always 1 in our case
*/
return 1 - kf_gammap(0.5, chi_squared/2);
}
double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females) {
/* Chi squared is computed from the number of males and females with the sequence, as well as total number of males and females in the population.
* Yates correction is applied and the shortcut formula for 2x2 table is used.
* Source: https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity
*/
uint N = total_males + total_females;
uint Ns = total_males, Nf = total_females;
uint Na = n_males + n_females, Nb = total_males + total_females - n_males - n_females;
int temp = (n_males * total_females) - (n_females * total_males);
temp = std::abs(temp);
double temp2 = std::max(0.0, double(temp) - N/2);
temp2 *= temp2;
return N * temp2 / Ns / Nf / Na / Nb;
}
#pragma once
#include "utils.h"
#include "kfun/kfun.h"
// Compute Chi-square from the number of males and females with a sequence as well as total males and total females
double get_chi_squared(uint n_males, uint n_females, uint total_males, uint total_females);
// Compute the p-value for a Chi-square
double get_chi_squared_p(double chi_squared);
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment