Commit cc72651f authored by Romain Feron's avatar Romain Feron
Browse files

Major code cleanup. Rewrote CLI and parameters system entirely using CLI11.

CLI should be frozen until 1.0.0 hopefully. Setup for implementing the last changes before 1.0.0
parent e86c5826
This diff is collapsed.
CLI11 1.7 Copyright (c) 2017-2019 University of Cincinnati, developed by Henry
Schreiner under NSF AWARD 1414736. All rights reserved.
Redistribution and use in source and binary forms of CLI11, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Compiler options
CC = g++
OPTCFLAGS = -O2
CFLAGS = -Wall -std=c++11 $(OPTCFLAGS)
CFLAGS = -Wall -Wno-maybe-uninitialized -std=c++11 $(OPTCFLAGS)
LDFLAGS = -pthread -static-libstdc++ -lz
# Directory organisation
......
......@@ -4,9 +4,8 @@ void depth(Parameters& parameters) {
std::unordered_map<std::string, bool> popmap = load_popmap(parameters);
std::string par = "input_file_path";
std::ifstream input_file;
input_file.open(parameters.get_value_from_name<std::string>(par));
input_file.open(parameters.markers_table_path);
if (input_file.is_open()) {
......@@ -17,19 +16,7 @@ void depth(Parameters& parameters) {
header = split(temp, "\t");
// Map with column number --> index of sex_count (0 = male, 1 = female, 2 = no sex)
std::unordered_map<uint, uint> sex_columns;
for (uint i=0; i<header.size(); ++i) {
if (popmap.find(header[i]) != popmap.end()) {
if (popmap[header[i]]) {
sex_columns[i] = 0; // Male --> column 0
} else {
sex_columns[i] = 1; // Female --> column 1
}
} else {
sex_columns[i] = 2; // First and second columns (id and sequence) are counted as no sex
}
}
std::unordered_map<uint, uint> sex_columns = get_column_sex(popmap, line);
std::unordered_map<std::string, uint> individual_depths;
......@@ -42,29 +29,29 @@ void depth(Parameters& parameters) {
// Read a chunk of size given by the buffer
input_file.read(buffer, sizeof(buffer));
k = input_file.gcount();
k = static_cast<uint>(input_file.gcount());
for (uint i=0; i<k; ++i) {
// Read the buffer character by character
switch(buffer[i]) {
case '\r':
break;
case '\t': // New field
if (sex_columns[field_n] != 2) individual_depths[header[field_n]] += std::stoul(temp); // Increment the appropriate counter
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (sex_columns[field_n] != 2) individual_depths[header[field_n]] += std::stoul(temp); // Increment the appropriate counter
// Reset variables
temp = "";
field_n = 0;
break;
default:
temp += buffer[i];
break;
case '\t': // New field
if (sex_columns[field_n] != 2) individual_depths[header[field_n]] += std::stoul(temp); // Increment the appropriate counter
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (sex_columns[field_n] != 2) individual_depths[header[field_n]] += std::stoul(temp); // Increment the appropriate counter
// Reset variables
temp = "";
field_n = 0;
break;
default:
temp += buffer[i];
break;
}
}
......@@ -72,10 +59,8 @@ void depth(Parameters& parameters) {
input_file.close();
par = "output_file_path";
std::string output_file_path = parameters.get_value_from_name<std::string>(par);
std::ofstream output_file;
output_file.open(output_file_path);
output_file.open(parameters.output_file_path);
if (output_file.is_open()) {
......
#pragma once
#include <string>
#include <vector>
#include <fstream>
#include <string>
#include <unordered_map>
#include "utils.h"
#include <vector>
#include "output.h"
#include "parameters.h"
#include "popmap.h"
#include "output.h"
#include "utils.h"
// Main function implementing the analysis
// Calculate the number of reads retained in each individual
void depth(Parameters& parameters);
......@@ -13,16 +13,9 @@ void distrib(Parameters& parameters) {
// Find number of males and females
uint n_males = 0, n_females = 0;
for (auto i: popmap) if (i.second) ++n_males; else ++n_females;
// Extra increment for easier comparison
++n_males;
++n_females;
std::string par = "input_file_path";
std::ifstream input_file;
input_file.open(parameters.get_value_from_name<std::string>(par));
par = "min_cov";
int min_cov = parameters.get_value_from_name<int>(par) - 1; // -1 allows comparison with > instead of >=
input_file.open(parameters.markers_table_path);
if (input_file) {
......@@ -33,19 +26,7 @@ void distrib(Parameters& parameters) {
line = split(temp, "\t");
// Map with column number --> index of sex_count (0 = male, 1 = female, 2 = no sex)
std::unordered_map<uint, uint> sex_columns;
for (uint i=0; i<line.size(); ++i) {
if (popmap.find(line[i]) != popmap.end()) {
if (popmap[line[i]]) {
sex_columns[i] = 0; // Male --> column 0
} else {
sex_columns[i] = 1; // Female --> column 1
}
} else {
sex_columns[i] = 2; // First and second columns (id and sequence) are counted as no sex
}
}
std::unordered_map<uint, uint> sex_columns = get_column_sex(popmap, line);
// Define variables used to read the file
char buffer[65536];
......@@ -57,32 +38,32 @@ void distrib(Parameters& parameters) {
// Read a chunk of size given by the buffer
input_file.read(buffer, sizeof(buffer));
k = input_file.gcount();
k = static_cast<uint>(input_file.gcount());
for (uint i=0; i<k; ++i) {
// Read the buffer character by character
switch(buffer[i]) {
case '\r':
break;
case '\t': // New field
if (sex_columns[field_n] != 2 and std::stoi(temp) > min_cov) ++sex_count[sex_columns[field_n]]; // Increment the appropriate counter
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (sex_columns[field_n] != 2 and std::stoi(temp) > min_cov) ++sex_count[sex_columns[field_n]]; // Increment the appropriate counter
++results[sex_count[0]][sex_count[1]].first; // Update the results
// Reset variables
temp = "";
field_n = 0;
sex_count[0] = 0;
sex_count[1] = 0;
break;
default:
temp += buffer[i];
break;
case '\t': // New field
if (sex_columns[field_n] != 2 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++sex_count[sex_columns[field_n]]; // Increment the appropriate counter
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (sex_columns[field_n] != 2 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++sex_count[sex_columns[field_n]]; // Increment the appropriate counter
++results[sex_count[0]][sex_count[1]].first; // Update the results
// Reset variables
temp = "";
field_n = 0;
sex_count[0] = 0;
sex_count[1] = 0;
break;
default:
temp += buffer[i];
break;
}
}
......@@ -94,8 +75,8 @@ void distrib(Parameters& parameters) {
double chi_squared = 0;
// Compute p-values
for (uint f=0; f < n_females; ++f) {
for (uint m=0; m < n_males; ++m) {
for (uint f=0; f <= n_females; ++f) {
for (uint m=0; m <= n_males; ++m) {
if (f + m != 0) {
chi_squared = get_chi_squared(m, f, n_males, n_females);
results[m][f].second = std::min(1.0, get_chi_squared_p(chi_squared)); // p-value corrected with Bonferroni, with max of 1
......@@ -103,17 +84,11 @@ void distrib(Parameters& parameters) {
}
}
par = "output_file_path";
std::string output_file_path = parameters.get_value_from_name<std::string>(par);
par = "output_matrix";
bool output_matrix = parameters.get_value_from_name<bool>(par);
// Generate the output file
if (!output_matrix) {
output_sex_distribution(output_file_path, results, n_males, n_females);
if (!parameters.output_matrix) {
output_sex_distribution(parameters.output_file_path, results, n_males, n_females);
} else {
output_sex_distribution_matrix(output_file_path, results, n_males, n_females);
output_sex_distribution_matrix(parameters.output_file_path, results, n_males, n_females);
}
}
}
#pragma once
#include <string>
#include <vector>
#include <fstream>
#include <string>
#include <unordered_map>
#include "utils.h"
#include <vector>
#include "output.h"
#include "parameters.h"
#include "popmap.h"
#include "output.h"
#include "utils.h"
// Main function implementing the analysis
// Compute the distribution of markers between males and females
void distrib(Parameters& parameters);
......@@ -7,22 +7,13 @@ void freq(Parameters &parameters) {
* Number of individuals | Number of markers found in this number of individuals
*/
std::string par = "input_file_path";
std::ifstream input_file;
input_file.open(parameters.get_value_from_name<std::string>(par));
par = "min_cov";
int min_cov = parameters.get_value_from_name<int>(par) - 1; // -1 allows comparison with > instead of >=
input_file.open(parameters.markers_table_path);
if (input_file) {
std::map<uint, uint> results; // Results --> {Frequency: count}
par = "output_file_path";
std::ofstream output_file;
output_file.open(parameters.get_value_from_name<std::string>(par));
output_file << "Frequency" << "\t" << "Count" << "\n"; // Header of output file
// First line of input file is the header, which is not used in this analysis
std::string temp = "";
std::getline(input_file, temp);
......@@ -35,36 +26,40 @@ void freq(Parameters &parameters) {
// Read a chunk of size given by the buffer
input_file.read(buffer, sizeof(buffer));
k = input_file.gcount();
k = static_cast<uint>(input_file.gcount());
for (uint i=0; i<k; ++i) {
// Read the buffer character by character
switch(buffer[i]) {
case '\r':
break;
case '\t': // New field
if (field_n > 1 and std::stoi(temp) > min_cov) ++count; // Individual fields start at 2
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (field_n > 1 and std::stoi(temp) > min_cov) ++count; // Individual fields start at 2
++results[count];
// Reset variables
temp = "";
field_n = 0;
count = 0;
break;
default:
temp += buffer[i];
break;
case '\t': // New field
if (field_n > 1 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++count; // Individual fields start at 2
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (field_n > 1 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++count; // Individual fields start at 2
++results[count];
// Reset variables
temp = "";
field_n = 0;
count = 0;
break;
default:
temp += buffer[i];
break;
}
}
} while (input_file);
std::ofstream output_file;
output_file.open(parameters.output_file_path);
output_file << "Frequency" << "\t" << "Count" << "\n"; // Header of output file
// Cannot iterate over the map normally as it
uint n_individuals = results.rbegin()->first + 1; // Find the maximum number of individuals
for (uint i=1; i < n_individuals; ++i) output_file << i << "\t" << results[i] << "\n"; // Iterate over the map
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment