Commit 6ef18e03 authored by Romain Feron's avatar Romain Feron
Browse files

Implemented frequencies analysis

parent eb9d658a
......@@ -2,4 +2,73 @@
void frequencies(Parameters &parameters) {
/* The frequencies function parses through a file generated by process_reads to compute the distribution of markers
* between individuals. The output file has the following structure:
* Number of individuals | Number of markers found in this number of individuals
*/
std::string par = "input_file_path";
std::ifstream input_file;
input_file.open(parameters.get_value_from_name<std::string>(par));
par = "min_cov";
int min_cov = parameters.get_value_from_name<int>(par) - 1; // -1 allows comparison with > instead of >=
if (input_file) {
std::map<uint, uint> results; // Results --> {Frequency: count}
par = "output_file_path";
std::ofstream output_file;
output_file.open(parameters.get_value_from_name<std::string>(par));
output_file << "Frequency" << "\t" << "Count" << "\n"; // Header of output file
// First line of input file is the header, which is not used in this analysis
std::string temp = "";
std::getline(input_file, temp);
// Define variables used to read the file
char buffer[65536];
uint k = 0, field_n = 0, count = 0;
do {
// Read a chunk of size given by the buffer
input_file.read(buffer, sizeof(buffer));
k = input_file.gcount();
for (uint i=0; i<k; ++i) {
// Read the buffer character by character
switch(buffer[i]) {
case '\r':
break;
case '\t': // New field
if (field_n > 1 and std::stoi(temp) > min_cov) ++count; // Individual fields start at 2
temp = "";
++field_n;
break;
case '\n': // New line (also a new field)
if (field_n > 1 and std::stoi(temp) > min_cov) ++count; // Individual fields start at 2
++results[count];
// Reset variables
temp = "";
field_n = 0;
count = 0;
break;
default:
temp += buffer[i];
break;
}
}
} while (input_file);
// Cannot iterate over the map normally as it
uint n_individuals = results.rbegin()->first + 1; // Find the maximum number of individuals
for (uint i=1; i < n_individuals; ++i) output_file << i << "\t" << results[i] << "\n"; // Iterate over the map
output_file.close();
input_file.close();
}
}
......@@ -2,7 +2,7 @@
#include <string>
#include <vector>
#include <fstream>
#include <unordered_map>
#include <map>
#include "utils.h"
#include "parameters.h"
#include "popmap_file.h"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment