#include "freq.h"

#include <fstream>
#include <iostream>
#include <map>
#include <string>

/**
 * Compute the distribution of markers between individuals.
 *
 * Parses the markers table at `parameters.markers_table_path` and, for every
 * marker (one row of the table), counts the individuals whose depth for that
 * marker is at least `parameters.min_depth`. The tally is written to
 * `parameters.output_file_path` as a tab-separated file:
 *
 *     Frequency | Count
 *     Number of individuals | Number of markers found in this number of individuals
 *
 * Expected input layout: the first line is a comment, the second line is the
 * header, and each following line is one marker whose per-individual depth
 * fields start at column index 2 (columns 0 and 1 are not depths).
 *
 * Markers present in zero individuals are tallied but not written, since the
 * output starts at frequency 1. Every frequency from 1 up to the highest one
 * observed is written, with a count of 0 where no marker has that frequency.
 *
 * @param parameters Run parameters; the fields read are `markers_table_path`,
 *                   `output_file_path` and `min_depth`.
 *
 * If either file cannot be opened, a message is printed to stderr and the
 * function returns without doing further work. A depth field that is not a
 * valid integer makes `std::stoi` throw, as before.
 */
void freq(Parameters& parameters) {

    std::ifstream input_file(parameters.markers_table_path);
    if (!input_file) {
        std::cerr << "Error: could not open markers table \"" << parameters.markers_table_path << "\"\n";
        return;
    }

    std::map<uint, uint> results;  // Results --> {Frequency: count}

    std::string temp;
    std::getline(input_file, temp);  // First line is a comment, not used in this analysis
    std::getline(input_file, temp);  // Second line is the header, not used in this analysis
    temp.clear();

    uint field_n = 0;  // Index of the field currently being read in the row
    uint count = 0;    // Number of individuals with sufficient depth in the current row

    // Close the field accumulated in `temp`: individual fields start at 2, and an
    // individual counts as having the marker when its depth reaches min_depth.
    const auto close_field = [&]() {
        if (field_n > 1 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++count;
        temp.clear();
    };

    // The table is read in fixed-size chunks and scanned character by character;
    // `temp`, `field_n` and `count` carry over between chunks, so a field or row
    // split across a chunk boundary is handled transparently.
    char buffer[65536];

    do {

        input_file.read(buffer, sizeof(buffer));
        const std::streamsize n_read = input_file.gcount();  // May be short (or 0) on the last chunk

        for (std::streamsize i = 0; i < n_read; ++i) {

            switch (buffer[i]) {

                case '\t':  // New field
                    close_field();
                    ++field_n;
                    break;

                case '\n':  // New line (also a new field)
                    close_field();
                    ++results[count];
                    field_n = 0;
                    count = 0;
                    break;

                default:
                    temp += buffer[i];
                    break;
            }
        }

    } while (input_file);

    // A final row without a trailing newline would otherwise be dropped.
    if (field_n > 0 or !temp.empty()) {
        if (!temp.empty()) close_field();
        ++results[count];
    }

    std::ofstream output_file(parameters.output_file_path);
    if (!output_file) {
        std::cerr << "Error: could not open output file \"" << parameters.output_file_path << "\"\n";
        return;
    }

    output_file << "Frequency" << "\t" << "Count" << "\n";  // Header of output file

    // Iterating over the map directly would skip frequencies that were never
    // observed, so walk every value from 1 to the highest observed frequency.
    // The emptiness check avoids dereferencing rbegin() on an empty map.
    if (!results.empty()) {
        const uint max_frequency = results.rbegin()->first;
        for (uint i = 1; i <= max_frequency; ++i) {
            const auto it = results.find(i);
            output_file << i << "\t" << (it != results.end() ? it->second : 0u) << "\n";
        }
    }

    // Both streams are closed by their destructors.
}