frequencies.cpp 2.63 KB
Newer Older
1
2
3
4
#include "frequencies.h"

void frequencies(Parameters &parameters) {

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
    /* The frequencies function parses through a file generated by process_reads to compute the distribution of markers
     * between individuals. The output file has the following structure:
     * Number of individuals | Number of markers found in this number of individuals
     */

    std::string par = "input_file_path";
    std::ifstream input_file;
    input_file.open(parameters.get_value_from_name<std::string>(par));

    par = "min_cov";
    int min_cov = parameters.get_value_from_name<int>(par) - 1; // -1 allows comparison with > instead of >=

    if (input_file) {

        std::map<uint, uint> results;  // Results --> {Frequency: count}

        par = "output_file_path";
        std::ofstream output_file;
        output_file.open(parameters.get_value_from_name<std::string>(par));
        output_file << "Frequency" << "\t" << "Count" << "\n"; // Header of output file

        // First line of input file is the header, which is not used in this analysis
        std::string temp = "";
        std::getline(input_file, temp);

        // Define variables used to read the file
        char buffer[65536];
        uint k = 0, field_n = 0, count = 0;

        do {

            // Read a chunk of size given by the buffer
            input_file.read(buffer, sizeof(buffer));
            k = input_file.gcount();

            for (uint i=0; i<k; ++i) {

                // Read the buffer character by character
                switch(buffer[i]) {

                case '\r':
                    break;
                case '\t':  // New field
                    if (field_n > 1 and std::stoi(temp) > min_cov) ++count;  // Individual fields start at 2
                    temp = "";
                    ++field_n;
                    break;
                case '\n':  // New line (also a new field)
                    if (field_n > 1 and std::stoi(temp) > min_cov) ++count;  // Individual fields start at 2
                    ++results[count];
                    // Reset variables
                    temp = "";
                    field_n = 0;
                    count = 0;
                    break;
                default:
                    temp += buffer[i];
                    break;
                }
            }

        } while (input_file);

        // Cannot iterate over the map normally as it
        uint n_individuals = results.rbegin()->first + 1; // Find the maximum number of individuals
        for (uint i=1; i < n_individuals; ++i) output_file << i << "\t" << results[i] << "\n";  // Iterate over the map
        output_file.close();
        input_file.close();
    }
74
}