freq.cpp 2.75 KB
Newer Older
1
#include "freq.h"
2

3
void freq(Parameters &parameters) {
4

5
6
7
8
9
10
    /* The frequencies function parses through a file generated by process_reads to compute the distribution of markers
     * between individuals. The output file has the following structure:
     * Number of individuals | Number of markers found in this number of individuals
     */

    std::ifstream input_file;
11
    input_file.open(parameters.markers_table_path);
12
13
14
15
16

    if (input_file) {

        std::map<uint, uint> results;  // Results --> {Frequency: count}

17
        std::vector<std::string> line;
18
        std::string temp = "";
19
20
21
22
23
24
25

        // First line is a comment with number of markers in the table
        std::getline(input_file, temp);
        line = split(temp, " : ");
        if (line.size() == 2) uint n_markers = static_cast<uint>(std::stoi(line[1]));

        // Second line is the header, not used in this analysis
26
        std::getline(input_file, temp);
27
        line = split(temp, "\t");
28
29
30
31
32
33
34
35
36

        // Define variables used to read the file
        char buffer[65536];
        uint k = 0, field_n = 0, count = 0;

        do {

            // Read a chunk of size given by the buffer
            input_file.read(buffer, sizeof(buffer));
37
            k = static_cast<uint>(input_file.gcount());
38
39
40
41
42
43

            for (uint i=0; i<k; ++i) {

                // Read the buffer character by character
                switch(buffer[i]) {

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
                    case '\t':  // New field
                        if (field_n > 1 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++count;  // Individual fields start at 2
                        temp = "";
                        ++field_n;
                        break;

                    case '\n':  // New line (also a new field)
                        if (field_n > 1 and static_cast<uint>(std::stoi(temp)) >= parameters.min_depth) ++count;  // Individual fields start at 2
                        ++results[count];
                        // Reset variables
                        temp = "";
                        field_n = 0;
                        count = 0;
                        break;

                    default:
                        temp += buffer[i];
                        break;
62
63
64
65
66
                }
            }

        } while (input_file);

67
68
69
70
        std::ofstream output_file;
        output_file.open(parameters.output_file_path);
        output_file << "Frequency" << "\t" << "Count" << "\n"; // Header of output file

71
72
73
74
75
76
        // Cannot iterate over the map normally as it
        uint n_individuals = results.rbegin()->first + 1; // Find the maximum number of individuals
        for (uint i=1; i < n_individuals; ++i) output_file << i << "\t" << results[i] << "\n";  // Iterate over the map
        output_file.close();
        input_file.close();
    }
77

78
}