loci.cpp 6.07 KB
Newer Older
1
//#include "loci.h"
2

3
//void loci(Parameters& parameters) {
4

5
//    std::mutex results_mutex;
6

7
//    std::unordered_map<std::string, std::vector<Locus>> results;
8

9
//    std::vector<std::string> header;
10

11
12
13
14
////    std::cout << " - Loading subset of loci :" << std::endl;
////    par = "input_file_path";
////    std::string input_file_path = parameters.get_value_from_name<std::string>(par);
////    std::vector<Locus> sequences = load_coverage_matrix(input_file_path, 1, false, header);
15

16
////    std::cout << " - Loading coverage table :" << std::endl;
17

18
////    std::vector<Locus> coverage_matrix = load_coverage_matrix(coverage_matrix_path, min_cov, true, header);
19

20
21
////    par = "max_distance";
////    int max_distance = parameters.get_value_from_name<int>(par) + 1; // +1 to compare with < instead of <= later
22

23
////    std::cout << std::endl << " - Starting alignments" << std::endl;
24

25
////    auto sequence = sequences.begin();
26

27
28
29
30
31
32
////    // Create and run all sequence processors
////    par = "n_threads";
////    std::vector<std::thread> threads;
////    for (int i=0; i<parameters.get_value_from_name<int>(par); ++i) {
////        threads.push_back(std::thread(sequence_processor, std::ref(sequence), std::ref(sequences), std::ref(coverage_matrix), std::ref(results), std::ref(results_mutex), max_distance));
////    }
33

34
////    for (auto &t : threads) t.join();
35

36
37
38
39
////    par = "output_file_path";
////    std::string output_file_path = parameters.get_value_from_name<std::string>(par);
////    output_group_loci(output_file_path, results, header);
//}
40
41
42



43
//std::vector<Locus> load_coverage_matrix(std::string& file_path, int min_cov, bool print, std::vector<std::string>& header, float freq_het, float freq_hom, float range_het, float range_hom) {
44

45
46
//    /* Load a coverage matrix in memory
//     */
47

48
49
//    std::ifstream input_file(file_path);
//    std::vector<Locus> coverage_matrix;
50

51
//    if (input_file) {
52

53
54
55
56
57
//        // First line is the header. The header is used when printing the output.
//        std::string temp;
//        std::getline(input_file, temp);
//        header = split(temp,"\t");
//        temp = "";
58

59
60
61
62
63
//        // Define variables used to read the file
//        char buffer[65536];
//        uint k = 0, field_n = 0, seq_count = 0;
//        Locus locus;
//        bool keep_locus = false;
64

65
//        do {
66

67
68
69
//            // Read a chunk of size given by the buffer
//            input_file.read(buffer, sizeof(buffer));
//            k = static_cast<uint>(input_file.gcount());
70

71
//            for (uint i=0; i<k; ++i) {
72

73
74
//                // Read the buffer character by character
//                switch(buffer[i]) {
75

76
//                    case '\t':  // New field
77

78
79
80
81
82
83
84
85
86
87
88
//                        switch(field_n) {
//                            case 0:
//                                locus.id = temp;
//                                break;
//                            case 1:
//                                locus.sequence =temp;
//                                break;
//                            default:
//                                locus.coverage.push_back(temp);
//                                break;
//                        }
89

90
91
92
//                        temp = "";
//                        ++field_n;
//                        break;
93

94
//                    case '\n':  // New line (also a new field)
95

96
97
//                        locus.coverage.push_back(temp);
//                        keep_locus = false;
98

99
100
101
102
103
104
//                        for (auto c: locus.coverage) {
//                            if (std::stoi(c) > min_cov) {
//                                keep_locus = true;
//                                break;
//                            }
//                        }
105

106
107
108
109
110
111
112
113
114
115
116
//                        if (keep_locus) coverage_matrix.push_back(locus);
//                        if (seq_count % 1000000 == 0 and seq_count > 0 and print) std::cout << "   - Processed " << seq_count / 1000000 <<
//                                                                                               " M. lines and stored " << coverage_matrix.size() << " sequences\n";
//                        // Reset variables
//                        locus.id = "";
//                        locus.sequence = "";
//                        locus.coverage.resize(0);
//                        temp = "";
//                        field_n = 0;
//                        ++seq_count;
//                        break;
117

118
119
120
121
122
123
124
125
126
//                    default:
//                        temp += buffer[i];
//                        break;
//                }
//            }
//        } while (input_file);
//    }
//    return coverage_matrix;
//}
127
128
129
130




131
132
//void sequence_processor(std::vector<Locus>::iterator& sequence, std::vector<Locus>& sequences, std::vector<Locus>& coverage_matrix,
//                        std::unordered_map<std::string, std::vector<Locus>>& results, std::mutex& results_mutex, int max_distance) {
133

134
//    while(sequence != sequences.end()) {
135

136
137
138
139
140
141
//        results_mutex.lock();
//        Locus locus = *sequence;
//        ++sequence;
//        results_mutex.unlock();
//        process_sequence(locus, coverage_matrix, results, results_mutex, max_distance);
//    }
142

143
//}
144
145
146



147
//void process_sequence(Locus& locus, std::vector<Locus>& coverage_matrix, std::unordered_map<std::string, std::vector<Locus>>& results, std::mutex& results_mutex, int max_distance) {
148

149
150
151
//    uint seq_len = static_cast<uint>(locus.sequence.size());
//    std::unordered_map<std::string, std::vector<Locus>> temp_results;
//    const char* seq = locus.sequence.c_str();
152

153
154
155
156
157
158
159
160
//    EdlibAlignResult alignment_result;
//    for (auto l: coverage_matrix) {
//        alignment_result = edlibAlign(seq, seq_len, l.sequence.c_str(), seq_len, edlibDefaultAlignConfig());
//        if (alignment_result.status == EDLIB_STATUS_OK and alignment_result.editDistance < max_distance) {
//            temp_results[locus.id].push_back(l);
//        }
//        edlibFreeAlignResult(alignment_result);
//    }
161

162
163
164
165
//    results_mutex.lock();
//    for (auto l: temp_results) results[l.first] = l.second;
//    results_mutex.unlock();
//}
166
167


168
//void filter(std::unordered_map<std::string, std::vector<Locus>>& results) {
169

170
//}