Commit e86c5826 authored by Romain Feron's avatar Romain Feron
Browse files

added personal tests and valgrind output to gitignore

parent e62a932e
......@@ -72,7 +72,7 @@ Thumbs.db
*.exe
# Personal
test/
tests*
bin/
build/
*.pro
......@@ -80,3 +80,4 @@ build/
/include/*/*.o
/include/*/*.a
*/_build/*
callgrind*
#pragma once
#include <string>
#include "parameters.h"
struct Analysis {
    /* Describes one analysis that can be selected on the command line:
     *  - name:        name of the analysis
     *  - description: short text used in the help message
     *  - options:     names of the parameters used by this analysis
     *  - run:         pointer to the function implementing the analysis
     */
    std::string name;
    std::string description;
    std::vector<std::string> options;
    void (*run)(Parameters&);

    // Default constructor: empty strings, no options and no function to run.
    Analysis() : name(""), description(""), options(), run(nullptr) {}

    // Full constructor: every property is copied from the corresponding argument.
    Analysis(std::string name, std::string description, std::vector<std::string> options, void (*run_function)(Parameters&)) :
        name(name),
        description(description),
        options(options),
        run(run_function) {}
};
#pragma once
#include <string>
#include <fstream>
#include <dirent.h>
struct Parameter {
    /* The Parameter structure stores the information necessary to use and process one parameter.
     * It also offers a few methods to validate and update the parameter value.
     */

    // Properties of a parameter object
    std::string name;
    std::string description;
    std::string flag;
    std::string default_value;
    std::string type;           // Type to be displayed in help message
    std::string internal_type;  // Type to be used for internal value testing
    std::string value;
    bool required;              // Whether the parameter value has to be specified by user or a default value can be used

    // Constructor: set the values of the different properties.
    // When "value" is empty, the parameter value falls back to "default_value".
    Parameter(std::string const& name = "", std::string const& description = "", std::string const& flag = "", std::string const& default_value = "",
              std::string const& type = "", std::string const& internal_type = "", std::string const& value = "", bool required = false) :
        name(name),
        description(description),
        flag(flag),
        default_value(default_value),
        type(type),
        internal_type(internal_type),
        value(value.empty() ? default_value : value),
        required(required) {}

    // Tests if the parameter was initialized or was just returned as a null (dummy) parameter by a function.
    // A parameter is a dummy when its name, flag, type and value are all empty.
    bool is_real() const {
        return !(this->name.empty() and this->flag.empty() and this->type.empty() and this->value.empty());
    }

    // Tests if the value of the parameter matches its internal type.
    // Returns true when the value is acceptable, false otherwise. Unknown internal types are always accepted,
    // except for the parameter named "output_format" which must be "fasta" or "table".
    bool test_value() const {
        const std::string& v = this->value;

        if (this->internal_type == "bool") {
            return v == "0" or v == "1";
        }

        if (this->internal_type == "int") {
            // An empty string contains no invalid character but is not an integer either
            return !v.empty() and v.find_first_not_of("0123456789") == std::string::npos;
        }

        if (this->internal_type == "float") {
            // Only digits and dots, at least one digit, and at most one dot
            if (v.find_first_not_of("0123456789.") != std::string::npos) return false;
            if (v.find_first_of("0123456789") == std::string::npos) return false;
            return v.find('.') == v.rfind('.');
        }

        if (this->internal_type == "ifile") {
            std::ifstream file(v);
            return file.is_open();
        }

        if (this->internal_type == "ofile") {
            // NOTE(review): opening with std::ofstream creates the file and truncates an existing one
            std::ofstream file(v);
            return file.is_open();
        }

        if (this->internal_type == "dir") {
            DIR* dir = opendir(v.c_str());
            if (dir == nullptr) return false;
            closedir(dir);  // Release the directory stream, it was only opened to test for existence
            return true;
        }

        if (this->name == "output_format") {
            return v == "fasta" or v == "table";
        }

        return true;
    }

    // Sets the value of the parameter. Special check for directory path: a trailing "/" is appended when missing.
    // The string passed by the caller is modified in place when the "/" is appended.
    void set_value(std::string& value) {
        // back() is undefined on an empty string, so an empty directory path is stored unchanged
        if (this->internal_type == "dir" and !value.empty() and value.back() != '/') value += "/";
        this->value = value;
    }
};
#include "parameters.h"
/* The function "get_value_from_name" requires template specialization.
 * Strings are converted to the specified type using stringstreams, which are "cut" by spaces.
 * The function below specializes the template for values of type "string" in order to handle strings
 * containing spaces: the stored value is returned as is, without going through a stringstream.
 *
 * Prints an error message and terminates the program with a non-zero exit status when no parameter
 * is named "name".
 */
template<>
std::string Parameters::get_value_from_name<std::string>(const std::string& name) {
    // Iterate by const reference: each Parameter holds seven strings, copying it is wasteful
    for (const auto& p : this->list) {
        if (p.name == name) return p.value;
    }
    std::cout << " ** Error: looking for the value of parameter \"" << name <<"\" which does not exist." << std::endl;
    exit(1);  // This is a failure, it must not be reported as a success to the calling shell
}
#pragma once
#include "parameter.h"
#include <vector>
#include <iostream>
#include <algorithm>
#include <sstream>
#ifndef PARAMETERS_2_H
#define PARAMETERS_2_H
struct Parameters {
    /* The Parameters structure stores all parameters to be used in the analyses and provides a few methods
     * to access these parameters easily.
     *
     * Lookup methods that cannot find the requested parameter either return a dummy Parameter (to be tested
     * with the "is_real()" method from the Parameter structure) or print an error message and terminate the
     * program with a non-zero exit status, as documented on each method.
     */

    // Initialize all possible parameters to be used in the analyses.
    // Parameter constructor: Parameter(name, description, flag, default_value, type, internal_type, value, required)
    std::vector<Parameter> list {Parameter("help", "Prints this message", "--help", "0", "bool", "bool", "", false),
                                 Parameter("input_file_path", "Path to an input file", "--input-file", "", "string", "ifile", "", true),
                                 Parameter("input_dir_path", "Path to an input directory", "--input-dir", "", "string", "dir", "", true),
                                 Parameter("output_file_path", "Path to an output file", "--output-file", "", "string", "ofile", "", true),
                                 Parameter("output_dir_path", "Path to an output directory", "--output-dir", "", "string", "dir", "", true),
                                 Parameter("coverage_matrix_path", "Path to an coverage table file", "--coverage-table", "", "string", "ifile", "", true),
                                 Parameter("n_threads", "Number of threads", "--threads", "1", "int", "int", "", false),
                                 Parameter("min_cov", "Minimum coverage to consider a marker", "--min-cov", "1", "int", "int", "", false),
                                 Parameter("max_distance", "Maximum Levenstein distance between two sequences in a locus", "--max-distance", "1", "int", "int", "", false),
                                 Parameter("popmap_file_path", "Path to a popmap file", "--popmap-file", "", "string", "ifile", "", true),
                                 Parameter("barcodes_file_path", "Path to a barcodes file", "--barcodes-file", "", "string", "ifile", "", true),
                                 Parameter("genome_file_path", "Path to a genome file in fasta format", "--genome-file", "", "string", "ifile", "", true),
                                 Parameter("min_males", "Minimum number of males in the subset", "--min-males", "0", "int", "int", "", false),
                                 Parameter("min_females", "Minimum number of females in the subset", "--min-females", "0", "int", "int", "", false),
                                 Parameter("max_males", "Maximum number of males in the subset", "--max-males", "n.males", "int", "int", "", false),
                                 Parameter("max_females", "Maximum number of females in the subset", "--max-females", "n.females", "int", "int", "", false),
                                 Parameter("min_individuals", "Minimum number of individuals in the subset (overrides sex parameters)", "--min-individuals", "0", "int", "int", "", false),
                                 Parameter("max_individuals", "Maxmimum number of individuals in the subset (overrides sex parameters)", "--max-individuals", "n.individual", "int", "int", "", false),
                                 Parameter("output_matrix", "Output the sex distribution table as a matrix", "--output-matrix", "0", "bool", "bool", "", false),
                                 Parameter("min_quality", "Minimum mapping quality to keep a mapped read", "--min-quality", "20", "int", "int", "", false),
                                 Parameter("min_frequency", "Minimum frequency of a sequence in at least one sex", "--min-frequency", "0.25", "float", "float", "", false),
                                 Parameter("output_format", "Output format, either \"table\" or \"fasta\"", "--output-format", "table", "string", "string", "", false),
                                 Parameter("freq_het", "Frequency of a heretozygous allele", "--freq-het", "0.5", "float", "float", "", false),
                                 Parameter("freq_hom", "Frequency of a homozygous allele", "--freq-hom", "1", "float", "float", "", false),
                                 Parameter("range_het", "Range for the frequency of a heretozygous allele", "--range-het", "0.1", "float", "float", "", false),
                                 Parameter("range_hom", "Range for the frequency of a homozygous allele", "--range-hom", "0.1", "float", "float", "", false)
                                };

    // Return a copy of a parameter from its flag.
    // If the parameter was not found (meaning it's not a valid parameter) a dummy is returned,
    // to be tested with "is_real()" method from the Parameter structure.
    Parameter get_from_flag(const std::string& flag) const {
        for (const auto& p : this->list) {
            if (p.flag == flag) return p;
        }
        return Parameter();
    }

    // Check if a flag is in the list of parameters
    bool find_flag(const std::string& flag) const {
        for (const auto& p : this->list) {
            if (p.flag == flag) return true;
        }
        return false;
    }

    // Return a copy of a parameter from its name.
    // If the parameter was not found (meaning it's not a valid parameter) a dummy is returned,
    // to be tested with "is_real()" method from the Parameter structure.
    Parameter get_from_name(const std::string& name) const {
        for (const auto& p : this->list) {
            if (p.name == name) return p;
        }
        return Parameter();
    }

    // Check if a parameter name is in the list of parameters
    bool find_name(const std::string& name) const {
        for (const auto& p : this->list) {
            if (p.name == name) return true;
        }
        return false;
    }

    // Check if a parameter is required for the analysis.
    // An unknown name yields the dummy parameter, which is not required.
    bool is_required(const std::string& name) const {
        return this->get_from_name(name).required;
    }

    // Get the value of a parameter from its name, converted to type T with a stringstream.
    // Terminates the program with an error message when the parameter does not exist or when its value
    // cannot be converted to T.
    // Not const-qualified on purpose: the signature has to keep matching the explicit specialization
    // for std::string declared after this structure.
    template<typename T>
    T get_value_from_name(const std::string& name) {
        for (const auto& p : this->list) {
            if (p.name != name) continue;
            T output;
            std::stringstream ss(p.value);
            if (ss >> output) return output;
            // The parameter exists but its value is not usable: report this rather than "does not exist"
            std::cout << " ** Error: could not convert the value \"" << p.value << "\" of parameter \"" << name << "\" to the requested type." << std::endl;
            exit(1);
        }
        std::cout << " ** Error: looking for the value of parameter \"" << name <<"\" which does not exist." << std::endl;
        exit(1);
    }

    // Get the value of a parameter from its flag, converted to type T.
    // The conversion is delegated to "get_value_from_name" so that both accessors behave the same way;
    // returning a different type per internal type from a single template body cannot compile.
    // Terminates the program with an error message when no parameter uses this flag.
    template<typename T>
    T get_value_from_flag(const std::string& flag) {
        for (const auto& p : this->list) {
            if (p.flag == flag) return this->get_value_from_name<T>(p.name);
        }
        std::cout << " ** Error: looking for the value of parameter \"" << flag <<"\" which does not exist." << std::endl;
        exit(1);
    }

    // Set the value of a parameter from its flag.
    // "value" is passed on to Parameter::set_value, which may modify it.
    // Terminates the program with an error message when no parameter uses this flag.
    void set_value_from_flag(const std::string& flag, std::string& value) {
        for (auto& p : this->list) {
            if (p.flag == flag) {
                p.set_value(value);
                return;
            }
        }
        std::cout << "Error: trying to set the value " << value << " for parameter " << flag << " which does not exist." << std::endl;
        exit(1);
    }

    // Set the value of a parameter from its name.
    // "value" is passed on to Parameter::set_value, which may modify it.
    // Terminates the program with an error message when no parameter has this name.
    void set_value_from_name(const std::string& name, std::string& value) {
        for (auto& p : this->list) {
            if (p.name == name) {
                p.set_value(value);
                return;
            }
        }
        std::cout << "Error: trying to set the value " << value << " for parameter " << name << " which does not exist." << std::endl;
        exit(1);
    }

    // Print information about all parameters, one tab-separated line per parameter
    void print() const {
        std::cout << std::endl;
        for (const auto& parameter : this->list) {
            std::cout << parameter.flag << "\t" << parameter.name << "\t" << parameter.description << "\t" << parameter.value << "\t" << parameter.default_value << "\t" <<
                         parameter.type << "\t" << parameter.internal_type << "\t" << parameter.required << "\n";
        }
        std::cout << "\n";
    }

    // Print information about specific parameters given by a list of names, in the order of the internal list
    void print(const std::vector<std::string>& parameters_to_print) const {
        std::cout << std::endl;
        for (const auto& parameter : this->list) {
            if (std::find(parameters_to_print.begin(), parameters_to_print.end(), parameter.name) != parameters_to_print.end()) {
                std::cout << parameter.flag << "\t" << parameter.name << "\t" << parameter.description << "\t" << parameter.value << "\t" << parameter.default_value << "\t" <<
                             parameter.type << "\t" << parameter.internal_type << "\t" << parameter.required << "\n";
            }
        }
        std::cout << "\n";
    }

    // Print only the name and value of the parameters given by a list of names
    void simple_print(const std::vector<std::string>& parameters_to_print) const {
        std::cout << std::endl;
        for (const auto& parameter : this->list) {
            if (std::find(parameters_to_print.begin(), parameters_to_print.end(), parameter.name) != parameters_to_print.end()) {
                std::cout << parameter.name << " :\t" << parameter.value << std::endl;
            }
        }
        std::cout << "\n";
    }
};
// Template specialization for converting string in order to handle spaces in strings.
// Only the declaration lives in this header; it has to be visible before any use of
// get_value_from_name<std::string> so that the specialization is selected instead of the generic version.
template<>
std::string Parameters::get_value_from_name<std::string>(const std::string& name);
#endif // PARAMETERS_2_H
#include "radsex.h"
#include "cliparser.h"
// NOTE(review): the next three lines look like both sides of a rename diff (RadSex -> CLIParser) rendered
// together; only one of the two signatures can be the real one. The body uses argc and argv, which only the
// first signature declares — TODO confirm against the repository.
RadSex::RadSex(int& argc, char** argv) {
CLIParser::CLIParser()
{
/* The RadSex constructor checks the first two command-line arguments to detect the type of analysis
 * The general usage is printed and the program exits when no command is given or when the command
 * is not a key of the "analyses" map.
*/
if (argc < 2) {
this->usage();
exit(0);
}
if (this->analyses.find(argv[1]) == this->analyses.end()) {
std::cout << std::endl << "** Error: \"" << argv[1] << "\" is not a valid command." << std::endl;
this->usage();
exit(0);
}
// Command-line arguments are stored to be used by the analysis-specific parser
this->argc = argc;
this->argv = argv;
// The analysis to run is stored
this->analysis = this->analyses[argv[1]];
// Command-line arguments are parsed
this->parse();
}
void RadSex::usage() {
    /* Prints the general usage of radsex: the command-line synopsis followed by
     * one line per available command with its description.
     */
    std::cout << std::endl << "Usage: radsex <command> [options]" << std::endl
              << std::endl << "Commands:" << std::endl;
    for (const auto& entry : this->analyses) {
        std::cout << "\t" << entry.first << "\t" << entry.second.description << std::endl;
    }
    std::cout << std::endl;
}
void RadSex::usage(Analysis analysis) {
    /* Prints usage for the specified analysis:
     *  - a synopsis with the required parameters first and the optional ones between brackets
     *  - one line per parameter with its flag, name, type, description and default value
     * The parameters listed are those of the analysis passed as argument.
     */
    Parameter temp;

    std::cout << std::endl << "Usage: radsex " << analysis.name << " ";

    // Required parameters
    // (plain references: the names are handed straight to the Parameters lookup)
    for (std::string& option_name : analysis.options) {
        temp = this->parameters.get_from_name(option_name);
        if (temp.required) std::cout << temp.flag << " " << temp.name << " ";
    }

    // Optional parameters
    std::cout << " [ ";
    for (std::string& option_name : analysis.options) {
        temp = this->parameters.get_from_name(option_name);
        if (!temp.required) std::cout << temp.flag << " " << temp.name << " ";
    }
    std::cout << "]";

    // Detailed description of each parameter
    std::cout << std::endl << std::endl << "Options:" << std::endl;
    for (std::string& option_name : analysis.options) {
        temp = this->parameters.get_from_name(option_name);
        std::cout << "\t" << temp.flag << "\t" << temp.name << "\t<" << temp.type << ">\t" << temp.description << "\t[" << temp.default_value << "]" << std::endl;
    }
    std::cout << std::endl;
}
void RadSex::run() {
    /* Calls the function stored in the selected analysis, passing it the parameters
     */
    auto selected = this->analysis.run;
    selected(this->parameters);
}
void RadSex::parse() {
// Command-line arguments are stored in a vector to facilitate processing later
std::vector<std::string> fields;
for (auto i=2; i < argc; ++i) fields.push_back(std::string(argv[i]));
// Store user-defined parameter values in the appropriate parameters
for (auto it=fields.begin(); it != fields.end(); ++it) {
if ((*it)[0] == '-' and (*it) != "-h") {
if (this->parameters.find_flag(*it)) {
parameters.set_value_from_flag(*it, *(std::next(it)));
} else {
std::cout << "\n** Warning: parameter (" << (*it) << ") does not exist." << std::endl;
}
}
}
// Check that all required parameters for the selected analysis were set by the user
Parameter option;
for (auto option_name: this->analysis.options) {
option = parameters.get_from_name(option_name);
if (option.required and std::find(fields.begin(), fields.end(), option.flag) == fields.end()) {
std::cout << "\n** Error: the parameter " << option.name << " (" << option.flag << ") is required." << std::endl;
this->usage(this->analysis);
exit(0);
}
}
// Special check for parameters from the subset analysis:
// If the value of max_males or max_females was not set, it is set to the max number of males / females in the popmap
if (this->analysis.name == "subset") {
std::unordered_map<std::string, bool> popmap = load_popmap(this->parameters);
int _n_males = 0, _n_females = 0;
for (auto i: popmap) { if (i.second) ++_n_males; else ++_n_females;}
std::string n_males = std::to_string(_n_males);
std::string n_females = std::to_string(_n_females);
std::string n_individuals = std::to_string(popmap.size());
std::string par = "max_males";
option = this->parameters.get_from_name(par);
if (option.value == option.default_value) this->parameters.set_value_from_name(par, n_males);
par = "max_females";
option = this->parameters.get_from_name(par);
if (option.value == option.default_value) this->parameters.set_value_from_name(par, n_females);
par = "max_individuals";
option = this->parameters.get_from_name(par);
if (option.value == option.default_value) this->parameters.set_value_from_name(par, n_individuals);
}
this->parameters.simple_print(this->analysis.options);
// Test all parameter values and output an appropriate error message is the test fails
bool success = true;
for (auto p: this->analysis.options) {
option = this->parameters.get_from_name(p);
if (!option.test_value()) {
std::cout << "**Error: incorrect value \"" << option.value << "\" for parameter \"" << option.name << "\", which should be";
if (option.internal_type == "bool") std::cout << " a boolean (0/1).";
else if (option.internal_type == "int") std::cout << " an integer.";
else if (option.internal_type == "ifile") std::cout << " a path to an existing file.";
else if (option.internal_type == "ofile") std::cout << " a valid path to an output file.";
else if (option.internal_type == "dir") std::cout << " a path to an existing directory.";
else if (option.name == "output_format") std::cout << " either \"table\" or \"fasta\"";
std::cout << std::endl;
success = false;
}
}
if (!success) {
this->usage(this->analysis);
exit(0);
}
}
#pragma once
#include <map>
#include <iostream>
#include "analysis.h"
#include "distrib.h"
#include "process.h"
#include "subset.h"
#include "freq.h"
#include "loci.h"
#include "signif.h"
#include "map.h"
#include "depth.h"
#ifndef CLIPARSER_H
#define CLIPARSER_H
class RadSex {
class CLIParser
{
public:
// Command-line arguments are stored as variables to be used by analysis-specific parsers
int argc;
char** argv;
// Store the type of analysis to run detected from the command-line arguments in the RadSex constructor
Analysis analysis;
// Store all parameter values and properties inside a list.
Parameters parameters;
// Define all analyses
// Analysis constructor: Analysis(name, description, parameters, function ref)
std::map<std::string, Analysis> analyses { {"distrib",
Analysis("distrib",
"Compute a matrix of sequences distribution between sexes",
std::vector<std::string> {"input_file_path",
"output_file_path",
"popmap_file_path",
"min_cov",
"output_matrix"},
distrib)},
{"process",
Analysis("processreads",
"Compute a matrix of coverage from a set of demultiplexed reads files",
std::vector<std::string> {"input_dir_path",
"output_file_path",
"n_threads",
"min_cov"},
process)},
{"depth",
Analysis("depth",
"Compute reads and depth metrics for each individual.",
std::vector<std::string> {"input_file_path",
"output_file_path",
"popmap_file_path"},
depth)},
{"subset",
Analysis("subset",
"Extract a subset of the coverage matrix",
std::vector<std::string> {"input_file_path",
"output_file_path",
"popmap_file_path",
"min_cov",
"min_males",
"min_females",
"max_males",
"max_females",
"min_individuals",
"max_individuals",
"output_format"},
subset)},
{"freq",
Analysis("freq",
"Calculate the frequency of markers in the population",
std::vector<std::string> {"input_file_path",
"output_file_path",
"min_cov"},
freq)},
{"loci",
Analysis("loci",
"Recreate polymorphic loci from a subset of coverage matrix",
std::vector<std::string> {"input_file_path",
"coverage_matrix_path",
"output_file_path",
"max_distance",
"n_threads",
"min_cov",
"freq_het",
"freq_hom",
"range_het",
"range_hom"},
loci)},
{"signif",
Analysis("signif",
"Extract sequences significantly associated with sex from the coverage matrix.",
std::vector<std::string> {"input_file_path",