Commit 1721577c authored by blackrim's avatar blackrim
Browse files

first commit

parents
/*
* AncSplit.cpp
*
* Created on: Aug 15, 2009
* Author: Stephen A. Smith
*/
/*
The AncSplit (ancestral split) class is used to store the likelihoods and
distributions associated with an ancstral split.
This should only be used for ancestral state calculation as there is no
need to store the likelihoods for each state (other than distconds) when
calculating the likeklihood.
*/
#include "AncSplit.h"
#include "RateModel.h"
#include <vector>
using namespace std;
#ifdef BIGTREE
#include "gmpfrxx/gmpfrxx.h"
#endif
AncSplit::AncSplit(RateModel * mod,int dist,int ldesc,int rdesc,double we):model(mod),weight(we),
ancdistint(dist),ldescdistint(ldesc),rdescdistint(rdesc){}
RateModel * AncSplit::getModel(){
return model;
}
double AncSplit::getWeight(){
return weight;
}
#ifdef BIGTREE
mpfr_class AncSplit::getLikelihood(){
return likelihood;
}
void AncSplit::setLikelihood(mpfr_class li){
likelihood = li;
}
#else
double AncSplit::getLikelihood(){
return likelihood;
}
void AncSplit::setLikelihood(double li){
likelihood = li;
}
#endif
/*
* AncSplit.h
*
* Created on: Aug 15, 2009
* Author: smitty
*/
/*
The AncSplit (ancestral split) class is used to store the likelihoods and
distributions associated with an ancstral split.
This should only be used for ancestral state calculation as there is no
need to store the likelihoods for each state (other than distconds) when
calculating the likeklihood.
*/
#ifndef ANCSPLIT_H_
#define ANCSPLIT_H_
#include <vector>
//using namespace std;
#include "RateModel.h"
#ifdef BIGTREE
#include "gmpfrxx/gmpfrxx.h"
#endif
class AncSplit{
private:
RateModel * model;
double weight;
#ifdef BIGTREE
mpfr_class likelihood;
#else
double likelihood;
#endif
public:
AncSplit(RateModel * mod,int,int,int,double);
RateModel * getModel();
double getWeight();
#ifdef BIGTREE
mpfr_class getLikelihood();
void setLikelihood(mpfr_class li);
#else
double getLikelihood();
void setLikelihood(double li);
#endif
int ancdistint;
int ldescdistint;
int rdescdistint;
};
#endif /* ANCSPLIT_H_ */
/*
* BayesianBioGeo.cpp
* lagrange_cpp
*
* Created by Stephen Smith on 1/13/10.
* Copyright 2010 Yale University. All rights reserved.
*
*/
#include "BayesianBioGeo.h"
#include <fstream>
#include <map>
#include <vector>
#include <math.h>
#include <cmath>
#include <functional>
#include <numeric>
#include <iostream>
using namespace std;
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "RateModel.h"
#include "BioGeoTree_copper.h"
namespace {
inline double MIN(const double &a, const double &b)
{return b < a ? (b) : double(a);}
}
BayesianBioGeo::BayesianBioGeo(BioGeoTree_copper * intree,RateModel * inrm, bool marg, int gen):
tree(intree),rm(inrm), gens(gen), marginal(marg){
gsl_rng_env_setup();
T = gsl_rng_default;
r = gsl_rng_alloc (T);
}
void BayesianBioGeo::run_global_dispersal_extinction(){
double prevlike = 0;
double prevprior = 0;
double prevpost = 0;
double curlike = 0;
double curpost = 0;
double curprior = 0;
vector<double> sliding(2);
vector<double> trials(2);
vector<double> success(2);
sliding[0] = 0.5;sliding[1] = 0.5;
for(unsigned int i=0;i<trials.size();i++){trials[i] = 0;}
for(unsigned int i=0;i<success.size();i++){success[i] = 0;}
int rot = 0;
double hastings = 1;
ofstream outfile ("test.txt");
params = vector<double>(2);
prevparams = vector<double>(2);
for(unsigned int i=0;i<params.size();i++){params[i] = 0.1;}
rm->setup_D(0.1);
rm->setup_E(0.1);
rm->setup_Q();
tree->update_default_model(rm);
prevlike = -tree->eval_likelihood(marginal);
prevprior = 1;
for(unsigned int i=0;i<params.size();i++){prevprior *= calculate_pdf(params[i]);}
cout << "intial like: " << prevlike << endl;
int iter = 0;
while(iter < gens){
double dispersal=params[0];
double extinction=params[1];
rm->setup_D(dispersal);
rm->setup_E(extinction);
rm->setup_Q();
tree->update_default_model(rm);
curlike = -tree->eval_likelihood(marginal);
/*
* calcprior
*/
curprior = 1;
for(unsigned int i=0;i<params.size();i++){curprior *= calculate_pdf(params[i]);}
/*
* check to keep
*/
double testr = gsl_ran_flat(r,0,1);
double test = MIN(1,hastings * (curprior/prevprior) * (exp(curlike-prevlike)));
if (iter > 100)
trials[rot] += 1;
//cout << "- "<< prevlike << " " << curlike << " " << hastings << " " << test << " " << testr;
//cout << " " << rot << " " << dispersal << " " << extinction << endl;
if (testr < test){
prevprior = curprior;
prevlike = curlike;
prevpost = curpost;
prevparams[rot] = params[rot];
if (iter > 100)
success[rot] += 1;
}
/*
* pick next params
*/
//params[rot] = calculate_sliding(prevparams[rot],sliding[rot]);
params[rot] = calculate_sliding_log(prevparams[rot],sliding[rot], &hastings);
if(iter%5 == 0){
rot += 1;
}
if (rot == 2){
rot = 0;
}
if(iter%100 == 0 && iter > 100){
cout << iter << " " << prevlike;
for(unsigned int i=0;i<params.size();i++){cout << " " << prevparams[i];}
for(unsigned int i=0;i<params.size();i++){cout << " " << success[i]/trials[i];}
cout << endl;
outfile << iter << "\t" << prevlike;
for(unsigned int i=0;i<params.size();i++){outfile << "\t" << prevparams[i];}
outfile << endl;
}
iter++;
}
outfile.close();
}
double BayesianBioGeo::calculate_pdf(double value){
return gsl_ran_flat_pdf (value, 0,100.);
}
double BayesianBioGeo::calculate_sliding(double value, double sliding){
//return abs(gsl_ran_flat(r,value - (sliding/2),value + (sliding/2)));
return abs(value + gsl_ran_gaussian(r,sliding));
//cauchy
}
double BayesianBioGeo::calculate_sliding_log(double value, double sliding, double * hastings){
double newv = log(value) + gsl_ran_gaussian(r,sliding);
newv = abs(exp(newv));
*hastings = 1 * newv/value;
return newv;
}
/*
* BayesianBioGeo.h
* lagrange_cpp
*
* Created by Stephen Smith on 1/13/10.
* Copyright 2010 Yale University. All rights reserved.
*
*/
#ifndef BAYESIANBIOGEO_H_
#define BAYESIANBIOGEO_H_
#include <vector>
using namespace std;
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "BioGeoTree_copper.h"
#include "RateModel.h"
class BayesianBioGeo{
private:
BioGeoTree_copper * tree;
RateModel * rm;
int gens;
bool marginal;
vector<double> params;
vector<double> prevparams;
const gsl_rng_type * T;
gsl_rng * r;
double calculate_pdf(double value);
double calculate_sliding(double value, double sliding);
double calculate_sliding_log(double value, double sliding, double * hastings);
public:
BayesianBioGeo(BioGeoTree_copper * intree,RateModel * inrm, bool marg, int gen);
void run_global_dispersal_extinction();
};
#endif /* BAYESIANBIOGEO_H_ */
\ No newline at end of file
/*
* BayesianBioGeo.cpp
* lagrange_cpp
*
* Created by Stephen Smith on 1/13/10.
* Copyright 2010 Yale University. All rights reserved.
*
*/
#include "BayesianBioGeoAllDispersal.h"
#include <fstream>
#include <map>
#include <vector>
#include <math.h>
#include <cmath>
#include <functional>
#include <numeric>
#include <iostream>
using namespace std;
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "RateModel.h"
#include "BioGeoTree_copper.h"
namespace {
inline double MIN(const double &a, const double &b)
{return b < a ? (b) : double(a);}
}
BayesianBioGeoAllDispersal::BayesianBioGeoAllDispersal(BioGeoTree_copper * intree,RateModel * inrm, bool marg, int gen):
tree(intree),rm(inrm),gens(gen),marginal(marg){
int nareas = rm->get_num_areas();
nareas = rm->get_num_areas();
vector<double> cols(nareas, 1);
vector< vector<double> > rows(nareas, cols);
D_mask = vector< vector< vector<double> > > (rm->get_num_periods(), rows);
gsl_rng_env_setup();
T = gsl_rng_default;
r = gsl_rng_alloc (T);
}
void BayesianBioGeoAllDispersal::run_global_dispersal_extinction(){
double prevlike = 0;
double prevprior = 1;
double prevpost = 0;
double curlike = 0;
double curpost = 0;
double curprior = 0;
int nareas = rm->get_num_areas();
int nparams = 2+(nareas*nareas*rm->get_num_periods())-(rm->get_num_periods()*nareas);
vector<double> sliding(nparams);
vector<double> trials(nparams);
vector<double> success(nparams);
for(unsigned int i=0;i<sliding.size();i++){sliding[i] = 0.001;}
//sliding[0] = 0.005;sliding[1] = 0.005;
for(unsigned int i=0;i<trials.size();i++){trials[i] = 0;}
for(unsigned int i=0;i<success.size();i++){success[i] = 0;}
int rot = 1;
double hastings = 1;
ofstream outfile ("test.txt");
params = vector<double>(nparams);
prevparams = vector<double>(nparams);
for(unsigned int i=0;i<params.size();i++){params[i] = 0.01;}
params[0] = 1.;params[1] = 5.28047e-07;
rm->setup_D(0.1);
rm->setup_E(0.1);
rm->setup_Q();
tree->update_default_model(rm);
prevlike = -tree->eval_likelihood(marginal);
int iter = 0;
while(iter < gens){
double dispersal=params[0];
double extinction=params[1];
int count = 2;
for (unsigned int i=0;i<D_mask.size();i++){
for (unsigned int j=0;j<D_mask[i].size();j++){
D_mask[i][j][j] = 0.0;
for (unsigned int k=0;k<D_mask[i][j].size();k++){
if(k!=j){
D_mask[i][j][k] = params[count];
/*if(D_mask[i][j][k] <= 0){
return 100000000;
}*/
count += 1;
}
}
}
}
rm->setup_D_provided(dispersal,D_mask);
rm->setup_E(extinction);
rm->setup_Q();
tree->update_default_model(rm);
curlike = -tree->eval_likelihood(marginal);
/*
* calcprior
*/
curprior = 1;
//for(unsigned int i=0;i<params.size();i++){curprior *= calculate_pdf(params[i]);}
/*
* check to keep
*/
double testr = gsl_ran_flat(r,0,1);
double test = MIN(1,hastings * (curprior/prevprior) * (exp(curlike-prevlike)));
if (iter > 1000)
trials[rot] += 1;
// cout << "- "<< prevlike << " " << curlike << " " << (curprior) << " "<< test << " " << testr << endl;
if (testr < test){
prevprior = curprior;
prevlike = curlike;
prevpost = curpost;
prevparams = params;
if (iter > 1000)
success[rot] += 1;
}
/*
* pick next params
*/
params[rot] = calculate_sliding(prevparams[rot],sliding[rot]);
//params[rot] = calculate_sliding_log(prevparams[rot],sliding[rot], &hastings);
if(iter%10 == 0){
rot += 1;
}
if (rot == params.size()){
rot = 1;
}
if(iter%100 == 0 && iter > 1){
cout << iter << " " << prevlike;
for(unsigned int i=0;i<2;i++){cout << " " << prevparams[i];}
cout << endl;
for (unsigned int i=0;i<D_mask.size();i++){
for (unsigned int j=0;j<D_mask[i].size();j++){
for (unsigned int k=0;k<D_mask[i][j].size();k++){
// cout << D_mask[i][j][k] << " ";
}
// cout << endl;
}
// cout << endl;
}
// for(unsigned int i=0;i<params.size();i++){cout << " " << success[i]/trials[i];}
// cout << endl;
outfile << iter << "\t" << prevlike;
for(unsigned int i=0;i<params.size();i++){outfile << "\t" << prevparams[i];}
outfile << endl;
}
iter++;
}
outfile.close();
}
double BayesianBioGeoAllDispersal::calculate_pdf(double value){
return gsl_ran_flat_pdf (value, 0,100.);
}
double BayesianBioGeoAllDispersal::calculate_sliding(double value, double sliding){
//return abs(gsl_ran_flat(r,value - (w/2),value + (w/2)));
return abs(value + gsl_ran_gaussian(r,sliding));
//gsl_gaus
//cauchy
}
double BayesianBioGeoAllDispersal::calculate_sliding_log(double value, double sliding, double * hastings){
double newv = log(value) + gsl_ran_gaussian(r,sliding);
newv = abs(exp(newv));
*hastings = 1 * newv/value;
return newv;
}
/*
* BayesianBioGeo.h
* lagrange_cpp
*
* Created by Stephen Smith on 1/13/10.
* Copyright 2010 Yale University. All rights reserved.
*
*/
#ifndef BAYESIANBIOGEOALLDISPERSAL_H_
#define BAYESIANBIOGEOALLDISPERSAL_H_
#include <vector>
using namespace std;
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "BioGeoTree_copper.h"
#include "RateModel.h"
class BayesianBioGeoAllDispersal{
private:
BioGeoTree_copper * tree;
RateModel * rm;
int gens;
bool marginal;
vector<double> params;
vector<double> prevparams;
vector< vector< vector<double> > > D_mask;
const gsl_rng_type * T;
gsl_rng * r;
double calculate_pdf(double value);
double calculate_sliding(double value, double sliding);
double calculate_sliding_log(double value, double sliding, double * hastings);
public:
BayesianBioGeoAllDispersal(BioGeoTree_copper * intree,RateModel * inrm, bool marg, int gen);
void run_global_dispersal_extinction();
};
#endif /* BAYESIANBIOGEO_H_ */
/*
* BayesianBioGeo.cpp
* lagrange_cpp
*
* Created by Stephen Smith on 1/13/10.
* Copyright 2010 Yale University. All rights reserved.
*
*/
#include "BayesianBioGeoStMapper.h"
#include <fstream>
#include <map>
#include <vector>
#include <math.h>
#include <cmath>
#include <functional>
#include <numeric>
#include <string>
#include <iostream>
#include <stack>
using namespace std;
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include "RateModel.h"
#include "RateMatrixUtils.h"
#include "BioGeoTree_copper.h"
#include "BranchSegment_copper.h"
#include "node.h"
#include "tree.h"
namespace {
inline double MIN(const double &a, const double &b)
{return b < a ? (b) : double(a);}
}
BayesianBioGeoStMapper::BayesianBioGeoStMapper(BioGeoTree_copper * inbgt, Tree * intr,
RateModel * inrm, bool marg, int gen){
gens = gen;
marginal = marg;
bgt = inbgt;
tree = intr;
rm = inrm;
gsl_rng_env_setup();
T = gsl_rng_default;
r = gsl_rng_alloc (T);
sts = "states";
pts = "points";
}
void BayesianBioGeoStMapper::run_mappings(){
/*
"""
Stochastically map a binary character on a set of trees.
newick_iter
* a sequence of newick trees (for newick in newick_iter: ...)
name2state
* mapping of tip name -> state (0 or 1)
treelen_prior_func
* function to get another draw from the treelength prior
pi0_prior_func
* ditto for pi0, the stationary frequency of state 0 and
so-called 'bias' parameter
outfile, histfile:
* open files for logging posterior parameter values (pi0,
treelength, etc) and the actual simulation results (see map())
sims_per_tree:
* number of simulations to save for each tree
ttable:
* optional mapping of tip names to actual names in name2state
char_name:
* optional name of the character, for console logging
thin:
* skip to very ith tree
silent:
* suppress logging to console
"""
states = (0, 1)