Bayescan_evaluation.R 2.48 KB
Newer Older
eboulanger's avatar
eboulanger committed
1
2
3
4
5
6
7
8
9
10
setwd("~/Documents/project_SEACONNECT/seaConnect--dataPrep/02-Bayescan/")
#----- BayeScan -----#
## evaluate model convergence and extract results
## script adapted from http://evomicsorg.wpengine.netdna-cdn.com/wp-content/uploads/2016/01/BayeScan_BayeScEnv_exercises.pdf

# load libraries
library(coda)
library(vcfR)

# set arguments
eboulanger's avatar
eboulanger committed
11
spCode <- "mul" # species
eboulanger's avatar
eboulanger committed
12
13
14
runNum <- 1 # run number
inputName <- paste0(spCode,"_bayesc_2pop_run",runNum) # 2pop or 3pop analysis (only for mullus)
#vcfPath <- "../01-SNPfilters/02-Mullus/mul_all_filtered.vcf"
eboulanger's avatar
eboulanger committed
15
#vcfPath <- "../01-SNPfilters/01-Diplodus/dip_all_filtered.vcf"
eboulanger's avatar
eboulanger committed
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

## model convergence ####
chain1 <- read.table(paste0(inputName,"_ch1.sel"),header=TRUE)
#chain<-chain[-c(1)] # remove first column (step number)
# create a MCMC object with the correct thinning interval
chain1 <- mcmc(chain1,thin=10)
# Plot
plot(chain1)
# Summary statistics
summary(chain1)
# Test correlation between sampled parameters to verify that the sample is large enough
autocorr.diag(chain1)
# Effective size of the sample
effectiveSize(chain1)

# Statistic test for convergence
#### Geweke's convergence diagnistic
#The diagnostic reports the z- scores for each parameter. 
#With ?? = 0.05, We reject H0 (equality of means => convergence) if z < -1.96 or z > +1.96.
geweke <- geweke.diag(chain1, frac1=0.1, frac2=0.5)
geweke
which(abs(geweke$z)>1.96) # 0 parameter does not converge

#### Heidelberg and Welch's test
heidel.diag(chain1, eps=0.1, pvalue=0.05) # all parameters passed

#### Test comparing two chains
chain2 <- read.table(paste0(inputName,"_ch2.sel"),header=TRUE)
#chain2<-chain2[-c(1)]
chain2 <- mcmc(chain2,thin=10)
plot(chain2)

combined = mcmc.list(chain1,chain2) 
plot(combined)
gelman.diag(combined) # should be close to 1
gelman.plot(combined,ask)

## Bayescan results ####

## Functions defined in the script 'plot_R.r'in the BayeScan folder
eboulanger's avatar
eboulanger committed
56
results<-plot_bayescan(paste0(inputName,"_ch2_fst.txt"),FDR=0.05)
eboulanger's avatar
eboulanger committed
57
58
59

# detect outliers
outliers_bayescan <-results$outliers # position of outliers
eboulanger's avatar
eboulanger committed
60
results$nb_outliers #283 outliers for mullus, 126 for diplodus
eboulanger's avatar
eboulanger committed
61
62

# extract corresponding SNP names and positions
eboulanger's avatar
eboulanger committed
63
vcf <- read.vcfR(vcfPath, verbose=F)
eboulanger's avatar
eboulanger committed
64
65
66
67
loci <- as.data.frame(vcf@fix[,1:2])
outlier_loci <- loci[results$outliers,]

# output positions table
eboulanger's avatar
eboulanger committed
68
69
write.table(outlier_loci, file = paste0("outl_pos_bayesc_",spCode,"_run",runNum,".txt"), sep = "\t", quote = F, row.names = F, col.names = F)
print(paste0("outlier positions table exported to outl_pos_bayesc_", spCode,"_run", runNum,".txt"), quote = 0)
eboulanger's avatar
eboulanger committed
70
71