setwd("~/Documents/project_SEACONNECT/seaConnect--dataPrep/02-Bayescan/") #----- BayeScan -----# ## evaluate model convergence and extract results ## script adapted from http://evomicsorg.wpengine.netdna-cdn.com/wp-content/uploads/2016/01/BayeScan_BayeScEnv_exercises.pdf # load libraries library(coda) library(vcfR) # set arguments spCode <- "dip" runNum <- 1 inputName <- paste0(spCode,"_bayesc_2pop_run",runNum) ## model convergence #### chain1 <- read.table(paste0(inputName,"_ch1.sel"),header=TRUE) #chain<-chain[-c(1)] # remove first column (step number) # create a MCMC object with the correct thinning interval chain1 <- mcmc(chain1,thin=10) # Plot plot(chain1) # Summary statistics summary(chain1) # Test correlation between sampled parameters to verify that the sample is large enough autocorr.diag(chain1) # Effective size of the sample effectiveSize(chain1) # Statistic test for convergence #### Geweke's convergence diagnistic #The diagnostic reports the z- scores for each parameter. #With ?? = 0.05, We reject H0 (equality of means => convergence) if z < -1.96 or z > +1.96. geweke <- geweke.diag(chain1, frac1=0.1, frac2=0.5) geweke which(abs(geweke$z)>1.96) # 0 parameter does not converge #### Heidelberg and Welch's test heidel.diag(chain1, eps=0.1, pvalue=0.05) # all parameters passed #### Test comparing two chains chain2 <- read.table(paste0(inputName,"_ch2.sel"),header=TRUE) #chain2<-chain2[-c(1)] chain2 <- mcmc(chain2,thin=10) plot(chain2) combined = mcmc.list(chain1,chain2) plot(combined) gelman.diag(combined) # should be close to 1 gelman.plot(combined,ask) ## Bayescan results #### ## Functions defined in the script 'plot_R.r'in the BayeScan folder results<-plot_bayescan(paste0(inputName,"_ch1_fst.txt"),FDR=0.05) # detect outliers outliers_bayescan <-results$outliers # position of outliers results$nb_outliers #45 outliers # extract corresponding SNP names and positions vcf <- read.vcfR("../01-SNPfilters/01-Diplodus/diplodus_all_filtered.vcf", verbose=F) loci <- as.data.frame(vcf@fix[,1:2]) outlier_loci <- loci[results$outliers,] outlier_positions <- outlier_loci$POS # output positions table write.table(outlier_positions, file = paste0("outl_pos_bayesc_",args[2],".txt"), sep = "\t", quote = F, row.names = F, col.names = F) print(paste0("outlier positions table exported to outl_pos_bayesc_", args[2]), quote = 0)