PCAdapt.R 2.57 KB
Newer Older
eboulanger's avatar
eboulanger committed
1
2
3
4
#----- PCAdapt -----#
# vignette: https://cran.r-project.org/web/packages/pcadapt/vignettes/pcadapt.html

#!/usr/bin/env Rscript
eboulanger's avatar
eboulanger committed
5
#setwd("/Users/eboulanger-admin/Documents/project_SEACONNECT/seaConnect--dataPrep/03-PCAdapt")
eboulanger's avatar
eboulanger committed
6
args = commandArgs(trailingOnly=TRUE)
eboulanger's avatar
eboulanger committed
7
#args <- c("../01-SNPfilters/01-Diplodus/dip_all_filtered.vcf", "dip")
eboulanger's avatar
eboulanger committed
8
#args <- c("../01-SNPfilters/02-Mullus/mul_all_filtered.vcf", "mul")
eboulanger's avatar
eboulanger committed
9
10
11

# load libraries
library(pcadapt)
eboulanger's avatar
eboulanger committed
12
library(qvalue)
eboulanger's avatar
eboulanger committed
13
14
library(vcfR)

eboulanger's avatar
eboulanger committed
15
# define arguments
eboulanger's avatar
eboulanger committed
16
path_to_file <- args[1]
eboulanger's avatar
eboulanger committed
17
speciesCode <- args[2]
eboulanger's avatar
eboulanger committed
18
numPC <- args[3]
eboulanger's avatar
eboulanger committed
19
20

# import vcf file to pcadapt format
eboulanger's avatar
eboulanger committed
21
22
23
24
25
dat <- read.pcadapt(path_to_file, type = "vcf")

# choose number K of principal components
x <- pcadapt(input = dat, K = 20)
# set plot output file name
eboulanger's avatar
eboulanger committed
26
pdf(file=paste0("RPlots_", args[2],"_Kselection.pdf"))
eboulanger's avatar
eboulanger committed
27
28
29
30
31
# scree plot of principal components
plot(x, option = "screeplot", K = 10)
# score plot of principal components
plot(x, option = "scores")
plot(x, option = "scores", i = 3, j = 4) # check PC axis 3 and 4
eboulanger's avatar
eboulanger committed
32
33
34
plot(x, option = "scores", i = 1, j = 3)
plot(x, option = "scores", i = 2, j = 3)
dev.off()
eboulanger's avatar
eboulanger committed
35

eboulanger's avatar
eboulanger committed
36
37
38
39
40
# compute outliers detection with K = 1 (# of PC to retain) for M. surmuletus.
# K = 1 for D. sargus
x <- pcadapt(dat, K = 1)
summary(x)
pdf(file=paste0("RPlots_", args[2],"_manhattan_qq_hist.pdf"))
eboulanger's avatar
eboulanger committed
41
# Manhattan plot
eboulanger's avatar
eboulanger committed
42
#jpeg(filename = paste0("Rplots_",args[2],"_manhattan_pcadpt",".jpeg"))
eboulanger's avatar
eboulanger committed
43
44
45
46
47
48
49
50
51
52
plot(x, option = "manhattan")
#dev.off()
# QQ plot
plot(x , option = "qqplot", threshold = 0.05)
# histogram of p-values
#jpeg(filename = paste0(args[2],"_histP_pcadpt",".jpeg"))
hist(x$pvalues, xlab = "p-values", main = NULL, breaks = 50, col = "orange")
#dev.off()
# histogram of test statistic
plot(x, option = "stat.distribution")
eboulanger's avatar
eboulanger committed
53
dev.off()
eboulanger's avatar
eboulanger committed
54
55

# choose cutoff for outlier detection
eboulanger's avatar
eboulanger committed
56
57
58
59
60
61
62

# q-values & FDR
qval <- qvalue(x$pvalues)$qvalues
alphaQ <- 0.005
outliersQ <- which(qval < alphaQ)
length(outliersQ)

eboulanger's avatar
eboulanger committed
63
64
# Bonferroni correction
padj <- p.adjust(x$pvalues, method="bonferroni")
eboulanger's avatar
eboulanger committed
65
66
67
68
69
70
71
72
73
74
alphaB <- 0.05
outliersB <- which(padj < alphaB)
print(paste(length(outliersB), "outliers detected"),quote=0)

# compare
setdiff(outliersB, outliersQ)
setdiff(outliersQ, outliersB)

# keep the ones inferred from FDR & q-values
outliers <- outliersQ
eboulanger's avatar
eboulanger committed
75
76
77
78
79
80
81

# extract corresponding SNP names and positions
vcf <- read.vcfR(path_to_file, verbose=F)
loci <- as.data.frame(vcf@fix[,1:2])
outlier_loci <- loci[outliers,]

# output positions table
eboulanger's avatar
eboulanger committed
82
write.table(outlier_loci, file = paste0("outl_pos_pcadpt_",speciesCode,".txt"), sep = "\t", quote = F, row.names = F, col.names = F)
eboulanger's avatar
eboulanger committed
83
print(paste0(length(outliers), " outlier positions exported to outl_pos_pcadpt_",speciesCode), quote = 0)