Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
seaconnect
annotationVariant
Commits
3a492626
Commit
3a492626
authored
Jun 17, 2021
by
eboulanger
Browse files
compare obtained SNP
parent
9f4dda23
Changes
2
Hide whitespace changes
Inline
Side-by-side
compare_snps.R
0 → 100644
View file @
3a492626
## load new and old SNP positions
## create new SNP ID
## compare SNP identified with both methods, see if overlap
setwd
(
"~/Documents/project_SEACONNECT/seaConnect--annotationVariant/annotationvariant/"
)
# read in SNP positions
tbl_dip_cds
<-
read.table
(
"diplodus/diplodus_CDS_pos.txt"
)
tbl_dip_cdsns
<-
read.table
(
"diplodus/diplodus_CDS_nonsynonymous_pos.txt"
)
tbl_dip_pca
<-
read.table
(
"diplodus/diplodus_pcadapt_pos.txt"
)
tbl_dip_pcaupns
<-
read.table
(
"../../seaConnect--radFishComp/200_genome_BLAST/04-filter_uniprot/diplodus_CDS_uniprot_nonsynonymous.tsv"
)
tbl_mul_cds
<-
read.table
(
"mullus/mullus_CDS_pos.txt"
)
tbl_mul_cdsns
<-
read.table
(
"mullus/mullus_CDS_nonsynonymous_pos.txt"
)
tbl_mul_pca
<-
read.table
(
"mullus/mullus_pcadapt_pos.txt"
)
# create new ID
dip_cds
<-
paste
(
tbl_dip_cds
$
V1
,
tbl_dip_cds
$
V2
,
sep
=
"_"
)
dip_pca
<-
paste
(
tbl_dip_pca
$
V1
,
tbl_dip_pca
$
V2
,
sep
=
"_"
)
dip_cdsns
<-
paste
(
tbl_dip_cdsns
$
V1
,
tbl_dip_cdsns
$
V2
,
sep
=
"_"
)
dip_pcaupns
<-
paste
(
tbl_dip_pcaupns
$
V1
,
tbl_dip_pcaupns
$
V2
,
sep
=
"_"
)
mul_cds
<-
paste
(
tbl_mul_cds
$
V1
,
tbl_mul_cds
$
V2
,
sep
=
"_"
)
mul_pca
<-
paste
(
tbl_mul_pca
$
V1
,
tbl_mul_pca
$
V2
,
sep
=
"_"
)
mul_cdsns
<-
paste
(
tbl_mul_cdsns
$
V1
,
tbl_mul_cdsns
$
V2
,
sep
=
"_"
)
# compare obtained SNPs with both methods
dip_cdsXpca
<-
intersect
(
dip_cds
,
dip_pca
)
dip_cdsnsXpca
<-
intersect
(
dip_cds_ns
,
dip_pca
)
dip_cdsXpcaXcdsns
<-
intersect
(
intersect
(
dip_cds
,
dip_pca
),
dip_cds_ns
)
dip_A
<-
setdiff
(
setdiff
(
dip_cds
,
dip_cds_ns
),
dip_pca
)
dip_B
<-
setdiff
(
setdiff
(
dip_cds_ns
,
dip_pca
),
dip_cds
)
dip_C
<-
setdiff
(
dip_pca
,
dip_cds
)
dip_AB
<-
setdiff
(
intersect
(
dip_cds
,
dip_cds_ns
),
dip_pca
)
dip_AC
<-
setdiff
(
intersect
(
dip_cds
,
dip_pca
),
dip_cds_ns
)
dip_BC
<-
setdiff
(
intersect
(
dip_cds_ns
,
dip_pca
),
dip_cds
)
dip_ABC
<-
intersect
(
intersect
(
dip_cds
,
dip_pca
),
dip_cds_ns
)
dip_plusplus
<-
intersect
(
dip_cds_ns
,
dip_pca_cds_up_ns
)
mul_cdsXpca
<-
intersect
(
mul_cds
,
mul_pca
)
mul_cdsnsXpca
<-
intersect
(
mul_cds_ns
,
mul_pca
)
mul_cdsXpcaXcdsns
<-
intersect
(
intersect
(
mul_cds
,
mul_pca
),
mul_cds_ns
)
mul_A
<-
setdiff
(
setdiff
(
mul_cds
,
mul_cds_ns
),
mul_pca
)
mul_B
<-
setdiff
(
setdiff
(
mul_cds_ns
,
mul_pca
),
mul_cds
)
mul_C
<-
setdiff
(
mul_pca
,
mul_cds
)
mul_AB
<-
setdiff
(
intersect
(
mul_cds
,
mul_cds_ns
),
mul_pca
)
mul_AC
<-
setdiff
(
intersect
(
mul_cds
,
mul_pca
),
mul_cds_ns
)
mul_BC
<-
setdiff
(
intersect
(
mul_cds_ns
,
mul_pca
),
mul_cds
)
mul_ABC
<-
intersect
(
intersect
(
mul_cds
,
mul_pca
),
mul_cds_ns
)
## euler diagrams
library
(
eulerr
)
dip_fit
<-
euler
(
c
(
"A"
=
length
(
dip_cds
)
-
length
(
dip_cdsXpca
),
"B"
=
length
(
dip_pca
)
-
length
(
dip_cdsXpca
),
"A&B"
=
length
(
dip_cdsXpca
)))
plot
(
dip_fit
,
quantities
=
TRUE
,
labels
=
TRUE
)
dip_fit2
<-
euler
(
c
(
"CDS"
=
length
(
dip_A
),
"CDS_NS"
=
length
(
dip_B
),
"PCA"
=
length
(
dip_C
),
"CDS&CDS_NS"
=
length
(
dip_AB
),
"CDS&PCA"
=
length
(
dip_AC
),
"CDS_NS&PCA"
=
length
(
dip_BC
),
"CDS&CDS_NS&PCA"
=
length
(
dip_ABC
)))
plot
(
dip_fit2
,
quantities
=
TRUE
,
labels
=
TRUE
)
mul_fit
<-
euler
(
c
(
"A"
=
length
(
mul_cds_id
)
-
length
(
mul_comm
),
"B"
=
length
(
mul_pca_id
)
-
length
(
mul_comm
),
"A&B"
=
length
(
mul_comm
)))
plot
(
mul_fit
,
quantities
=
TRUE
,
labels
=
TRUE
)
mul_fit2
<-
euler
(
c
(
"CDS"
=
length
(
mul_A
),
"CDS_NS"
=
length
(
mul_B
),
"PCA"
=
length
(
mul_C
),
"CDS&CDS_NS"
=
length
(
mul_AB
),
"CDS&PCA"
=
length
(
mul_AC
),
"CDS_NS&PCA"
=
length
(
mul_BC
),
"CDS&CDS_NS&PCA"
=
length
(
mul_ABC
)))
plot
(
mul_fit2
,
quantities
=
TRUE
,
labels
=
TRUE
)
compare_snps.sh
0 → 100644
View file @
3a492626
# compare lists of SNP
## Diplodus
# get CDS positions
grep
-v
"^##"
"diplodus/diplodus_CDS.vcf"
|
cut
-f1-2
|
sed
'1d'
>
"diplodus/diplodus_CDS_pos.txt"
# get pcadapt outlier positions
grep
-v
"^##"
"../../seaConnect--dataPrep/04-finalPrep/01-Diplodus/dip_adaptive_413.vcf"
|
cut
-f1-2
|
sed
'1d'
>
"diplodus/diplodus_pcadapt_pos.txt"
## Mullus
# get CDS positions
grep
-v
"^##"
"mullus/mullus_CDS.vcf"
|
cut
-f1-2
|
sed
'1d'
>
"mullus/mullus_CDS_pos.txt"
# get pcadapt outlier positions
grep
-v
"^##"
"../../seaConnect--dataPrep/04-finalPrep/02-Mullus/mul_adaptive_291.vcf"
|
cut
-f1-2
|
sed
'1d'
>
"mullus/mullus_pcadapt_pos.txt"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment