Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
khalid
radsex
Commits
1fcb1067
Commit
1fcb1067
authored
Feb 07, 2018
by
Romain Feron
Browse files
Added some scripts I use to generate plots
parent
a6fdb1e1
Changes
9
Show whitespace changes
Inline
Side-by-side
r_scripts/clustering.R
0 → 100644
View file @
1fcb1067
source
(
"/home/rferon/work/analyses/radseq_all/r_scripts/utils.R"
)
coverage_heatmap
=
function
(
coverage_file
,
popmap_file
,
output_file
=
"none"
,
remove_coverage_1
=
TRUE
,
sex_ratio_range
=
0.25
,
min_individuals_ratio
=
0.5
,
special_SD_system
=
FALSE
,
coverage_filtering_range
=
0.1
,
max_coverage
=
150
,
distance
=
"binary"
,
clustering
=
"ward.D"
,
x_axis
=
'individuals'
,
individuals.dendrogram
=
TRUE
,
loci.dendrogram
=
TRUE
,
males.color
=
"steelblue3"
,
females.color
=
"orange1"
,
palette
=
c
(
"lightgrey"
,
"royalblue2"
,
"black"
,
"gold2"
,
"red3"
),
fasta_file_path
=
"none"
,
coverage_file_path
=
"none"
,
species
=
"none"
)
{
data
=
load_coverage_file
(
coverage_file
)
popmap
=
load_popmap_file
(
popmap_file
)
data
=
filter
(
data
,
popmap
,
min_ratio
=
min_individuals_ratio
,
remove_coverage_1
=
remove_coverage_1
,
sex_ratio_range
=
sex_ratio_range
,
special
=
special_SD_system
,
ratio_range
=
coverage_filtering_range
,
max_coverage
=
max_coverage
,
fasta_file_path
=
fasta_file_path
,
coverage_file_path
=
coverage_file_path
,
species
=
species
)
if
(
dim
(
data
)[
1
]
>
0
)
{
if
(
output_file
!=
"none"
)
{
png
(
output_file
,
width
=
2400
,
height
=
1200
,
res
=
90
)
}
plot_heatmap
(
data
,
popmap
,
distance
=
distance
,
clustering
=
clustering
,
x_axis
=
x_axis
,
individuals.dendrogram
=
individuals.dendrogram
,
loci.dendrogram
=
loci.dendrogram
,
males.color
=
males.color
,
females.color
=
females.color
,
palette
=
palette
)
if
(
output_file
!=
"none"
)
{
o
=
dev.off
()
}
}
}
presence_heatmap
=
function
(
coverage_file
,
popmap_file
,
read.length
=
94
,
distance
=
"binary"
,
clustering
=
"ward.D"
,
x_axis
=
'individuals'
,
individuals.dendrogram
=
TRUE
,
loci.dendrogram
=
TRUE
,
males.color
=
"steelblue3"
,
females.color
=
"orange1"
,
absence.color
=
"cornsilk"
,
presence.color
=
"skyblue3"
)
{
data
=
load_coverage_file
(
coverage_file
)
popmap
=
load_popmap_file
(
popmap_file
)
data
=
convert_to_presence_data
(
data
,
read.length
=
read.length
)
if
(
dim
(
data
)[
1
]
>
0
)
{
plot_heatmap
(
data
,
popmap
,
distance
=
distance
,
clustering
=
clustering
,
x_axis
=
x_axis
,
individuals.dendrogram
=
individuals.dendrogram
,
loci.dendrogram
=
loci.dendrogram
,
males.color
=
males.color
,
females.color
=
females.color
,
absence.color
=
absence.color
,
presence.color
=
presence.color
)
}
}
export_to_fasta
=
function
(
data
,
file_path
)
{
output_file
=
file
(
file_path
)
lines
=
c
()
for
(
i
in
1
:
dim
(
data
)[
1
])
{
name
=
">"
name
=
paste
(
name
,
data
$
Name
[
i
],
sep
=
""
)
sequence
=
as.character
(
data
$
Sequence
[
i
])
lines
=
c
(
lines
,
name
,
sequence
)
}
writeLines
(
lines
,
output_file
)
close
(
output_file
)
}
export_coverage
=
function
(
data
,
file_path
)
{
names
(
data
)[
1
]
=
"Haplotype"
write.table
(
data
,
file
=
file_path
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
FALSE
)
}
filter
=
function
(
data
,
popmap
,
min_ratio
=
0.5
,
special
=
FALSE
,
ratio_range
=
0.1
,
sex_ratio_range
=
0.25
,
max_coverage
=
150
,
remove_coverage_1
=
TRUE
,
fasta_file_path
=
"none"
,
coverage_file_path
=
"none"
,
species
=
"none"
)
{
fasta
=
TRUE
if
(
fasta_file_path
==
"none"
)
{
fasta
=
FALSE
}
depths
=
data
[,
7
:
dim
(
data
)[
2
]]
depths
[
depths
==
0
]
=
NA
if
(
remove_coverage_1
){
depths
[
depths
==
1
]
=
NA
}
# REMOVE LOCI WITH LESS MIN.RATIO MALES AND LESS THAN MIN.RATIO FEMALES
min_males
=
floor
(
sum
(
popmap
$
Sex
==
'M'
)
*
min_ratio
)
min_females
=
floor
(
sum
(
popmap
$
Sex
==
'F'
)
*
min_ratio
)
males_col
=
which
(
names
(
depths
)
%in%
popmap
$
Individual
[
which
(
popmap
$
Sex
==
'M'
)])
females_col
=
which
(
names
(
depths
)
%in%
popmap
$
Individual
[
which
(
popmap
$
Sex
==
'F'
)])
too_few_individuals
=
which
((
rowSums
(
!
is.na
(
depths
[,
males_col
]))
<
min_males
)
&
(
rowSums
(
!
is.na
(
depths
[,
females_col
]))
<
min_females
)
)
data
=
data
[
-
too_few_individuals
,]
depths
=
depths
[
-
too_few_individuals
,]
# IF A VALUE IS > MAX.COVERAGE PERCENTILE, REPLACE WITH MAX.COVERAGE PERCENTILE
depths
[
depths
>
max_coverage
]
=
max_coverage
if
(
!
special
)
{
# Filter XY alleles
males_means
=
rowMeans
(
depths
[,
males_col
],
na.rm
=
TRUE
)
females_means
=
rowMeans
(
depths
[,
females_col
],
na.rm
=
TRUE
)
males_means
[
males_means
==
'NaN'
]
=
0
females_means
[
females_means
==
'NaN'
]
=
0
# Sequences in 50% more males than females
n_males
=
rowSums
(
!
is.na
(
depths
[,
males_col
]))
n_females
=
rowSums
(
!
is.na
(
depths
[,
females_col
]))
sex_ratio
=
(
n_males
/
(
n_males
+
n_females
))
to_keep
=
which
(
sex_ratio
>
1
-
sex_ratio_range
|
sex_ratio
<
sex_ratio_range
)
# Sequences with coverage bias
unbiased_value
=
0.5
ratio
=
(
males_means
)
/
(
males_means
+
females_means
)
to_keep
=
union
(
to_keep
,
which
(
ratio
>
unbiased_value
+
ratio_range
|
ratio
<
unbiased_value
-
ratio_range
))
# Filtering
depths
=
depths
[
to_keep
,]
data
=
data
[
to_keep
,]
}
depths
[
is.na
(
depths
)]
=
0
data
=
data.frame
(
Origin
=
rep
(
""
,
dim
(
data
)[
1
]),
Males
=
rowSums
(
depths
[,
males_col
]
>
0
),
Females
=
rowSums
(
depths
[,
females_col
]
>
0
),
Other_allele
=
rep
(
""
,
dim
(
data
)[
1
]),
data
[,
1
:
6
],
depths
)
individual_names
=
names
(
depths
)
data
$
Origin
=
as.character
(
data
$
Origin
)
data
$
Haplotype_ID
=
as.character
(
data
$
Haplotype_ID
)
data
$
Stack_ID
=
as.character
(
data
$
Stack_ID
)
data
$
Other_allele
=
as.character
(
data
$
Other_allele
)
for
(
i
in
1
:
dim
(
data
)[
1
])
{
data
$
Stack_ID
[
i
]
=
strsplit
(
as.character
(
data
$
Stack_ID
[
i
]),
"_"
)[[
1
]][
1
]
if
(
data
$
Haplotype_ID
[
i
]
==
data
$
Stack_ID
[
i
])
{
data
$
Origin
[
i
]
=
"Heatmap"
}
else
{
data
$
Origin
[
i
]
=
"Blasting"
}
}
heatmap
=
subset
(
data
,
data
$
Origin
==
'Heatmap'
)
blasting
=
subset
(
data
,
data
$
Origin
==
'Blasting'
&
!
(
data
$
Haplotype_ID
%in%
heatmap
$
Stack_ID
))
blasting
$
Other_allele
=
blasting
$
Stack_ID
# When a haplotype was rescued from multiple haplotypes, only keep best matching scores
to_remove
=
c
()
haplotypes
=
unique
(
blasting
$
Haplotype_ID
)
for
(
i
in
1
:
length
(
haplotypes
))
{
temp
=
max
(
blasting
$
Matches
[
blasting
$
Haplotype_ID
==
haplotypes
[
i
]])
to_remove
=
c
(
to_remove
,
which
(
blasting
$
Haplotype_ID
==
haplotypes
[
i
]
&
blasting
$
Matches
<
temp
))
max
=
which
(
blasting
$
Haplotype_ID
==
haplotypes
[
i
]
&
blasting
$
Matches
==
temp
)
if
(
length
(
max
>
1
))
{
to_remove
=
c
(
to_remove
,
max
[
-1
])
blasting
$
Other_allele
[
max
[
1
]]
=
paste
(
"Complementary:"
,
paste
(
blasting
$
Stack_ID
[
max
],
collapse
=
","
),
sep
=
""
)
}
}
if
(
length
(
to_remove
)
>
0
)
{
blasting
=
blasting
[
-
to_remove
,
]}
for
(
i
in
1
:
dim
(
heatmap
)[
1
])
{
candidates
=
which
(
blasting
$
Stack_ID
==
heatmap
$
Haplotype_ID
[
i
])
if
(
length
(
candidates
)
==
0
)
{
heatmap
$
Other_allele
[
i
]
=
"Specific"
}
else
{
heatmap
$
Other_allele
[
i
]
=
paste
(
"Complementary:"
,
paste
(
blasting
$
Haplotype_ID
[
candidates
],
collapse
=
","
),
sep
=
""
)
}
}
combined
=
rbind
(
heatmap
,
blasting
)
new_ids
=
paste
(
species
,
"_"
,
combined
$
Haplotype_ID
,
"_"
,
combined
$
Males
,
"M_"
,
combined
$
Females
,
"F_"
,
combined
$
Origin
,
"_"
,
combined
$
Other_allele
,
sep
=
""
)
combined
=
data.frame
(
Name
=
new_ids
,
combined
)
names
(
combined
)[
12
:
dim
(
combined
)[
2
]]
=
individual_names
export_to_fasta
(
subset
(
combined
,
select
=
c
(
"Name"
,
"Sequence"
)),
fasta_file_path
)
export_coverage
(
combined
[,
c
(
1
,
12
:
dim
(
combined
)[
2
])],
coverage_file_path
)
return
(
combined
[,
c
(
7
,
12
:
dim
(
combined
)[
2
])])
}
convert_to_presence_data
=
function
(
data
,
read.length
=
94
)
{
data
=
subset
(
data
,
data
$
Matches
==
read.length
)
depths
=
data
[,
7
:
dim
(
data
)[
2
]]
depths
[
depths
>
0
]
=
1
new_data
=
data.frame
(
data
[,
1
],
depths
)
names
(
new_data
)
=
c
(
"Haplotype_ID"
,
names
(
data
[,
7
:
dim
(
data
)[
2
]]))
return
(
new_data
)
}
plot_heatmap
=
function
(
data
,
popmap
=
data.frame
(),
x_axis
=
'individuals'
,
distance
=
"binary"
,
clustering
=
"ward.D"
,
individuals.dendrogram
=
TRUE
,
loci.dendrogram
=
TRUE
,
males.color
=
"steelblue3"
,
females.color
=
"orange1"
,
palette
=
c
(
"lightgrey"
,
"royalblue2"
,
"black"
,
"gold2"
,
"red3"
),
absence.color
=
"cornsilk"
,
presence.color
=
"skyblue3"
)
{
x
=
as.matrix
(
data
[,
-1
],
rownames.force
=
TRUE
)
rownames
(
x
)
=
data
$
Haplotype_ID
if
(
dim
(
data
)[
1
]
>
200
)
{
tile_separation_size
=
0.05
}
else
{
tile_separation_size
=
0.1
}
individuals.d
=
dist
(
t
(
x
),
method
=
distance
)
individuals.clusters
=
hclust
(
individuals.d
,
method
=
clustering
)
ordered
=
individuals.clusters
$
labels
[
individuals.clusters
$
order
]
temp
=
c
(
1
)
for
(
i
in
1
:
length
(
ordered
))
{
temp
=
c
(
temp
,
which
(
names
(
data
)
==
ordered
[
i
]))
}
individuals.order
=
temp
loci.d
=
dist
(
x
,
method
=
distance
)
loci.clusters
=
hclust
(
loci.d
,
method
=
clustering
)
ordered
=
loci.clusters
$
labels
[
loci.clusters
$
order
]
temp
=
c
()
for
(
i
in
1
:
length
(
ordered
))
{
temp
=
c
(
temp
,
which
(
data
$
Haplotype_ID
==
ordered
[
i
]))
}
loci.order
=
temp
data
=
data
[,
individuals.order
]
data
=
data
[
loci.order
,]
melted
=
suppressMessages
(
melt
(
data
,
id.vars
=
c
(
"Haplotype_ID"
),
variable.name
=
"Individuals"
,
value.name
=
"Values"
))
melted
$
Haplotype_ID
=
factor
(
melted
$
Haplotype_ID
,
levels
=
data
$
Haplotype_ID
)
melted
$
Individuals
=
factor
(
as.character
(
melted
$
Individuals
),
levels
=
as.character
(
names
(
data
)[
-1
]))
if
(
length
(
unique
(
melted
$
Values
))
==
2
)
{
melted
$
Values
=
factor
(
melted
$
Values
,
levels
=
unique
(
melted
$
Values
))
}
else
{
melted
$
Values
=
as.numeric
(
melted
$
Values
)
}
all_values
=
melted
$
Values
all_values
[
all_values
==
0
]
=
NA
stats
=
summary
(
all_values
,
na.rm
=
TRUE
)
if
(
length
(
popmap
)
>
0
)
{
sexes
=
c
(
popmap
$
Sex
)
names
(
sexes
)
=
c
(
popmap
$
Individual
)
}
sex_palette
=
c
(
"M"
=
males.color
,
"F"
=
females.color
)
if
(
x_axis
==
"individuals"
)
{
g
=
ggplot
(
melted
,
aes
(
x
=
Individuals
,
y
=
Haplotype_ID
,
fill
=
Values
))
+
geom_tile
(
color
=
"grey60"
,
size
=
tile_separation_size
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
60
,
hjust
=
1
))
+
ylab
(
"Loci"
)
+
xlab
(
"Individuals"
)
if
(
length
(
unique
(
all_values
))
!=
2
)
{
g
=
g
+
scale_fill_gradientn
(
name
=
"Coverage"
,
colours
=
palette
,
values
=
c
(
0
,
0.001
,
stats
[
3
]
/
stats
[
6
],
stats
[
5
]
/
stats
[
6
],
1
))
}
else
{
g
=
g
+
scale_fill_manual
(
breaks
=
c
(
"0"
,
"1"
),
labels
=
c
(
"Absent"
,
"Present"
),
values
=
c
(
"0"
=
absence.color
,
"1"
=
presence.color
))
+
theme
(
legend.title
=
element_blank
())
}
if
(
length
(
popmap
)
>
0
)
{
g
=
g
+
theme
(
axis.text.x
=
element_text
(
colour
=
sex_palette
[
sexes
[
names
(
data
)[
-1
]]]))
}
a
=
ggplotGrob
(
g
)
loci_start
=
4
if
(
individuals.dendrogram
==
TRUE
)
{
h
=
suppressMessages
(
ggdendrogram
(
individuals.clusters
,
labels
=
FALSE
,
leaf_labels
=
FALSE
,
theme_dendro
=
TRUE
,
rotate
=
FALSE
)
+
theme
(
plot.margin
=
unit
(
c
(
0.1
,
0
,
0
,
0
),
'cm'
),
axis.text.x
=
element_blank
(),
axis.text.y
=
element_blank
(),
axis.title.x
=
element_blank
())
+
scale_y_continuous
(
expand
=
c
(
0
,
0.5
))
+
scale_x_continuous
(
expand
=
c
(
0
,
0.5
)))
b
=
ggplotGrob
(
h
)
a
=
gtable_add_rows
(
a
,
unit
(
1.1
,
"cm"
),
pos
=
3
)
a
=
gtable_add_grob
(
a
,
b
,
t
=
3
,
l
=
4
,
b
=
5
,
r
=
5
)
loci_start
=
loci_start
+
1
}
if
(
loci.dendrogram
==
TRUE
)
{
i
=
suppressMessages
(
ggdendrogram
(
loci.clusters
,
labels
=
FALSE
,
leaf_labels
=
FALSE
,
theme_dendro
=
TRUE
,
rotate
=
FALSE
)
+
theme
(
plot.margin
=
unit
(
c
(
0.1
,
0
,
0
,
0
),
'cm'
),
axis.text.x
=
element_blank
(),
axis.text.y
=
element_blank
(),
axis.title.x
=
element_blank
())
+
coord_flip
()
+
scale_y_continuous
(
expand
=
c
(
0
,
0.5
))
+
scale_x_continuous
(
expand
=
c
(
0
,
0
)))
c
=
ggplotGrob
(
i
)
a
=
gtable_add_cols
(
a
,
unit
(
1.1
,
"cm"
),
pos
=
5
)
a
=
gtable_add_grob
(
a
,
c
,
t
=
loci_start
,
l
=
5
,
b
=
loci_start
+
2
,
r
=
6
)
}
grid.newpage
()
grid.draw
(
a
)
}
else
if
(
x_axis
==
"loci"
)
{
g
=
ggplot
(
melted
,
aes
(
x
=
Haplotype_ID
,
y
=
Individuals
,
fill
=
Values
))
+
geom_tile
(
color
=
"grey60"
,
size
=
tile_separation_size
)
+
theme
(
axis.text.x
=
element_text
(
angle
=
60
,
hjust
=
1
))
+
xlab
(
"Loci"
)
+
ylab
(
"Individuals"
)
if
(
length
(
unique
(
all_values
))
!=
2
)
{
g
=
g
+
scale_fill_gradientn
(
name
=
"Coverage"
,
colours
=
palette
,
values
=
c
(
0
,
0.001
,
stats
[
4
]
/
stats
[
6
],
stats
[
5
]
/
stats
[
6
],
1
))
}
else
{
g
=
g
+
scale_fill_manual
(
breaks
=
c
(
"0"
,
"1"
),
labels
=
c
(
"Absent"
,
"Present"
),
values
=
c
(
"0"
=
absence.color
,
"1"
=
presence.color
))
+
theme
(
legend.title
=
element_blank
())
}
if
(
length
(
popmap
)
>
0
)
{
g
=
g
+
theme
(
axis.text.y
=
element_text
(
colour
=
sex_palette
[
sexes
[
names
(
data
)[
-1
]]]))
}
a
=
ggplotGrob
(
g
)
loci_start
=
4
if
(
loci.dendrogram
==
TRUE
)
{
h
=
suppressMessages
(
ggdendrogram
(
individuals.clusters
,
labels
=
FALSE
,
leaf_labels
=
FALSE
,
theme_dendro
=
TRUE
,
rotate
=
FALSE
)
+
theme
(
plot.margin
=
unit
(
c
(
0.1
,
0
,
0
,
0
),
'cm'
),
axis.text.x
=
element_blank
(),
axis.text.y
=
element_blank
(),
axis.title.x
=
element_blank
())
+
scale_y_continuous
(
expand
=
c
(
0
,
0.5
))
+
scale_x_continuous
(
expand
=
c
(
0
,
0.5
)))
b
=
ggplotGrob
(
h
)
a
=
gtable_add_rows
(
a
,
unit
(
1.1
,
"cm"
),
pos
=
3
)
a
=
gtable_add_grob
(
a
,
b
,
t
=
3
,
l
=
4
,
b
=
5
,
r
=
5
)
loci_start
=
loci_start
+
1
}
if
(
individuals.dendrogram
==
TRUE
)
{
i
=
suppressMessages
(
ggdendrogram
(
loci.clusters
,
labels
=
FALSE
,
leaf_labels
=
FALSE
,
theme_dendro
=
TRUE
,
rotate
=
FALSE
)
+
theme
(
plot.margin
=
unit
(
c
(
0.1
,
0
,
0
,
0
),
'cm'
),
axis.text.x
=
element_blank
(),
axis.text.y
=
element_blank
(),
axis.title.x
=
element_blank
())
+
coord_flip
()
+
scale_y_continuous
(
expand
=
c
(
0
,
0.5
))
+
scale_x_continuous
(
expand
=
c
(
0
,
0
)))
c
=
ggplotGrob
(
i
)
a
=
gtable_add_cols
(
a
,
unit
(
1.1
,
"cm"
),
pos
=
5
)
a
=
gtable_add_grob
(
a
,
c
,
t
=
loci_start
,
l
=
5
,
b
=
loci_start
+
2
,
r
=
6
)
}
grid.newpage
()
grid.draw
(
a
)
}
}
plot_clustering_clean
=
function
(
data
,
popmap
=
data.frame
(),
distance
=
"binary"
,
clustering
=
"ward.D"
,
males.color
=
"steelblue3"
,
females.color
=
"orange1"
,
palette
=
c
(
"white"
,
"royalblue2"
,
"black"
,
"gold2"
,
"red3"
))
{
x
=
as.matrix
(
data
[,
-1
],
rownames.force
=
TRUE
)
rownames
(
x
)
=
data
$
Haplotype_ID
tile_separation_size
=
0.02
individuals.d
=
dist
(
t
(
x
),
method
=
distance
)
individuals.clusters
=
hclust
(
individuals.d
,
method
=
clustering
)
ordered
=
individuals.clusters
$
labels
[
individuals.clusters
$
order
]
temp
=
c
(
1
)
for
(
i
in
1
:
length
(
ordered
))
{
temp
=
c
(
temp
,
which
(
names
(
data
)
==
ordered
[
i
]))
}
data
=
data
[,
temp
]
loci.d
=
dist
(
x
,
method
=
distance
)
loci.clusters
=
hclust
(
loci.d
,
method
=
clustering
)
ordered
=
loci.clusters
$
labels
[
loci.clusters
$
order
]
temp
=
c
()
for
(
i
in
1
:
length
(
ordered
))
{
temp
=
c
(
temp
,
which
(
data
$
Haplotype_ID
==
ordered
[
i
]))
}
data
=
data
[
temp
,]
melted
=
suppressMessages
(
melt
(
data
,
id.vars
=
c
(
"Haplotype_ID"
),
variable.name
=
"Individuals"
,
value.name
=
"Values"
))
melted
$
Haplotype_ID
=
factor
(
melted
$
Haplotype_ID
,
levels
=
data
$
Haplotype_ID
)
melted
$
Individuals
=
factor
(
as.character
(
melted
$
Individuals
),
levels
=
as.character
(
names
(
data
)[
-1
]))
melted
$
Values
=
as.numeric
(
melted
$
Values
)
stats
=
summary
(
replace
(
melted
$
Values
,
which
(
melted
$
Values
==
0
),
NA
),
na.rm
=
TRUE
)
sexes
=
c
(
popmap
$
Sex
)
names
(
sexes
)
=
c
(
popmap
$
Individual
)
sex_palette
=
c
(
"M"
=
males.color
,
"F"
=
females.color
)
g
=
ggplot
(
melted
,
aes
(
x
=
Individuals
,
y
=
Haplotype_ID
,
fill
=
Values
))
+
geom_tile
(
color
=
"grey30"
,
size
=
tile_separation_size
)
+
theme_bw
()
+
theme
(
axis.text.x
=
element_text
(
angle
=
90
,
vjust
=
0.5
,
colour
=
sex_palette
[
sexes
[
names
(
data
)[
-1
]]],
size
=
10
),
axis.text.y
=
element_blank
(),
axis.ticks.y
=
element_blank
(),
axis.title.y
=
element_blank
(),
axis.title.x
=
element_blank
(),
plot.margin
=
margin
(
15
,
25
,
15
,
15
),
panel.border
=
element_rect
(
size
=
0.75
,
color
=
"black"
),
legend.position
=
c
(
0.85
,
0.15
),
legend.margin
=
margin
(
0
,
0
,
0
,
0
),
legend.title
=
element_text
(
size
=
14
,
face
=
"bold"
),
legend.text
=
element_text
(
size
=
11
),
legend.key.height
=
unit
(
0.06
,
"npc"
),
legend.key.width
=
unit
(
0.04
,
"npc"
))
+
scale_fill_gradientn
(
name
=
"Coverage"
,
colours
=
palette
,
values
=
c
(
0
,
0.001
,
stats
[
4
]
/
stats
[
6
],
stats
[
5
]
/
stats
[
6
],
1
))
a
=
ggplotGrob
(
g
)
h
=
suppressMessages
(
ggdendrogram
(
individuals.clusters
,
labels
=
FALSE
,
leaf_labels
=
FALSE
,
theme_dendro
=
TRUE
,
rotate
=
FALSE
)
+
theme
(
plot.margin
=
unit
(
c
(
0.1
,
0
,
0
,
0
),
'cm'
),
axis.text.x
=
element_blank
(),
axis.text.y
=
element_blank
(),
axis.title.x
=
element_blank
())
+
scale_y_reverse
(
expand
=
c
(
0
,
0.5
))
+
scale_x_continuous
(
expand
=
c
(
0
,
0
)))
a
=
gtable_add_rows
(
a
,
unit
(
0.06
,
"npc"
),
pos
=
8
)
a
=
gtable_add_grob
(
a
,
ggplotGrob
(
h
),
t
=
8
,
l
=
4
,
b
=
9
,
r
=
5
)
# grid.newpage()
# grid.draw(a)
return
(
a
)
}
r_scripts/clustering_species.R
0 → 100644
View file @
1fcb1067
source
(
"/home/rferon/work/analyses/radseq_all/r_scripts/clustering.R"
)
species
=
"esox_masquinongy_1"
coverage_file
=
paste
(
"~/work/analyses/radseq_all/results/rescue/"
,
species
,
".tsv"
,
sep
=
""
)
popmap_file
=
paste
(
"~/work/analyses/radseq_all/data/popmaps/"
,
species
,
"_popmap.csv"
,
sep
=
""
)
presence_output_file
=
paste
(
"~/work/analyses/radseq_all/figures/clustering/"
,
species
,
"_presence.png"
,
sep
=
""
)
coverage_output_file
=
paste
(
"~/work/analyses/radseq_all/figures/clustering/"
,
species
,
"_coverage.png"
,
sep
=
""
)
# png(coverage_output_file, width=2400, height=1200, res=100)
coverage_heatmap
(
coverage_file
,
popmap_file
,
distance
=
"euclidian"
,
clustering
=
"ward.D"
)
# dev.off()
# png(presence_output_file, width=2400, height=1200, res=100)
# presence_heatmap(coverage_file, popmap_file, distance="binary", clustering="ward.D", read.length=96)
# dev.off()
r_scripts/frequencies.R
0 → 100644
View file @
1fcb1067
source
(
"/home/rferon/work/analyses/radseq_all/r_scripts/utils.R"
)
aggregate_frequencies
=
function
(
input_folder
,
output_file
=
"none"
,
title
=
"none"
,
species
=
"none"
)
{
input_folder
=
paste
(
sub
(
"/$"
,
""
,
input_folder
))
data
=
parse_folder
(
input_folder
)
aggregate_frequencies_plot
(
data
,
output_file
=
output_file
,
title
=
title
,
species
=
species
)
}
parse_folder
=
function
(
folder
)
{
output
=
list
()
plot_m_2
=
single_plot
(
folder
,
2
)
plot_m_3
=
single_plot
(
folder
,
3
)
plot_m_5
=
single_plot
(
folder
,
5
)
plot_m_10
=
single_plot
(
folder
,
10
)
n_plots
=
0
if
(
is.ggplot
(
plot_m_2
))
{
n_plots
=
n_plots
+
1
}
if
(
is.ggplot
(
plot_m_3
))
{
n_plots
=
n_plots
+
1
}
if
(
is.ggplot
(
plot_m_5
))
{
n_plots
=
n_plots
+
1
}
if
(
is.ggplot
(
plot_m_10
))
{
n_plots
=
n_plots
+
1
}
output
$
plot_m_2
=
plot_m_2
output
$
plot_m_3
=
plot_m_3
output
$
plot_m_5
=
plot_m_5
output
$
plot_m_10
=
plot_m_10
output
$
n_plots
=
n_plots
return
(
output
)
}
single_plot
=
function
(
folder
,
m_value
)
{
frequencies_file_path
=
paste
(
folder
,
paste
(
"m_"
,
m_value
,
"_frequencies.tsv"
,
sep
=
""
),
sep
=
"/"
)
if
(
file.exists
(
frequencies_file_path
)){
g
=
generate_plot
(
frequencies_file_path
,
title
=
get_title
(
m_value
=
m_value
))
}
else
{
print
(
paste
(
"Frequencies file"
,
frequencies_file_path
,
"does not exist."
,
sep
=
" "
))
g
=
"none"
}
return
(
g
)
}
aggregate_frequencies_plot
=
function
(
data
,
output_file
=
"none"
,
title
=
"none"
,
species
=
"none"
)
{
width_c
=
1600
height_c
=
500
if
(
title
==
"none"
&
species
==
"none"
)
{
title
=
"Haplotypes frequencies"
}
title
=
get_title
(
species
=
species
,
base
=
title
)
if
(
output_file
!=
"none"
)
{
png
(
output_file
,
width
=
width_c
,
height
=
height_c
*
data
$
n_plots
,
res
=
160
)
}
if
(
is.ggplot
(
data
$
plot_m_5
))
{
if
(
is.ggplot
(
data
$
plot_m_10
))
{
if
(
is.ggplot
(
data
$
plot_m_3
))
{
if
(
is.ggplot
(
data
$
plot_m_2
))
{
grid.arrange
(
data
$
plot_m_2
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_3
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_10
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
4
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
else
{
grid.arrange
(
data
$
plot_m_3
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_10
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
3
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
}
else
{
if
(
is.ggplot
(
data
$
plot_m_2
))
{
grid.arrange
(
data
$
plot_m_2
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_10
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
3
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
else
{
grid.arrange
(
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_10
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
2
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
}
}
else
{
if
(
is.ggplot
(
data
$
plot_m_3
))
{
if
(
is.ggplot
(
data
$
plot_m_2
))
{
grid.arrange
(
data
$
plot_m_2
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_3
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
3
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
else
{
grid.arrange
(
data
$
plot_m_3
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
2
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
}
else
{
if
(
is.ggplot
(
data
$
plot_m_2
))
{
grid.arrange
(
data
$
plot_m_2
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_5
+
theme
(
legend.position
=
"none"
),
ncol
=
1
,
nrow
=
2
,
top
=
textGrob
(
title
,
gp
=
gpar
(
fontsize
=
15
)),
bottom
=
"Number of individuals"
)
}
else
{
print
(
data
$
plot_m_5
)
}
}
}
}
else
{
if
(
is.ggplot
(
data
$
plot_m_10
))
{
if
(
is.ggplot
(
data
$
plot_m_3
))
{
if
(
is.ggplot
(
data
$
plot_m_2
))
{
grid.arrange
(
data
$
plot_m_2
+
theme
(
legend.position
=
"none"
),
data
$
plot_m_3
+
theme
(
legend.position
=
"none"
),