Commit 5e1ed984 authored by Romain Feron's avatar Romain Feron
Browse files

Implemented clustering visualization

parent 7fa90400
......@@ -48,4 +48,4 @@ def analysis(input_dir=None,
elif analysis == 'rescue':
rescue(sequences_file_path, catalog_file_path, individual_files_paths, coverage_file_path, parameters)
elif analysis == 'visualize':
visualization(input_file_path, output_file_path, parameters)
visualization(input_file_path, popmap_file_path, output_file_path, parameters)
......@@ -2,12 +2,14 @@ from radseq_analysis import visualization
def detect_plot_type(input_file):
return 'haplotypes'
return 'clustering'
def analysis(input_file, output_file, parameters):
def analysis(input_file_path, popmap_file_path, output_file_path, parameters):
plot_type = detect_plot_type(input_file)
plot_type = detect_plot_type(input_file_path)
if plot_type == 'haplotypes':
visualization.haplotypes(input_file, output_file, parameters.species)
visualization.haplotypes(input_file_path, output_file_path, parameters.species)
elif plot_type == 'clustering':
visualization.clustering(input_file_path, popmap_file_path, output_file_path)
......@@ -179,13 +179,13 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
usage='''python3 radseq_analysis.py visualize -i input_file -o output_file -m popmap
Options: -i\t--input-file\tPath to a file generated by this pipeline (haplotype_matrix, frequencies, ...)
\t -o\t--output-file\tPath to output file
\t -o\t--output-file\tPath to output file or output directory
\t -m\t--popmap\tPath to population map
''')
parser.add_argument('--input-file', '-i',
help='Path to a file generated by this pipeline')
parser.add_argument('--output-file', '-o',
help='Path to output file')
help='Path to output file / directory')
parser.add_argument('--popmap', '-m',
help='Path to a popmap file')
......
from radseq_analysis.visualization.haplotypes import haplotypes
from radseq_analysis.visualization.clustering import clustering
import os
def clustering(input_file_path, popmap_file_path, output_dir_path):
scripts_d = ''.join(os.path.split(os.path.realpath(__file__))[:-1])
cmd = ('Rscript ' + os.path.join(scripts_d, 'r_scripts', 'clustering_species.R') +
' ' + input_file_path +
' ' + popmap_file_path +
' ' + output_dir_path)
os.system(cmd)
import os
def haplotypes(input_file, output_file, species_name):
def haplotypes(input_file_path, output_file_path, species_name):
scripts_d = ''.join(os.path.split(os.path.realpath(__file__))[:-1])
cmd = ('Rscript ' + os.path.join(scripts_d, 'r_scripts', 'heatmap.R') +
' ' + input_file +
' ' + output_file +
' ' + input_file_path +
' ' + output_file_path +
' ' + species_name)
os.system(cmd)
......@@ -80,6 +80,10 @@ filter_coverage = function(data, popmap, min_ratio=0.5) {
data = data[, c(2, 7:dim(data)[2])]
data = unique(data)
if (dim(data)[1] == 0) {
stop()
}
temp = suppressMessages(melt(data))
temp[temp == 0] = NA
max_coverage = round(quantile(temp$value, c(.99), na.rm = TRUE)[1])
......@@ -153,16 +157,17 @@ plot_heatmap = function(data, popmap=data.frame(), x_axis='individuals',
g = ggplot(melted, aes(x=Individuals, y=Haplotype_ID, fill=Values)) +
geom_tile(color="grey") +
theme(axis.text.x = element_text(angle=60, hjust = 1), legend.title = element_blank()) +
theme(axis.text.x = element_text(angle=60, hjust = 1)) +
ylab("Loci") + xlab("Individuals")
if (length(unique(melted$Values)) != 2) {
temp = melted
temp$Values[temp$Values==0] = NA
stats = summary(temp$Values, na.rm=TRUE)
g = g + scale_fill_gradientn(colours = palette, values = c(0, 0.001, stats[4]/stats[6], stats[5]/stats[6], 1))
g = g + scale_fill_gradientn(name = "Coverage", colours = palette, values = c(0, 0.001, stats[4]/stats[6], stats[5]/stats[6], 1))
} else {
g = g + scale_fill_manual(breaks = c("0", "1"), labels = c("Absent", "Present"), values = c("0"=absence.color, "1"=presence.color))
g = g + scale_fill_manual(breaks = c("0", "1"), labels = c("Absent", "Present"), values = c("0"=absence.color, "1"=presence.color)) +
theme(legend.title=element_blank())
}
if (length(popmap) > 0) {
......@@ -200,16 +205,17 @@ plot_heatmap = function(data, popmap=data.frame(), x_axis='individuals',
g = ggplot(melted, aes(x=Haplotype_ID, y=Individuals, fill=Values)) +
geom_tile(color="grey") +
theme(axis.text.x = element_text(angle=60, hjust = 1), legend.title = element_blank()) +
theme(axis.text.x = element_text(angle=60, hjust = 1)) +
xlab("Loci") + ylab("Individuals")
if (length(unique(melted$Values)) != 2) {
temp = melted
temp$Values[temp$Values==0] = NA
stats = summary(temp$Values, na.rm=TRUE)
g = g + scale_fill_gradientn(colours = palette, values = c(0, 0.001, stats[4]/stats[6], stats[5]/stats[6], 1))
g = g + scale_fill_gradientn(name="Coverage", colours = palette, values = c(0, 0.001, stats[4]/stats[6], stats[5]/stats[6], 1))
} else {
g = g + scale_fill_manual(breaks = c("0", "1"), labels = c("Absent", "Present"), values = c("0"=absence.color, "1"=presence.color))
g = g + scale_fill_manual(breaks = c("0", "1"), labels = c("Absent", "Present"), values = c("0"=absence.color, "1"=presence.color)) +
theme(legend.title=element_blank())
}
if (length(popmap) > 0) {
......@@ -258,10 +264,12 @@ coverage_heatmap = function(coverage_file, popmap_file, min_individuals_ratio=0.
data = filter_coverage(data, popmap, min_ratio=min_individuals_ratio)
data = extract_XY_alleles(data, popmap)
plot_heatmap(data, popmap,
distance=distance, clustering=clustering,
x_axis=x_axis, individuals.dendrogram=individuals.dendrogram, loci.dendrogram=loci.dendrogram,
males.color=males.color, females.color=females.color, palette=palette)
if (dim(data)[1] > 0) {
plot_heatmap(data, popmap,
distance=distance, clustering=clustering,
x_axis=x_axis, individuals.dendrogram=individuals.dendrogram, loci.dendrogram=loci.dendrogram,
males.color=males.color, females.color=females.color, palette=palette)
}
}
......@@ -276,9 +284,11 @@ presence_heatmap = function(coverage_file, popmap_file, read.length=94,
data = convert_to_presence_data(data, read.length=read.length)
plot_heatmap(data, popmap,
distance=distance, clustering=clustering,
x_axis=x_axis, individuals.dendrogram=individuals.dendrogram, loci.dendrogram=loci.dendrogram,
males.color=males.color, females.color=females.color,
absence.color=absence.color, presence.color=presence.color)
if (dim(data)[1] > 0) {
plot_heatmap(data, popmap,
distance=distance, clustering=clustering,
x_axis=x_axis, individuals.dendrogram=individuals.dendrogram, loci.dendrogram=loci.dendrogram,
males.color=males.color, females.color=females.color,
absence.color=absence.color, presence.color=presence.color)
}
}
#!/usr/bin/env Rscript
suppressMessages(require(readr))
suppressMessages(require(ggplot2))
suppressMessages(require(base))
args = commandArgs(trailingOnly=FALSE)
nargs = length(args)
# R script arguments are last in arguments list
input_file_path = args[nargs - 2]
popmap_file_path = args[nargs - 1]
output_dir_path = args[nargs]
if (substr(output_dir_path, nchar(output_dir_path), nchar(output_dir_path)) != "/") {
output_dir_path = cat(output_dir_path, "/")
}
script_dir = ""
for (i in 1:nargs) {
if (substr(args[i], 1, 6) == "--file") {
full_path = substr(args[i], 8, nchar(args[i]))
split_path = strsplit(full_path, "/")
script_dir = paste(split_path[[1]][-length(split_path[[1]])], collapse="/")
}
}
source(paste(script_dir, "clustering.R", sep="/"), chdir=T)
png(paste(output_dir_path, "coverage_heatmap.png", sep=""), width=2400, height=1200, res=90)
coverage_heatmap(input_file_path, popmap_file_path, distance="euclidian", clustering="ward.D")
x = dev.off()
png(paste(output_dir_path, "presence_heatmap.png", sep=""), width=2400, height=1200, res=90)
presence_heatmap(input_file_path, popmap_file_path, distance="binary", clustering="ward.D")
x = dev.off()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment