Commit 7fa90400 authored by Romain Feron's avatar Romain Feron
Browse files

Implemented visualization module structure + heatmap visualization

parent 9e6df59e
......@@ -2,4 +2,5 @@ from .sex_linked_haplotypes import analysis as sex_linked_haplotypes
from .loci_matrix import analysis as loci_matrix
from .stacks_privacy import analysis as stacks_privacy
from .rescue import analysis as rescue
from .visualize import analysis as visualization
from .analysis import analysis
......@@ -4,11 +4,13 @@ from radseq_analysis.modules import sex_linked_haplotypes
from radseq_analysis.modules import loci_matrix
from radseq_analysis.modules import stacks_privacy
from radseq_analysis.modules import rescue
from radseq_analysis.modules import visualization
from radseq_analysis.file_handler import load_popmap
from radseq_analysis.file_handler import load_positions_list
def analysis(input_dir=None,
input_file_path=None,
popmap_file_path=None,
output_file_path=None,
positions_file_path=None,
......@@ -30,11 +32,12 @@ def analysis(input_dir=None,
load_positions_list(positions_file_path, parameters)
# TODO: support more batch numbers + move this inside analyses?
haplotypes_file_path = os.path.join(input_dir, 'batch_0.haplotypes.tsv')
catalog_file_path = os.path.join(input_dir, 'batch_0.catalog.tags.tsv.gz')
individual_files_paths = [os.path.join(input_dir, f) for
f in os.listdir(input_dir) if
'tags' in f and 'catalog' not in f]
if input_dir:
haplotypes_file_path = os.path.join(input_dir, 'batch_0.haplotypes.tsv')
catalog_file_path = os.path.join(input_dir, 'batch_0.catalog.tags.tsv.gz')
individual_files_paths = [os.path.join(input_dir, f) for
f in os.listdir(input_dir) if
'tags' in f and 'catalog' not in f]
if analysis == 'heatmap':
loci_matrix(haplotypes_file_path, parameters)
......@@ -44,3 +47,5 @@ def analysis(input_dir=None,
stacks_privacy(catalog_file_path, parameters)
elif analysis == 'rescue':
rescue(sequences_file_path, catalog_file_path, individual_files_paths, coverage_file_path, parameters)
elif analysis == 'visualize':
visualization(input_file_path, output_file_path, parameters)
from radseq_analysis import visualization
def detect_plot_type(input_file):
return 'haplotypes'
def analysis(input_file, output_file, parameters):
plot_type = detect_plot_type(input_file)
if plot_type == 'haplotypes':
visualization.haplotypes(input_file, output_file, parameters.species)
......@@ -15,6 +15,7 @@ Command: heatmap\tGenerates a matrix of haplotypes sex distribution
\t haplotypes\tExtract haplotypes present in a given number of males and females
\t frequencies\tCalculate haplotypes frequencies distribution in the population
\t rescue\tRegroup stacks into alleles after analysis
\t visualize\tVisualize analyses results using R
'''
)
parser.add_argument('command', help='Command to run', nargs='?')
......@@ -134,6 +135,8 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
Options: -i\t--input-folder\tPath to a folder containing the output of denovo_map
\t -s\t--sequences\tPath to sequences file (result of haplotypes analysis)
\t -m\t--popmap\tPath to population map
\t -c\t--coverage-file\tPath to a coverage file (result of coverage analysis)
\t -o\t--output-file\tPath to output file (default: extracted_alleles.tsv)
''')
parser.add_argument('--input-folder', '-i',
......@@ -169,3 +172,40 @@ Options: -i\t--input-folder\tPath to a folder containing the output of denovo_m
output_file_path=args.output_file,
coverage_file_path=args.coverage_file,
analysis='rescue')
def visualize(self):
parser = argparse.ArgumentParser(
description='Visualize analyses results using R',
usage='''python3 radseq_analysis.py visualize -i input_file -o output_file -m popmap
Options: -i\t--input-file\tPath to a file generated by this pipeline (haplotype_matrix, frequencies, ...)
\t -o\t--output-file\tPath to output file
\t -m\t--popmap\tPath to population map
''')
parser.add_argument('--input-file', '-i',
help='Path to a file generated by this pipeline')
parser.add_argument('--output-file', '-o',
help='Path to output file')
parser.add_argument('--popmap', '-m',
help='Path to a popmap file')
args = parser.parse_args(sys.argv[2:])
if not args.input_file or not os.path.isfile(args.input_file):
print('\nError: no valid input file specified\n')
parser.print_usage()
print()
exit(1)
if not args.output_file:
print('\nError: no valid output file specified\n')
parser.print_usage()
print()
exit(1)
if not args.popmap or not os.path.isfile(args.popmap):
print('\nError: no valid popmap file specified\n')
parser.print_usage()
print()
exit(1)
analysis(input_file_path=args.input_file,
popmap_file_path=args.popmap,
output_file_path=args.output_file,
analysis='visualize')
......@@ -13,7 +13,8 @@ class Parameters:
popmap={},
n_males=0,
n_females=0,
species=None
species=None,
m_value=None
):
self.files_dir = files_dir
......@@ -27,3 +28,4 @@ class Parameters:
self.n_males = n_males
self.n_females = n_females
self.species = species
self.m_value = m_value
from radseq_analysis.visualization.haplotypes import haplotypes
import os
def haplotypes(input_file, output_file, species_name):
scripts_d = ''.join(os.path.split(os.path.realpath(__file__))[:-1])
cmd = ('Rscript ' + os.path.join(scripts_d, 'r_scripts', 'heatmap.R') +
' ' + input_file +
' ' + output_file +
' ' + species_name)
os.system(cmd)
library(readr)
library(ggplot2)
library(grid)
library(gridExtra)
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args) != 3){
stop("Usage: R frequencies.R input_file.tsv output_file.png species_name")
}
suppressMessages(require(readr))
suppressMessages(require(ggplot2))
input_file_path = args[1]
output_file_path = args[2]
species_name = args[3]
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
......@@ -9,7 +19,7 @@ g_legend<-function(a.gplot){
legend <- tmp$grobs[[leg]]
return(legend)}
sex_heatmap = function(file_path, m_value){
sex_heatmap = function(file_path, species_name="none"){
data = suppressMessages(read_delim(file_path, "\t", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE))
......@@ -37,9 +47,14 @@ sex_heatmap = function(file_path, m_value){
}
d$Bin = factor(group)
title = ""
if (species_name != "none") {
title = paste(title, species_name, sep="")
}
g = ggplot(d, aes(x=Males, y=Females)) +
geom_tile(aes(fill=Bin), color = "grey") +
ggtitle(paste("m=", m_value, sep="")) + theme(plot.title = element_text(hjust = 0.5)) +
ggtitle(title) + theme(plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(name="Number of\nhaplotypes", values = palette, labels = bin_labels) +
scale_x_continuous(breaks = seq(0, max(d$Males), 5), minor_breaks = seq(0, max(d$Males), 1)) +
scale_y_continuous(breaks = seq(0, max(d$Females), 5), minor_breaks = seq(0, max(d$Females), 1))
......@@ -47,53 +62,15 @@ sex_heatmap = function(file_path, m_value){
return(g)
}
folder = "/home/rferon/work/analyses/radseq_all/other_species/astatotilapia/"
file_2 = paste(folder, "m_2.tsv", sep="")
if (file.exists(file_2)){
plot_2 = sex_heatmap(file_2, 2)
} else {
print(file_2)
}
heatmap = sex_heatmap(input_file_path, species_name)
file_3 = paste(folder, "m_3.tsv", sep="")
if (file.exists(file_3)){
plot_3 = sex_heatmap(file_3, 3)
} else {
print(file_3)
}
file_5 = paste(folder, "m_5.tsv", sep="")
if (file.exists(file_5)) {
plot_5 = sex_heatmap(file_5, 5)
} else {
print(file_5)
}
file_10 = paste(folder, "m_10.tsv", sep="")
if (file.exists(file_10)){
plot_10 = sex_heatmap(file_10, 10)
} else {
print(file_10)
}
n_males = max(ggplot_build(plot_5)$data[[1]]$x)
n_females = max(ggplot_build(plot_5)$data[[1]]$y)
n_males = max(ggplot_build(heatmap)$data[[1]]$x)
n_females = max(ggplot_build(heatmap)$data[[1]]$y)
ratio = n_females / n_males
n_plots = 0
if (file.exists(file_2)) {n_plots = n_plots + 1}
if (file.exists(file_3)) {n_plots = n_plots + 1}
if (file.exists(file_5)) {n_plots = n_plots + 1}
if (file.exists(file_10)) {n_plots = n_plots + 1}
width_c = 1600
png(paste(folder, "heatmap.png", sep = ""), width=width_c, height=width_c*ratio, res=100)
species_name = "Astatotilapia"
grid.arrange(plot_2 + theme(legend.position="none"), plot_3 + theme(legend.position="none"), plot_5 + theme(legend.position="none"), plot_10 + theme(legend.position="none"),
ncol=2, nrow=2, top=species_name, right=g_legend(plot_2), widths = c(n_males, n_males), heights = c(n_females, n_females))
width_c = 1200
dev.off()
png(output_file_path, width=width_c, height=width_c*ratio, res=130)
print(heatmap)
x=dev.off()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment