Commit 2a7f2410 authored by Romain Feron
Browse files

Moved to standard module architecture.

Added requirements, empty readme, and moved files to module directory
parent 42250351
......@@ -98,3 +98,4 @@ user.json
*.pyc
results_10/*
output/*
results/*
import os
from analyse.analysis import analyse_directory
root_dir = '/home/rferon/work/analyses/multi_species/rad_seq/denovo_map/scripts/pipeline/'
root_dir = '/home/rferon/work/code/radseq_analyses_pipeline/'
files_dir = os.path.join(root_dir, 'results_10')
output_dir = os.path.join(root_dir, 'output')
......
......@@ -4,6 +4,6 @@ import os
def frequencies(file, output_dir, n_individuals):
scripts_d = ''.join(os.path.split(os.path.realpath(__file__))[:-1])
cmd = ('Rscript ' + os.path.join(scripts_d, 'ggplot.R') + ' ' + file + ' ' +
output_dir + ' ' + str(n_individuals))
cmd = ('Rscript ' + os.path.join(scripts_d, 'r_scripts', 'ggplot.R') + ' ' +
file + ' ' + output_dir)
os.system(cmd)
......@@ -3,9 +3,9 @@ args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else if (length(args) != 3){
stop("Usage: R ggplot.R input_file.tsv output_dir n_individuals")
stop("At least one argument must be supplied (input file).", call.=FALSE)
} else if (length(args) != 2){
stop("Usage: R frequencies.R input_file.tsv output_dir")
}
suppressMessages(library(readr))
......@@ -14,28 +14,21 @@ suppressMessages(library(svglite))
suppressMessages(library(scales))
file = args[1]
name = strsplit(file, '/')[[1]][length(strsplit(file, '/')[[1]])]
output_dir = args[2]
n_max = strtoi(args[3])
split = strsplit(name, '_')[[1]]
species_name = paste(split[1], "_", split[2], sep="")
png_name = paste(species_name, "_", split[4], ".png", sep='')
data = suppressMessages(read_delim(file, "\t", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE))
species = rep(species_name, dim(data)[1])
m_value = rep(split[4],dim(data)[1])
colnames(data) = c("Stack_ID", "Frequency")
png_name = paste("tags_distribution.png", sep='')
data = subset(data, data$Frequency <= n_max)
data = suppressMessages(read_delim(file, "\t", escape_double = FALSE, col_names = TRUE, trim_ws = TRUE))
colnames(data) = c("Frequency", "Count")
g = ggplot(data, aes(x=Frequency)) +
geom_bar(aes(y = (..count..)/sum(..count..)), colour="black", fill="#CCCCCC") +
g = ggplot(data, aes(x=Frequency, y=Count/sum(Count))) +
geom_bar(stat="identity", colour="black", fill="#CCCCCC") +
geom_vline(xintercept = mean(data$Frequency), colour = "red") +
geom_vline(xintercept = median(data$Frequency), colour = "blue") +
ggtitle(paste("Total tags: ", dim(data)[1], sep='')) + theme(plot.title = element_text(hjust = 0.5)) +
xlab("Number of individuals in which a tag is present") + ylab("Frequency (%)") +
scale_y_continuous(labels=percent)
png(paste(output_dir, gsub(".tsv", "", name), ".png", sep=''), width=1600, height=800, res=100)
png(paste(output_dir, png_name, sep=''), width=1600, height=1000, res=130)
print(g)
x = dev.off()
#!/usr/bin/env Rscript
args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else if (length(args) != 3){
stop("Usage: R haplotypes.R input_file.tsv output_dir treshold")
}
suppressMessages(library(readr))
suppressMessages(library(ggplot2))
file = args[1]
output_dir = args[2]
file = "~/work/code/radseq_analyses_pipeline/output/haplotypes_data.tsv"
output_dir = "~/work/code/radseq_analyses_pipeline/results/"
png_name = paste("sex_variable_haplotypes.png", sep='')
data <- suppressMessages(read_delim(file, "\t", col_names = TRUE, escape_double = FALSE, trim_ws = TRUE))
names(data) = c('Locus', 'Sequence', 'Males', 'Females', 'Male_outliers', 'Female_outliers')
threshold = 5
males = data.frame(table(data$Males[which(data$Males > threshold)], data$Females[which(data$Males > threshold)]))
names(males) = c('Males', 'Females', 'Count')
females = data.frame(table(data$Males[which(data$Females > threshold)], data$Females[which(data$Females > threshold)]))
names(females) = c('Males', 'Females', 'Count')
g <- ggplot(males, aes(x = Males, y = Count, fill = Females))
g = g + geom_bar(stat='identity', position=position_dodge(), colour='black')
h <- ggplot(females, aes(x = Females, y = Count, fill = Males))
h = h + geom_bar(stat='identity', position=position_dodge(), colour='black')
grid.arrange(g, h, ncol=2)
progress>=1.3
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment