Commit a1d681bc authored by mmassaviol's avatar mmassaviol
Browse files

Update RADseq_denovo

parent fad73026
......@@ -3,8 +3,6 @@ import re
import snakemake.utils
import csv
dir_path = "/workflow"
#############
# Wildcards #
#############
......@@ -14,6 +12,7 @@ STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
OUTPUTS = config["outputs"]
PARAMS_INFO = config["params_info"]
config = config["params"]
def get_individus():
......@@ -111,8 +110,6 @@ def prepare_report_inputs():
inputs = list()
for step in STEPS:
inputs.extend(step_outputs(step["name"]))
if (step["name"] == config["final_step"]):
break
return inputs
def prepare_report_scripts():
......@@ -122,8 +119,6 @@ def prepare_report_scripts():
script = tool+".prepare.report.R"
if (script in PREPARE_REPORT_SCRIPTS):
scripts.append("/workflow/scripts/"+script)
if (tool == config["final_step"]):
break
return scripts
def prepare_report_outputs():
......@@ -134,8 +129,6 @@ def prepare_report_outputs():
if (tool in PREPARE_REPORT_OUTPUTS.keys()):
for output in PREPARE_REPORT_OUTPUTS[tool]:
outputs.append(config["results_dir"]+"/"+tool+"/"+output)
if (tool == config["final_step"]):
break
return outputs
def multiqc_inputs():
......@@ -559,16 +552,19 @@ rule prepare_report:
i+=1
# Params list for Multiqc report
params_list = "params_name\tvalue\n"
head = """
# description: 'This is the list of the parameters for each rule'
params_list = "params_name\tdescription\tvalue\n"
head = """# description: 'This is the list of the parameters for each rule'
# section_name: 'Workflow parameters'
"""
for step in STEPS:
tool = config[step["name"]]
for key, value in config.items():
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]):
params_list += key + "\t'" + str(value) + "'\n"
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
description = PARAMS_INFO[key]["label"]
else:
description = ''
params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n"
with open(output.params_tab,"w") as out:
out.write(head)
......@@ -594,7 +590,7 @@ rule multiqc:
# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps)
rule all:
input:
workflow_outputs(config["final_step"])
workflow_outputs("all")
output:
Snakefile = config["results_dir"]+"/workflow/Snakefile",
get_samples = config["results_dir"]+"/workflow/get_samples.py",
......
#!/usr/bin/env python3
# This script will take a directory and a parameter to tell if the reads are paired end or single end and return the sample list and the suffix
# Needs 2 arguments: reads_directory, SeOrPe
# SeOrPe is SE for single end reads and PE for paired end reads
# Usage: ./get_samples.py reads_directory SeOrPe
import os
import re
import csv
import sys
def sample_list(dir, filesuffix=".fastq.gz"):
    """Return the sorted list of sample names found in *dir*.

    A sample name is the filename part that precedes an optional
    _R1/_R2 read tag followed by *filesuffix*.

    Parameters:
        dir: directory containing the read files.
        filesuffix: expected file suffix (default ".fastq.gz").

    Returns:
        Sorted list of unique sample names (one entry per pair of
        paired-end files).
    """
    samples = list()
    files = os.listdir(dir)
    # re.escape: the suffix contains literal dots that must not behave as
    # regex wildcards (unescaped, ".fastq.gz" would also match names like
    # "XfastqYgz" and produce spurious samples).
    regex = re.compile(r"^(.+?)(_R1|_R2|)(" + re.escape(filesuffix) + r")")
    for file in files:
        res = re.match(regex, file)
        if res:
            # Paired-end files share a sample name; record it only once.
            if res.group(1) not in samples:
                samples.append(res.group(1))
    return sorted(samples)
def sample_list2(dir, SeOrPe):
def sample_list(dir, SeOrPe):
samples = list()
suffixes = list()
files = os.listdir(dir)
if SeOrPe == "PE":
regex = re.compile("^(.+?)(_R1|_R2)(.+)")
regex = re.compile(r"^(.+?)(_R1|_R2)(.+)")
else:
regex = re.compile("^(.+?)(\..*)")
regex = re.compile(r"^(.+?)(\..*)")
for file in files:
res = re.match(regex, file)
if res:
......@@ -34,25 +27,17 @@ def sample_list2(dir, SeOrPe):
suffixes.append(res.group(2))
if (len(set(suffixes)) == 1 ):
return {'samples': samples, 'suffix': set(suffixes)}
return {'samples': sorted(samples), 'suffix': list(set(suffixes))[0]}
else:
pass
exit("Files have different suffixes:" + ','.join(suffixes))
def group_list(dir, groupfile, filesuffix=".fastq.gz"):
samples = sample_list(dir,filesuffix)
if os.path.isfile(groupfile):
groups = dict()
with open(groupfile, mode="r") as infile:
reader = csv.reader(infile)
groups = {row[0]: row[1] for row in reader}
return {sample: groups[sample] for sample in samples}
def main():
if len(sys.argv) == 3:
print(sample_list(sys.argv[1],sys.argv[2]))
else:
return {sample: "group" for sample in samples}
#print(sample_list2(sys.argv[1],sys.argv[2]))
exit("""Needs 2 arguments: reads_directory, SeOrPe
Usage: ./get_samples.py reads_directory SeOrPe""")
if len(sys.argv) == 4:
print(group_list(sys.argv[1], sys.argv[2], sys.argv[3]))
else:
print(group_list(sys.argv[1], sys.argv[2]))
if __name__ == "__main__":
# execute only if run as a script
main()
......@@ -2,7 +2,6 @@ pipeline: RADseq_denovo
params:
results_dir: /Results
sample_dir: /Data
sample_suffix: .fastq.gz
SeOrPe: PE
quality_check: fastqc
fastqc_SE_output_dir: fastqc_SE
......@@ -55,7 +54,6 @@ params:
populations_p: 2
samples: []
groups: []
final_step: all
steps:
- title: Quality check
name: quality_check
......@@ -104,14 +102,13 @@ params_info:
type: output_dir
sample_dir:
type: input_dir
sample_suffix:
type: text
SeOrPe:
type: radio
fastqc_threads:
tool: fastqc
rule: fastqc_PE
type: numeric
label: Number of threads to use
process_radtags_barcode_file_select:
tool: process_radtags
rule: process_radtags_PE
......@@ -120,42 +117,53 @@ params_info:
tool: process_radtags
rule: process_radtags_PE
type: input_file
label: Barcode file
process_radtags_barcode_type:
tool: process_radtags
rule: process_radtags_PE
type: select
label: Barcode position
process_radtags_enzyme_SE:
tool: process_radtags
rule: process_radtags_SE
type: select
label: Provide the restriction enzyme used
process_radtags_enzyme_1_PE:
tool: process_radtags
rule: process_radtags_PE
type: select
label: Provide the restriction enzyme used
process_radtags_enzyme_2_PE:
tool: process_radtags
rule: process_radtags_PE
type: select
label: If a double digest was used, provide the second restriction enzyme used
ustacks_threads:
tool: ustacks
rule: ustacks
type: numeric
label: Number of threads to use
ustacks_M:
tool: ustacks
rule: ustacks
type: numeric
label: Maximum distance (in nucleotides) allowed between stacks
ustacks_m:
tool: ustacks
rule: ustacks
type: numeric
label: Minimum depth of coverage required to create a stack
ustacks_N:
tool: ustacks
rule: ustacks
type: numeric
label: 'Maximum distance allowed to align secondary reads to primary stacks (default:
M + 2)'
cstacks_threads:
tool: cstacks
rule: cstacks
type: numeric
label: Number of threads to use
cstacks_population_tsv_select:
tool: cstacks
rule: cstacks
......@@ -164,22 +172,27 @@ params_info:
tool: cstacks
rule: cstacks
type: input_file
label: Path to population tsv file
cstacks_n:
tool: cstacks
rule: cstacks
type: numeric
label: Number of mismatches allowed between sample loci when build the catalog.
sstacks_threads:
tool: sstacks
rule: sstacks
type: numeric
label: Number of threads to use
tsv2bam_threads:
tool: tsv2bam
rule: tsv2bam
type: numeric
label: Number of threads to use
gstacks_threads:
tool: gstacks
rule: gstacks
type: numeric
label: Number of threads to use
gstacks_population_tsv_select:
tool: gstacks
rule: gstacks
......@@ -188,46 +201,60 @@ params_info:
tool: gstacks
rule: gstacks
type: input_file
label: Path to population tsv file
gstacks_model:
tool: gstacks
rule: gstacks
type: select
label: Model to use to call variants and genotypes
gstacks_var_alpha:
tool: gstacks
rule: gstacks
type: numeric
label: Alpha threshold for discovering SNPs
gstacks_gt_alpha:
tool: gstacks
rule: gstacks
type: numeric
label: Alpha threshold for calling genotypes
gstacks_min_mapq:
tool: gstacks
rule: gstacks
type: numeric
label: Minimum PHRED-scaled mapping quality to consider a read
gstacks_max_clipped:
tool: gstacks
rule: gstacks
type: numeric
label: Maximum soft-clipping level, in fraction of read length
populations_threads:
tool: populations
rule: populations
type: numeric
label: Number of threads to use
populations_r:
tool: populations
rule: populations
type: numeric
label: Minimum percentage of individuals in a population required to process a
locus for that population
populations_max_obs_het:
tool: populations
rule: populations
type: numeric
label: Specify a maximum observed heterozygosity required to process a nucleotide
site at a locus
populations_min_maf:
tool: populations
rule: populations
type: numeric
label: Specify a minimum minor allele frequency required to process a nucleotide
site at a locus
populations_p:
tool: populations
rule: populations
type: numeric
label: Minimum number of populations a locus must be present in to process a locus
prepare_report_scripts:
- populations.prepare.report.R
prepare_report_outputs:
......
......@@ -21,16 +21,15 @@ PCA1 <- snpgdsPCA(genofile, snp.id=NULL, maf=NaN, missing.rate=0.2, num.thread=
#PCA eigenvalues
fic = paste(parameters$results_dir,parameters$populations_output_dir,'PCA_Eigenvalues_mqc.txt',sep = "/")
cat("
# id: custom_bargraph_tsv
# section_name: 'PCA eigenvalues'
# description: 'valeurs propres (variance expliquée par chaque axe).'
# format: 'tsv'
# plot_type: 'bargraph'
# pconfig:
# id: 'custom_bargraph_w_header'
# title: PCA eigenvalues
# ylab: 'Percent'\n", file=fic)
cat("# id: custom_bargraph_tsv
# section_name: 'PCA eigenvalues'
# description: 'valeurs propres (variance expliquée par chaque axe).'
# format: 'tsv'
# plot_type: 'bargraph'
# pconfig:
# id: 'custom_bargraph_w_header'
# title: PCA eigenvalues
# ylab: 'Percent'\n", file=fic)
for (i in 1: 20 )
{
......@@ -84,37 +83,37 @@ for (i in 1:length(tab1$sample.id))
#FST par paire
cat("#plot_type: 'heatmap'\n\t",file = paste(parameters$results_dir,parameters$populations_output_dir,'Mean_Pairwise_Pop_FST_mqc.csv',sep = "/"))
fst_summary = read.csv2(paste(parameters$results_dir,parameters$populations_output_dir,'populations.fst_summary.tsv',sep = "/"), sep = "\t",row.names = 1, stringsAsFactors=F)
if(nrow(fst_summary) > 0)
{
tryCatch({
cat("#plot_type: 'heatmap'\n\t",file = paste(parameters$results_dir,parameters$populations_output_dir,'Mean_Pairwise_Pop_FST_mqc.csv',sep = "/"))
fst_summary = read.csv2(paste(parameters$results_dir,parameters$populations_output_dir,'populations.fst_summary.tsv',sep = "/"), sep = "\t",row.names = 1, stringsAsFactors=F)
if(nrow(fst_summary) > 0){
for (i in 1: nrow(fst_summary))
{
fst_summary[i,i] = 0
for (j in 1: i)
{fst_summary[i,j] = fst_summary[j,i]}
for (j in 1: i) {fst_summary[i,j] = fst_summary[j,i]}
}
}
write.table(fst_summary, sep='\t',file = paste(parameters$results_dir,parameters$populations_output_dir,'Mean_Pairwise_Pop_FST_mqc.csv',sep = "/"), quote=F, row.names=T, col.names=T ,append=TRUE)
}
write.table(fst_summary, sep='\t',file = paste(parameters$results_dir,parameters$populations_output_dir,'Mean_Pairwise_Pop_FST_mqc.csv',sep = "/"), quote=F, row.names=T, col.names=T ,append=TRUE)
}, error = function(e) {
cat("",file = paste(parameters$results_dir,parameters$populations_output_dir,'Mean_Pairwise_Pop_FST_mqc.csv',sep = "/"))
})
#IBS
tryCatch({
ibs <- snpgdsIBS(genofile, sample.id=sample.id1, snp.id=NULL, maf=0.1, missing.rate=0.05, num.thread=2, verbose=FALSE, autosome.only=FALSE)
tryCatch({
ibs <- snpgdsIBS(genofile, sample.id=sample.id1, snp.id=NULL, maf=0, missing.rate=1, num.thread=2, verbose=TRUE, autosome.only=FALSE)
colnames(ibs$ibs)=ibs$sample.id
rownames(ibs$ibs)=ibs$sample.id
png(filename = paste(parameters$results_dir,parameters$populations_output_dir,"IBS_mqc.png",sep = "/"), height=800, width=800)
heatmap.2(ibs$ibs, col=terrain.colors(20),RowSideColors = pop_color1[ibs$sample.id], ColSideColors = pop_color1[ibs$sample.id], trace="none", cexRow = 1, cexCol = 1)
par(lend = 1) # square line ends for the color legend
legend(0.0, 1.0, ncol=2, legend = names(popc), col = popc, lty= 1,lwd = 5, bty="n")
dev.off()
}, error = function(e) {
}, error = function(e) {
png(filename = paste(parameters$results_dir,parameters$populations_output_dir,"IBS_mqc.png",sep = "/"), height=800, width=800)
print(plot(c(0,1),c(0,1),ann =F,bty ='n',type ='n',xaxt ='n',yaxt ='n'))
text(x =0.5,y =0.5,paste("Problem with IBS "),cex =1.6,col ="black")
dev.off()
})
})
#snpgdsClose(genofile)
......
......@@ -21,9 +21,6 @@ echo "To get help for an app :\nsingularity help --app appName this_container.si
echo "To run an app :\nsingularity run --app appName this_container.sif"
########################
# App UI
########################
%apprun UI
exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
......@@ -33,9 +30,6 @@ You must also provide the host address and port where the shiny app will be laun
exemple : singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
########################
# App Snakemake
########################
%apprun Snakemake
configfile=$1
cores=$2
......@@ -48,9 +42,7 @@ To run the Snakemake app you should bind data and results directories like in th
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
########################
# App getConfigfile
########################
%apprun getConfigfile
exec cp /workflow/params.total.yml ./params.yml
......@@ -58,11 +50,21 @@ exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /p
To run the getConfigfile app you dont need to bind directories. This app will only copy the default parameters file from the container to your local disk.
exemple : singularity run --app getConfigfile this_container.sif
%apprun getSamples
exec python3 /workflow/get_samples.py $1 $2
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix.
exemple : singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains three apps (UI, Snakemake and getConfigfile).
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake let you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile give you a copy of a default parameters file to fill and use with the Snakemake app
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (useful for filling samples and sample_suffix in parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
......
......@@ -23,7 +23,7 @@ MenuGauche = sidebarMenu(id="sidebarmenu",
tags$br(),
numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16),
selectInput("final_step", label = "Select the step to reach : ", selected = "all", choices = list('Quality check'='quality_check','Demultiplexing'='demultiplexing','ustacks'='ustacks','cstacks'='cstacks','sstacks'='stacks','tsv2bam'='tsv2bam','gstacks'='gstacks','populations'='populations',"All"="all")), tags$br(),
selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('none'='none','Quality check'='quality_check','Demultiplexing'='demultiplexing','ustacks'='ustacks','cstacks'='cstacks','sstacks'='stacks','tsv2bam'='tsv2bam','gstacks'='gstacks','populations'='populations',"All"="all")), tags$br(),
actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"),
actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"),
......
......@@ -17,8 +17,6 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
)
,
textInput("sample_suffix", label = "Samples suffix: ", value = ".fastq.gz", width = "auto"),
radioButtons("SeOrPe", label = "Single end reads (SE) or Paired end reads (PE): ", choices = list("Single end" = "SE", "Paired end" = "PE"), selected = "PE", width = "auto"),
textAreaInput("memo", label = "Text area for the user", value = "")
......
......@@ -13,12 +13,6 @@ save_params <- function(path_param){
res = paste0(res, paste("sample_dir:", paste0('"', input$sample_dir, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$sample_suffix))) {
res = paste0(res, paste("sample_suffix:", input$sample_suffix, "\n", sep = " "))
} else {
res = paste0(res, paste("sample_suffix:", paste0('"', input$sample_suffix, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$SeOrPe))) {
res = paste0(res, paste("SeOrPe:", input$SeOrPe, "\n", sep = " "))
} else {
......@@ -215,8 +209,6 @@ save_params <- function(path_param){
res = paste0(res, paste("populations_p:", paste0('"', input$populations_p, '"'), "\n", sep = " "))
}
res = paste0(res, paste("final_step:", paste0('"', input$final_step, '"'), "\n", sep = " "))
a = yaml.load_file("/workflow/params.total.yml")
p = a[["params"]]
a["params"] = NULL
......@@ -230,30 +222,27 @@ save_params <- function(path_param){
return(result)
}
d = c(d,a)
samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," /Data/groups.csv ", input$sample_suffix),intern = T))
d$samples = names(samples)
names(samples) = NULL
d$groups = unlist(samples)
get_samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," ",input$SeOrPe),intern = T))
d$samples = get_samples$samples
d$params$sample_suffix = get_samples$suffix
write_yaml(d,path_param,handlers=list(logical = logical))
}
compare_params = function(dossierAnalyse){
if (!file.exists(paste0(dossierAnalyse,"/lastrun/params.yml"))){
return(c())
force_rule <- function(force_from){
if (input$force_from=="none"){
return("")
}
else if (input$force_from=="all"){ return("--forcerun all") }
else {
params = yaml.load_file(paste0(input$results_dir,"/params.yml"))
outputs = params[["outputs"]]
tool = params[["params"]][[force_from]]
if (length(outputs[[tool]])==1)
rule = names(outputs[[tool]])[[1]]
else{
new_params = yaml.load_file(paste0(dossierAnalyse,"/params.yml"))
old_params = yaml.load_file(paste0(dossierAnalyse,"/workflow/params.yml"))
changed = new_params[!(new_params %in% old_params)]
rules = c()
if (length(changed)>=1){
for (param in names(changed)){
if (!grepl("_threads$",param)){
rules = c(rules, new_params$params_info[[param]]$rule)
}
rule = names(outputs[[tool]])[[grep(input$SeOrPe,names(outputs[[tool]]))]]
}
}
return(unique(rules))
return(paste0("--forcerun ",rule))
}
}
#' Event when use RULEGRAPH button
......@@ -302,21 +291,10 @@ observeEvent(input$RunPipeline, {
if (!file.exists(paste0(input$results_dir,"/logs/runlog.txt"))){
file.create(paste0(input$results_dir,"/logs/runlog.txt"))
}
forcerun = compare_params(input$results_dir)
if (length(forcerun>1)){
rules = paste(forcerun, collapse=" ")
forcerun = paste(" --forcerun ",rules)
showModal(modalDialog(
title = "Params have changed since the last run",
forcerun
))
}
else{
forcerun = ""
}
system(paste0("touch ",input$results_dir,"/logs/workflow.running"),wait = T)
system(paste0("snakemake -s /workflow/Snakefile --configfile ",input$results_dir,"/params.yml -d ",input$results_dir," all --rulegraph | dot -Tpng -Gratio=0.75 > ",input$results_dir,"/Rule_graph_mqc.png"))
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " --forcerun all -d ", input$results_dir , " --cores ", input$cores, " all ", forcerun),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
force = force_rule(input$force_from)
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " -d ", input$results_dir , " --cores ", input$cores, " all ", force),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
tags$iframe(src="results/multiqc_report.html",width="100%", height="900px")},
error = function(e){
system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment