Commit 934f9664 authored by mmassaviol
Browse files

Update Variant_calling

parent 3e273de8
......@@ -12,6 +12,7 @@ STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
OUTPUTS = config["outputs"]
PARAMS_INFO = config["params_info"]
config = config["params"]
##########
......@@ -123,8 +124,6 @@ def prepare_report_inputs():
inputs = list()
for step in STEPS:
inputs.extend(step_outputs(step["name"]))
if (step["name"] == config["final_step"]):
break
return inputs
def prepare_report_scripts():
......@@ -134,8 +133,6 @@ def prepare_report_scripts():
script = tool+".prepare.report.R"
if (script in PREPARE_REPORT_SCRIPTS):
scripts.append("/workflow/scripts/"+script)
if (tool == config["final_step"]):
break
return scripts
def prepare_report_outputs():
......@@ -146,8 +143,6 @@ def prepare_report_outputs():
if (tool in PREPARE_REPORT_OUTPUTS.keys()):
for output in PREPARE_REPORT_OUTPUTS[tool]:
outputs.append(config["results_dir"]+"/"+tool+"/"+output)
if (tool == config["final_step"]):
break
return outputs
def multiqc_inputs():
......@@ -575,7 +570,7 @@ rule gatk_haplotype_caller:
"&& bcftools stats "
"-F {input.ref_fasta} "
"-s - "
"{output.vcf} > {output.stats}"
"{output.vcf} > {output.stats} "
"|& tee -a {log} "
rule bcftools_mpileup_and_call:
......@@ -646,16 +641,19 @@ rule prepare_report:
i+=1
# Params list for Multiqc report
params_list = "params_name\tvalue\n"
head = """
# description: 'This is the list of the parameters for each rule'
params_list = "params_name\tdescription\tvalue\n"
head = """# description: 'This is the list of the parameters for each rule'
# section_name: 'Workflow parameters'
"""
for step in STEPS:
tool = config[step["name"]]
for key, value in config.items():
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]):
params_list += key + "\t'" + str(value) + "'\n"
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
description = PARAMS_INFO[key]["label"]
else:
description = ''
params_list += key + "\t'" + description + "'\t'" + str(value) + "'\n"
with open(output.params_tab,"w") as out:
out.write(head)
......@@ -681,7 +679,7 @@ rule multiqc:
# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps)
rule all:
input:
workflow_outputs(config["final_step"])
workflow_outputs("all")
output:
Snakefile = config["results_dir"]+"/workflow/Snakefile",
get_samples = config["results_dir"]+"/workflow/get_samples.py",
......
......@@ -29,7 +29,7 @@ def sample_list(dir, SeOrPe):
if (len(set(suffixes)) == 1 ):
return {'samples': sorted(samples), 'suffix': list(set(suffixes))[0]}
else:
exit("Files have different suffixes:" + suffixes)
exit("Files have different suffixes:" + ','.join(suffixes))
def main():
if len(sys.argv) == 3:
......
......@@ -59,7 +59,6 @@ params:
bcftools_mpileup_and_call_threads: 4
samples: []
groups: []
final_step: all
steps:
- title: Preprocessing
name: preprocess
......@@ -102,42 +101,56 @@ params_info:
tool: fastp
rule: fastp_SE
type: numeric
label: Number of threads to use
fastp_complexity_threshold:
tool: fastp
rule: fastp_SE
type: numeric
label: The threshold for low complexity filter (0~100)
fastp_report_title:
tool: fastp
rule: fastp_SE
type: text
label: fastp report title
fastp_adapter_sequence:
tool: fastp
rule: fastp_SE
type: text
label: The adapter for read1. For SE data, if not specified, the adapter will
be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.
fastp_adapter_sequence_R2_PE:
tool: fastp
rule: fastp_PE
type: text
label: the adapter for read2 (PE data only). This is used if R1/R2 are found not
overlapped. If not specified, it will be the same as <adapter_sequence>
fastp_P:
tool: fastp
rule: fastp_SE
type: numeric
label: One in (--overrepresentation_sampling) reads will be computed for overrepresentation
analysis (1~10000), smaller is slower.
fastp_correction_PE:
tool: fastp
rule: fastp_PE
type: checkbox
label: Enable base correction in overlapped regions
fastp_low_complexity_filter:
tool: fastp
rule: fastp_SE
type: checkbox
label: Enable low complexity filter. The complexity is defined as the percentage
of base that is different from its next base (base[i] != base[i+1]).
fastp_overrepresentation_analysis:
tool: fastp
rule: fastp_SE
type: checkbox
label: enable overrepresented sequence analysis.
bwa_index_path:
tool: bwa
rule: bwa_index
type: input_dir
label: Path to an existing bwa index (or where to save a new one)
bwa_index_genome_fasta_select:
tool: bwa
rule: bwa_index
......@@ -146,22 +159,27 @@ params_info:
tool: bwa
rule: bwa_index
type: input_file
label: Path to reference genome fasta file
bwa_index_algorithm:
tool: bwa
rule: bwa_index
type: radio
label: Algorithm for constructing BWT index (see documentation for details)
bwa_mem_threads:
tool: bwa
rule: bwa_mem_SE
type: numeric
label: Number of threads to use
bwa_mem_quality0_multimapping:
tool: bwa
rule: bwa_mem_SE
type: checkbox
label: Put 0 as mapping quality for multimapping reads
bowtie_index_path:
tool: bowtie
rule: bowtie_index
type: input_dir
label: Path to an existing bowtie index (or where to save a new one)
bowtie_index_genome_fasta_select:
tool: bowtie
rule: bowtie_index
......@@ -170,66 +188,93 @@ params_info:
tool: bowtie
rule: bowtie_index
type: input_file
label: Path to reference genome fasta file
bowtie_index_threads:
tool: bowtie
rule: bowtie_index
type: numeric
label: Number of threads to use to index genome
bowtie_threads:
tool: bowtie
rule: bowtie_SE
type: numeric
label: Number of threads to use to align reads
bowtie_minins_PE:
tool: bowtie
rule: bowtie_PE
type: numeric
label: The minimum insert size for valid paired-end alignments
bowtie_maxins_PE:
tool: bowtie
rule: bowtie_PE
type: numeric
label: The maximum insert size for valid paired-end alignments
bowtie_orientation_PE:
tool: bowtie
rule: bowtie_PE
type: radio
label: The upstream/downstream mate orientations for a valid paired-end alignment
against the forward reference strand.
bowtie_mult_align_limit:
tool: bowtie
rule: bowtie_SE
type: numeric
label: Suppress all alignments for a particular read or pair if more than 'x'
reportable alignments exist for it
bowtie_best:
tool: bowtie
rule: bowtie_SE
type: checkbox
label: '--best : Make Bowtie guarantee that reported singleton alignments are
''best'' in terms of stratum (i.e. number of mismatches, or mismatches in the
seed in the case of -n mode) and in terms of the quality values at the mismatched
position(s).'
bowtie_strata:
tool: bowtie
rule: bowtie_SE
type: checkbox
label: '--strata : If many valid alignments exist and are reportable (e.g. are
not disallowed via the -k option) and they fall into more than one alignment
''stratum'', report only those alignments that fall into the best stratum. When
--strata is specified, --best must also be specified.'
Picard_MarkDuplicates_threads:
tool: Picard_MarkDuplicates
rule: Picard_MarkDuplicates
type: numeric
label: Number of threads to use
Picard_MarkDuplicates_remove_all_duplicates:
tool: Picard_MarkDuplicates
rule: Picard_MarkDuplicates
type: checkbox
label: 'remove_all_duplicates : If true do not write duplicates to the output
file instead of writing them with appropriate flags set.'
Picard_MarkDuplicates_samtools_memory:
tool: Picard_MarkDuplicates
rule: Picard_MarkDuplicates
type: numeric
label: '-m parameter for samtools sort: Memory allocated per thread for samtools_sort
(in Gb).'
gatk_IndelRealigner_threads:
tool: gatk_IndelRealigner
rule: gatk_IndelRealigner
type: numeric
label: Number of threads to use
gatk_IndelRealigner_samtools_memory:
tool: gatk_IndelRealigner
rule: gatk_IndelRealigner
type: numeric
label: '-m parameter for samtools sort: Memory allocated per thread for samtools_sort
(in Gb).'
gatk_haplotype_caller_threads:
tool: gatk_haplotype_caller
rule: gatk_haplotype_caller
type: numeric
label: Number of threads to use
bcftools_mpileup_and_call_threads:
tool: bcftools_mpileup
rule: bcftools_mpileup_and_call
type: numeric
label: Threads to use
prepare_report_scripts: []
prepare_report_outputs: {}
outputs:
......
......@@ -103,10 +103,10 @@ singularity run --app appName this_container.sif
mv samtools-1.9/samtools bin/samtools
rm -r samtools-1.9 samtools-1.9.tar.bz2
wget https://vorboss.dl.sourceforge.net/project/bowtie-bio/bowtie/1.2.2/bowtie-1.2.2-linux-x86_64.zip
unzip bowtie-1.2.2-linux-x86_64.zip
cp bowtie-1.2.2-linux-x86_64/bowtie* /usr/bin
rm -rf bowtie-1.2.2*
wget -O bowtie-1.2.3-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/1.2.3/bowtie-1.2.3-linux-x86_64.zip/download
unzip bowtie-1.2.3-linux-x86_64.zip
cp bowtie-1.2.3-linux-x86_64/bowtie* /usr/bin
rm -rf bowtie-1.2.3*
cd /opt/biotools/bin
wget https://github.com/broadinstitute/picard/releases/download/2.20.8/picard.jar
......@@ -131,7 +131,8 @@ singularity run --app appName this_container.sif
./configure --prefix=/opt/biotools
make
make install
cd .. && rm bcftools-1.9.tar.bz2
mv bcftools /opt/biotools/bin/
cd .. && rm -r bcftools-1.9.tar.bz2 bcftools-1.9
apt install -y tabix
......@@ -17,7 +17,7 @@ MenuGauche = sidebarMenu(id="sidebarmenu",
tags$br(),
numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16),
selectInput("final_step", label = "Select the step to reach : ", selected = "all", choices = list('Preprocessing'='preprocess','Mapping'='mapping','Mark Duplicates'='mark_duplicates','Indel Realign'='indel_realign','Variant calling'='variant_calling',"All"="all")), tags$br(),
selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('none'='none','Preprocessing'='preprocess','Mapping'='mapping','Mark Duplicates'='mark_duplicates','Indel Realign'='indel_realign','Variant calling'='variant_calling',"All"="all")), tags$br(),
actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"),
actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"),
......
......@@ -225,8 +225,6 @@ save_params <- function(path_param){
res = paste0(res, paste("bcftools_mpileup_and_call_threads:", paste0('"', input$bcftools_mpileup_and_call_threads, '"'), "\n", sep = " "))
}
res = paste0(res, paste("final_step:", paste0('"', input$final_step, '"'), "\n", sep = " "))
a = yaml.load_file("/workflow/params.total.yml")
p = a[["params"]]
a["params"] = NULL
......@@ -246,23 +244,21 @@ save_params <- function(path_param){
write_yaml(d,path_param,handlers=list(logical = logical))
}
compare_params = function(dossierAnalyse){
if (!file.exists(paste0(dossierAnalyse,"/lastrun/params.yml"))){
return(c())
force_rule <- function(force_from){
if (input$force_from=="none"){
return("")
}
else{
new_params = yaml.load_file(paste0(dossierAnalyse,"/params.yml"))
old_params = yaml.load_file(paste0(dossierAnalyse,"/workflow/params.yml"))
changed = new_params[!(new_params %in% old_params)]
rules = c()
if (length(changed)>=1){
for (param in names(changed)){
if (!grepl("_threads$",param)){
rules = c(rules, new_params$params_info[[param]]$rule)
}
}
else if (input$force_from=="all"){ return("--forcerun all") }
else {
params = yaml.load_file(paste0(input$results_dir,"/params.yml"))
outputs = params[["outputs"]]
tool = params[["params"]][[force_from]]
if (length(outputs[[tool]])==1)
rule = names(outputs[[tool]])[[1]]
else{
rule = names(outputs[[tool]])[[grep(input$SeOrPe,names(outputs[[tool]]))]]
}
return(unique(rules))
return(paste0("--forcerun ",rule))
}
}
#' Event when use RULEGRAPH button
......@@ -311,21 +307,10 @@ observeEvent(input$RunPipeline, {
if (!file.exists(paste0(input$results_dir,"/logs/runlog.txt"))){
file.create(paste0(input$results_dir,"/logs/runlog.txt"))
}
forcerun = compare_params(input$results_dir)
if (length(forcerun>1)){
rules = paste(forcerun, collapse=" ")
forcerun = paste(" --forcerun ",rules)
showModal(modalDialog(
title = "Params have changed since the last run",
forcerun
))
}
else{
forcerun = ""
}
system(paste0("touch ",input$results_dir,"/logs/workflow.running"),wait = T)
system(paste0("snakemake -s /workflow/Snakefile --configfile ",input$results_dir,"/params.yml -d ",input$results_dir," all --rulegraph | dot -Tpng -Gratio=0.75 > ",input$results_dir,"/Rule_graph_mqc.png"))
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " --forcerun all -d ", input$results_dir , " --cores ", input$cores, " all ", forcerun),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
force = force_rule(input$force_from)
system2("python3",paste0("-u -m snakemake -s /workflow/Snakefile --configfile ", paste0(input$results_dir,"/params.yml") , " -d ", input$results_dir , " --cores ", input$cores, " all ", force),wait = FALSE, stdout = paste0(input$results_dir,"/logs/runlog.txt"), stderr = paste0(input$results_dir,"/logs/runlog.txt"))
tags$iframe(src="results/multiqc_report.html",width="100%", height="900px")},
error = function(e){
system(paste0("touch ",input$results_dir,"/logs/workflow_end.error"),wait = T)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment