Commit 6eb9efd6 authored by mmassaviol's avatar mmassaviol
Browse files

New workflow, Interop_report

parent 4b988e06
......@@ -30,6 +30,7 @@ Liste des workflows existants:
- Virus_Assembler_Megahit (Megahit + Blast + BWA)
- RNAseq (pseudoalign (kallisto, Salmon) + expression différentielle (Edger, DESeq2))
- Variant_calling (bwa or bowtie and gatk or bcftools)
- Interop_report (Illumina InterOp)
## 1 Construire un conteneur
......
import os
import re
import snakemake.utils
import csv
#############
# Wildcards #
#############
# `config` is the dictionary loaded from the workflow configuration file
# (passed to snakemake with --configfile). Its top-level sections are split
# into module-level constants before `config` itself is narrowed below.
STEPS = config["steps"]  # list of steps; each entry is read for "name" and "title"
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]  # tool -> extra files produced for the report
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]  # names of the available <tool>.prepare.report.R scripts
OUTPUTS = config["outputs"]  # tool -> rule -> list of entries with "file" and "description"
PARAMS_INFO = config["params_info"]  # parameter name -> metadata (the "label" key is read later)
# From this point on, `config` only refers to the "params" section.
config = config["params"]
##########
# Inputs #
##########
# Tools inputs functions
def interop_inputs():
    """Return the named inputs of the ``interop`` rule.

    The single entry, ``runinfo``, is the ``RunInfo.xml`` file found in the
    directory configured as ``interop_analysis_dir``.
    """
    return {"runinfo": config["interop_analysis_dir"] + "/RunInfo.xml"}
def prepare_report_inputs():
    """Return the outputs of every configured step as one flat list."""
    return [path for step in STEPS for path in step_outputs(step["name"])]
def prepare_report_scripts():
    """Return the report-preparation R scripts of the selected tools.

    For each step, the tool chosen in the parameters is mapped to
    ``<tool>.prepare.report.R``; the script is kept only when it is listed in
    ``PREPARE_REPORT_SCRIPTS``. Paths are rooted at ``/workflow/scripts/``.
    """
    candidates = (config[step["name"]] + ".prepare.report.R" for step in STEPS)
    return [
        "/workflow/scripts/" + script_name
        for script_name in candidates
        if script_name in PREPARE_REPORT_SCRIPTS
    ]
def prepare_report_outputs():
    """Return the files the ``prepare_report`` rule is expected to create.

    The list always starts with ``<results_dir>/outputs_mqc.csv`` and is
    followed by the tool-specific files declared in
    ``PREPARE_REPORT_OUTPUTS`` for each selected tool.
    """
    results_dir = config["results_dir"]
    report_files = [results_dir + "/outputs_mqc.csv"]
    for step in STEPS:
        tool = config[step["name"]]
        if tool not in PREPARE_REPORT_OUTPUTS.keys():
            continue
        report_files.extend(
            results_dir + "/" + tool + "/" + filename
            for filename in PREPARE_REPORT_OUTPUTS[tool]
        )
    return report_files
def multiqc_inputs():
    """Return the input files of the ``multiqc`` rule.

    These are exactly the files listed by ``prepare_report_outputs()``, which
    always contains at least ``outputs_mqc.csv``.
    """
    report_files = prepare_report_outputs()
    return report_files
###########
# Outputs #
###########
def step_outputs(step):
    """Return the outputs associated with a step name.

    ``"interop_read_metrics"`` maps to the outputs of the ``interop`` rule,
    ``"all"`` to a list of the ``multiqc`` rule outputs; any other name gives
    an empty list.
    """
    if step == "interop_read_metrics":
        return rules.interop.output
    if step == "all":
        return list(rules.multiqc.output)
    return list()
# get outputs for each choosen tools
def workflow_outputs(step):
    """Return a new list holding the outputs of the given step."""
    return list(step_outputs(step))
#########
# Rules #
#########
# Run the Illumina InterOp command-line tools on the analysis directory and
# store their results in <results_dir>/<interop_output_dir>.
#
# The shell command changes into the output directory, then chains:
#   - summary, index-summary, dumptext, imaging_table -> CSV files
#     (sed is used to reshape the text output into comma-separated values);
#   - plot_* tools -> gnuplot scripts whose "set output" line is rewritten to a
#     *_mqc.png file name before being piped to gnuplot.
#
# NOTE(review): imaging_table.csv and the *_mqc.png images are written by the
# command but are not declared in `output`.
# NOTE(review): `log` is declared but the shell command never writes to {log}.
rule interop:
    input:
        **interop_inputs()
    output:
        summary = config["results_dir"]+"/"+config["interop_output_dir"]+"/summary.csv",
        index_summary = config["results_dir"]+"/"+config["interop_output_dir"]+"/index_summary.csv",
        dump_text = config["results_dir"]+"/"+config["interop_output_dir"]+"/dump_text.csv",
    log: config["results_dir"]+'/logs/interop/interop_log.txt'
    params:
        analysis_dir = config["interop_analysis_dir"],
        output_dir = config["results_dir"]+"/"+config["interop_output_dir"],
    shell:
        "cd {params.output_dir}; "
        "summary {params.analysis_dir} | sed 's/ *,/,/g' > summary.csv " #remove unneeded whitespaces
        "&& index-summary {params.analysis_dir} | sed 's/ \{{2,\}}/,/g' | sed 's/ CV/,CV/' | sed 's/^ //g' | sed 's/,$//g' > index_summary.csv " # format as a csv for multiqc
        "&& dumptext {params.analysis_dir} > dump_text.csv "
        "&& imaging_table {params.analysis_dir} | sed 's/;/,/g' > imaging_table.csv "
        "&& plot_qscore_heatmap {params.analysis_dir} | sed \"s/set output '.*'/set output 'Qscore_heatmap_mqc.png'/\" | gnuplot "
        "&& plot_qscore_histogram {params.analysis_dir} | sed \"s/set output '.*'/set output 'Qscore_histogram_mqc.png'/\" | gnuplot "
        "&& plot_by_cycle {params.analysis_dir} | sed \"s/set output '.*'/set output 'Intensity_by_cycle_mqc.png'/\" | gnuplot "
        "&& plot_by_lane {params.analysis_dir} | sed \"s/set output '.*'/set output 'Cluster_count_by_lane_mqc.png'/\" | gnuplot "
        "&& plot_flowcell {params.analysis_dir} | sed \"s/set output '.*'/set output 'Flowcell_intensity_mqc.png'/\" | gnuplot "
# Build everything the MultiQC report needs besides the tool results:
#   1. run the tool-specific <tool>.prepare.report.R scripts;
#   2. write outputs_mqc.csv, a table of the files produced by each step;
#   3. write params_tab_mqc.csv, a table of the parameters that were used;
#   4. generate config_multiqc.yaml with generate_multiqc_config.py.
rule prepare_report:
    input:
        *prepare_report_inputs(),
    output:
        *prepare_report_outputs(),
        config_multiqc = config["results_dir"] + "/config_multiqc.yaml",
        params_tab = config["results_dir"] + "/params_tab_mqc.csv"
    params:
        params_file = config["results_dir"]+"/params.yml",
        results_dir = config["results_dir"]
    log:
        config["results_dir"]+"/logs/prepare_report_log.txt"
    run:
        def fits_read_mode(name):
            # True unless `name` is tagged for the other sequencing mode
            # ("_PE" while running single-end, "_SE" while running paired-end).
            read_mode = config["SeOrPe"]
            return ((read_mode == "SE" and "_PE" not in name)
                    or (read_mode == "PE" and "_SE" not in name))

        # Specific scripts for each tool
        for script in prepare_report_scripts():
            shell("Rscript "+script+" {params.params_file} |& tee {log}")

        # Outputs files for Multiqc report
        outfile = config["results_dir"] + "/outputs_mqc.csv"
        head = (
            "\n"
            "# description: 'This is the list of the files generated by each step of the workflow'\n"
            "# section_name: 'Workflow outputs'\n"
        )
        with open(outfile,"w") as out:
            out.write(head)
            out.write("step\ttool\tfile\tdescription\n")
            for step in STEPS:
                tool = config[step["name"]]
                # The counter prefixes the first column so that each row of a
                # step gets a distinct identifier.
                row_number = 1
                for command in OUTPUTS[tool]:
                    if not fits_read_mode(command):
                        continue
                    for files in OUTPUTS[tool][command]:
                        path = config[command+"_output_dir"] + "/" + files["file"]
                        out.write(str(row_number)+"-"+step["title"]+"\t"+tool+"\t"+path+"\t"+files["description"]+"\n")
                        row_number += 1

        # Params list for Multiqc report
        head = (
            "# description: 'This is the list of the parameters for each rule'\n"
            "# section_name: 'Workflow parameters'\n"
        )
        global_keys = ["results_dir","sample_dir","sample_suffix","SeOrPe"]
        rows = ["params_name\tdescription\tvalue\n"]
        for step in STEPS:
            tool = config[step["name"]]
            for key, value in config.items():
                belongs_to_report = (tool in key and tool != "null") or (key in global_keys)
                # The read-mode filter is applied to the parameter name itself.
                # It used to read `command`, a variable left over from the
                # outputs loop above, which was unrelated to `key` and
                # undefined when no output was declared.
                if not (belongs_to_report and fits_read_mode(key)):
                    continue
                if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
                    description = PARAMS_INFO[key]["label"]
                else:
                    description = ''
                rows.append(key + "\t'" + description + "'\t'" + str(value) + "'\n")

        with open(output.params_tab,"w") as out:
            out.write(head)
            out.write("".join(rows))

        # Config for Multiqc report
        shell("python3 /workflow/generate_multiqc_config.py {params.params_file} {output.config_multiqc}")
# Build the MultiQC report over the whole results directory.
# Inputs are the files listed by multiqc_inputs() plus the generated MultiQC
# configuration; the declared output is the <results_dir>/multiqc_data
# directory. The command output is piped through `tee` into the rule log.
rule multiqc:
    input:
        multiqc_inputs(),
        config_multiqc = config["results_dir"] + "/config_multiqc.yaml"
    output:
        multiqc_dir = directory(config["results_dir"]+"/multiqc_data")
    params:
        output_dir = config["results_dir"]
    log:
        config["results_dir"]+'/logs/multiqc/multiqc_log.txt'
    shell:
        "multiqc --config {input.config_multiqc} -f {params.output_dir} "
        "-o {params.output_dir} |& tee {log}"
# Final Snakemake rule waiting for outputs of the final step choosen by user (default all steps)
# Final rule: requires the outputs of the "all" step (the MultiQC output), then
# copies the Snakefile, get_samples.py, the scripts directory and the
# parameters file into <results_dir>/workflow.
rule all:
    input:
        workflow_outputs("all")
    output:
        Snakefile = config["results_dir"]+"/workflow/Snakefile",
        get_samples = config["results_dir"]+"/workflow/get_samples.py",
        scripts = directory(config["results_dir"]+"/workflow/scripts"),
        params = config["results_dir"]+"/workflow/params.yml"
    params:
        params_file = config["results_dir"]+"/params.yml",
    shell:
        "cp /workflow/Snakefile {output.Snakefile} && "
        "cp /workflow/get_samples.py {output.get_samples} && "
        "cp -r /workflow/scripts {output.scripts} && "
        "cp {params.params_file} {output.params}"
# End-of-run handlers: leave a marker file in <results_dir>/logs so that the
# outcome of the run can be detected from outside.
onsuccess:
    print("Workflow finished, no error")
    # Empty marker file signalling a successful run
    shell("touch "+config["results_dir"]+"/logs/workflow_end.ok")
onerror:
    print("An error occurred")
    # Copy the content of the log into the error marker file
    shell("cat {log} > "+config["results_dir"]+"/logs/workflow_end.error")
    #shell("mail -s "an error occurred" youremail@provider.com < {log}")
import re
import sys
from tools import *
# Workflow configuration, loaded from the YAML file given as first command-line
# argument. NOTE: this runs when the module is loaded, so sys.argv[1] must be
# present even if the module is only imported.
config = read_yaml(sys.argv[1])
def report_section_order(cfg=None):
    """Build the MultiQC configuration text that orders the report sections.

    The fixed sections ``Rule_graph``, ``params_tab`` and ``outputs`` come
    first (orders 990, 980, 970). Then, for each step, the MultiQC module of
    the selected tool is added (unless it is declared as ``custom``), followed
    by one section per ``*_mqc*`` output file of the rules matching the
    sequencing mode. Each added section gets an order 10 lower than the
    previous one, starting at 960.

    Args:
        cfg: Parsed workflow configuration with the ``steps``, ``params``,
            ``multiqc`` and ``outputs`` sections. Defaults to the module-level
            ``config``.

    Returns:
        The YAML text, with every ``order`` entry nested under its section.
    """
    if cfg is None:
        cfg = config

    def section(name, order):
        # `order` must be indented deeper than the section key, otherwise it
        # is parsed as a sibling of the section instead of its property.
        return "    " + name + ":\n" + "        order: " + str(order) + "\n"

    res = "skip_generalstats: true\n\n"
    res += "report_section_order:\n"
    res += section("Rule_graph", 990)
    res += section("params_tab", 980)
    res += section("outputs", 970)

    read_mode = cfg["params"]["SeOrPe"]
    order = 960
    for step in cfg["steps"]:
        tool = cfg["params"][step["name"]]
        if cfg["multiqc"][tool] != "custom":
            res += section(cfg["multiqc"][tool], order)
            order -= 10
        for rule in cfg["outputs"][tool]:
            matches_mode = ((read_mode == "SE" and "_PE" not in rule)
                            or (read_mode == "PE" and "_SE" not in rule))
            if not matches_mode:
                continue
            for output in cfg["outputs"][tool][rule]:
                filename = output["file"]
                # Files whose name still holds a wildcard ({sample}_mqc.png)
                # are dynamic and cannot be ordered here.
                if "mqc" in filename and '{' not in filename:
                    res += section(re.sub(r'_mqc.*$', '', filename), order)
                    order -= 10
    return res
def main():
    """Write the MultiQC configuration to the file given as second argument."""
    content = "" + report_section_order()
    with open(sys.argv[2], "w") as handle:
        handle.write(content)


if __name__ == "__main__":
    # Generate the file only when run as a script, not when imported.
    main()
\ No newline at end of file
#!/usr/bin/env python3
# This script will take a directory and a parameter to tell if the reads are paired end or single end and return the sample list and the suffix
# Needs 2 arguments: reads_directory, SeOrPe
# SeOrPe is SE for single end reads and PE for paired end reads
# Usage: ./get_samples.py reads_directory SeOrPe
import os
import re
import csv
import sys
def sample_list(dir, SeOrPe):
    """Detect the sample names and the common file suffix in a directory.

    In paired-end mode (``SeOrPe == "PE"``) a file name is split as
    ``<sample>(_R1|_R2)<suffix>``; in any other mode it is split at the first
    dot that follows at least one character, as ``<sample><.suffix>``. File
    names that do not match are ignored.

    Args:
        dir: Directory containing the read files.
        SeOrPe: ``"PE"`` for paired-end reads, ``"SE"`` for single-end reads.

    Returns:
        A dict with ``samples`` (sorted list of unique sample names) and
        ``suffix`` (the suffix shared by all matching files).

    Raises:
        SystemExit: If no file matches the expected naming, or if the matching
            files do not all share the same suffix.
    """
    if SeOrPe == "PE":
        pattern = re.compile(r"^(.+?)(_R1|_R2)(.+)")
        suffix_group = 3
    else:
        pattern = re.compile(r"^(.+?)(\..*)")
        suffix_group = 2

    samples = set()
    suffixes = set()
    for filename in os.listdir(dir):
        match = pattern.match(filename)
        if match:
            samples.add(match.group(1))
            suffixes.add(match.group(suffix_group))

    # An empty result is reported on its own instead of being described as a
    # suffix mismatch.
    if not suffixes:
        sys.exit("No sample files found in {}".format(dir))
    if len(suffixes) > 1:
        sys.exit("Files have different suffixes:" + ','.join(sorted(suffixes)))
    return {'samples': sorted(samples), 'suffix': suffixes.pop()}
def main():
    """Print the detected samples for the directory and mode given as arguments.

    Exits with a usage message unless exactly two arguments are supplied.
    """
    if len(sys.argv) != 3:
        exit("Needs 2 arguments: reads_directory, SeOrPe\nUsage: ./get_samples.py reads_directory SeOrPe")
    reads_directory, read_mode = sys.argv[1], sys.argv[2]
    print(sample_list(reads_directory, read_mode))


if __name__ == "__main__":
    # Run only when executed as a script, not when imported.
    main()
# Default parameters of the Interop_report workflow.
# NOTE(review): the nesting below was reconstructed from how the Snakefile and
# generate_multiqc_config.py read these keys; confirm against the original file.
pipeline: Interop_report
# Values read by the Snakefile as config["params"]
params:
  results_dir: /Results
  sample_dir: /Data
  SeOrPe: PE
  # Tool selected for the step named "interop_read_metrics"
  interop_read_metrics: interop
  # Sub-directory of results_dir where the interop rule writes its files
  interop_output_dir: interop
  # Directory expected to contain RunInfo.xml
  interop_analysis_dir: /Data
# NOTE(review): assumed to be top-level keys; not read by the code shown here
samples: []
groups: []
# Ordered steps of the workflow
steps:
- title: Interop read metrics
  name: interop_read_metrics
  tools:
  - interop
  default: interop
# Metadata of the parameters (type of input widget, label, owning tool/rule)
params_info:
  results_dir:
    type: output_dir
  sample_dir:
    type: input_dir
  SeOrPe:
    type: radio
  interop_analysis_dir:
    tool: interop
    rule: interop
    type: input_dir
    label: 'Data directory: '
# No tool-specific report preparation for this workflow
prepare_report_scripts: []
prepare_report_outputs: {}
# Files listed in the "Workflow outputs" table: tool -> rule -> files
outputs:
  interop:
    interop:
    - name: summary
      file: summary.csv
      description: Summary table
    - name: index_summary
      file: index_summary.csv
      description: Index summary table
    - name: dump_text
      file: dump_text.csv
      description: All Interop data as tables
# MultiQC module associated with each tool
multiqc:
  interop: interop
Bootstrap: localimage
From: ../base.sif
%environment
    export PATH=/opt/biotools/bin:$PATH
    export ROOTSYS=/opt/biotools/root
    # Double quotes are required for the variables to be expanded; with single
    # quotes the literal text '$LD_LIBRARY_PATH:$ROOTSYS/lib' was exported.
    # The ${VAR:+...} form avoids a leading ':' when the variable is unset.
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$ROOTSYS/lib"
%labels
Author YourName
Version v0.0.1
build_date 2018 déc. 07
%runscript
    # Default action of the container: list the available apps.
    echo "This container contains four apps (UI, Snakemake, getConfigfile and getSamples)."
    echo "UI is a user interface to set up the workflow and launch it."
    echo "Snakemake let you provide your configfile and other parameters to the snakemake command and launch it."
    # One echo per line: "\n" inside echo is not interpreted by every shell.
    echo "To get help for an app :"
    echo "singularity help --app appName this_container.sif"
    echo "To run an app :"
    echo "singularity run --app appName this_container.sif"
%apprun UI
    # Start the Shiny application; $1 is the host address and $2 the port.
    exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
%apphelp UI
To run the UI app you should bind data and results directories like in the following example.
You must also provide the host address and port where the shiny app will be launched
example: singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
%apprun Snakemake
    # $1: configuration file, $2: number of cores; any further argument is
    # forwarded unchanged to snakemake.
    configfile=$1
    cores=$2
    shift
    shift
    # Quoted so that paths or extra arguments containing spaces are preserved.
    exec snakemake -s /workflow/Snakefile all --configfile "$configfile" --cores "$cores" "$@"
%apphelp Snakemake
To run the Snakemake app you should bind data and results directories like in the following example.
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
example: singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
%apprun getConfigfile
    # Copy the default parameters file into the current directory as params.yml.
    exec cp /workflow/params.total.yml ./params.yml
%apphelp getConfigfile
To run the getConfigfile app you don't need to bind directories. This app only copies the default parameters file from the container to your local disk.
example: singularity run --app getConfigfile this_container.sif
%apprun getSamples
    # $1: reads directory, $2: SE or PE. Quoted so that a directory path
    # containing spaces is passed as a single argument.
    exec python3 /workflow/get_samples.py "$1" "$2"
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix.
example: singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake lets you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (useful for filling samples and sample_suffix in the parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
singularity run --app appName this_container.sif
%files
./files /workflow
./sagApp /sagApp
%post
    # -p: do not fail when the base image already provides these directories
    mkdir -p /Data /Results /opt/biotools

    # apt-get is used rather than apt, whose interface is meant for interactive use
    apt-get update -y
    # gnuplot renders the scripts emitted by the InterOp plot_* tools
    apt-get install -y gnuplot

    # Install the Illumina InterOp binaries and expose them on PATH
    cd /opt/biotools
    wget https://github.com/Illumina/interop/releases/download/v1.1.8/InterOp-1.1.8-Linux-GNU.tar.gz
    tar -xvzf InterOp-1.1.8-Linux-GNU.tar.gz
    rm InterOp-1.1.8-Linux-GNU.tar.gz
    echo 'export PATH="/opt/biotools/InterOp-1.1.8-Linux-GNU/bin/:$PATH"' >>"$SINGULARITY_ENVIRONMENT"
import oyaml as yaml
def read_yaml(filepath):
    """Load a YAML file and return its content.

    The file is parsed with ``safe_load``: calling ``yaml.load`` without an
    explicit loader can build arbitrary Python objects and is rejected by
    recent PyYAML releases.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The parsed data, or ``None`` when the file cannot be opened or parsed
        (the error is printed in that case).
    """
    try:
        with open(filepath, 'r') as stream:
            return yaml.safe_load(stream)
    except IOError as e:
        print("Error in file opening:", e)
    except yaml.YAMLError as exc:
        print("Error in yaml loading:", exc)
    return None
def write_yaml(filepath,data):
    """Dump ``data`` to ``filepath`` as block-style YAML.

    An error while opening or writing the file is printed, not raised.
    """
    try:
        with open(filepath, 'w') as stream:
            yaml.dump(data, stream, default_flow_style=False)
    except IOError as err:
        print("Error in file opening:", err)
\ No newline at end of file
# Build a "chooser" widget: two select boxes side by side, separated by a
# right arrow and a left arrow icon.
#
# inputId                   id of the enclosing div (the Shiny input id)
# leftLabel, rightLabel     accepted but not used by this function
# leftChoices, rightChoices values shown in the left / right select box
# size                      "size" attribute of both select boxes
# multiple                  when TRUE, both select boxes get the "multiple" attribute
chooserInput <- function(inputId, leftLabel, rightLabel, leftChoices, rightChoices,
                         size = 5, multiple = FALSE) {

  leftOptions <- lapply(leftChoices, tags$option)
  rightOptions <- lapply(rightChoices, tags$option)

  # The HTML attribute is either present ("multiple") or absent (NULL)
  multipleAttr <- if (multiple) "multiple" else NULL

  # Script and style are wrapped in singleton() so they are emitted once
  headContent <- singleton(tags$head(
    tags$script(src="chooser-binding.js"),
    tags$style(type="text/css",
      HTML(".chooser-container { display: inline-block; }")
    )
  ))

  leftBox <- div(class="chooser-container chooser-left-container",
    tags$select(class="left", size=size, multiple=multipleAttr, leftOptions)
  )
  arrows <- div(class="chooser-container chooser-center-container",
    icon("arrow-circle-o-right", "right-arrow fa-3x text-primary"),
    tags$br(),
    icon("arrow-circle-o-left", "left-arrow fa-3x text-primary")
  )
  rightBox <- div(class="chooser-container chooser-right-container",
    tags$select(class="right", size=size, multiple=multipleAttr, rightOptions)
  )

  tagList(
    headContent,
    div(id=inputId, class="chooser", leftBox, arrows, rightBox)
  )
}
# Input handler for values of type "shinyjsexamples.chooser": a NULL value is
# passed through, otherwise the value is converted to a list holding the
# `left` and `right` entries as character vectors.
# force = TRUE is passed to registerInputHandler (presumably so that sourcing
# this file again replaces the handler - confirm in the Shiny documentation).
registerInputHandler("shinyjsexamples.chooser", function(data, ...) {
  if (is.null(data))
    NULL
  else
    list(left=as.character(data$left), right=as.character(data$right))
}, force = TRUE)
\ No newline at end of file
# Left sidebar of the dashboard: navigation entries, run controls (threads,
# restart step, run/stop buttons), links to the run output and final report,
# and the session controls.
MenuGauche = sidebarMenu(id="sidebarmenu",
  # Parameter pages
  menuItem("Global parameters", tabName="global_params", icon=icon("pencil", lib="font-awesome"), newtab=FALSE),
  menuItem("Interop read metrics", tabName="interop_read_metrics", icon=icon("pencil", lib="font-awesome"), newtab=FALSE),
  menuItem("Rule Graph", tabName="RULEGRAPH", icon=icon("gear", lib="font-awesome"), newtab=FALSE),
  tags$br(),
  # Run controls
  numericInput("cores", label = "Threads available", min = 1, max = 24, step = 1, width = "auto", value = 16),
  selectInput("force_from", label = "Start again from a step : ", selected = "none", choices = list('none'='none','Interop read metrics'='interop_read_metrics',"All"="all")), tags$br(),
  actionButton("RunPipeline", "Run pipeline", icon("play"), class="btn btn-info"),
  actionButton("StopPipeline", "Stop pipeline", icon("stop"), class="btn btn-secondary"),
  tags$br(),
  tags$br(),
  # Results pages
  menuItem("Running Workflow output", tabName="run_out", icon=icon("terminal", lib="font-awesome"), newtab=FALSE),
  menuItem("Final report", tabName="Report", icon=icon("file", lib="font-awesome"), newtab=FALSE),
  tags$br(),
  actionButton("close_session", "Close session", icon("times"), class="btn btn-primary"),
  tags$br(),tags$br(),
  menuItem("Powered by mbb", href="http://mbb.univ-montp2.fr/MBB/index.php", newtab=TRUE, icon=icon("book", lib="font-awesome"), selected=NULL)
)
#@author jimmy.lopez@univ-montp2.fr
library(shiny)
library(shinydashboard)
library(shinyjs)
library(yaml)
library(stringr)
library(shinyFiles)
library(tools)
library(knitr)
library(DT)
source("./R/chooser.R", local=T)
source("./pages/pages_def_global_params.R", local=T)
source("./pages/pages_def_interop_read_metrics.R", local=T)
# Pages that are not generated from the workflow parameters: rule graph
# (with a refresh button), final report, and run output (with an unlock button).
tabRULEGRAPH = fluidPage(box(title = 'Rule Graph :', width = 12, status = 'primary', collapsible = TRUE, solidHeader = TRUE, uiOutput('RULEGRAPH_svg'),actionButton('refresh_rg', 'Refresh', icon('sync'), class='btn btn-info')))
tabReport = fluidPage(box(title = 'Report :', width = 12, status = 'primary', collapsible = TRUE, solidHeader = TRUE, uiOutput('report_html')))
tabRUN = fluidPage(box(title = 'Run :', width = 12 , status = 'primary', collapsible = TRUE, solidHeader = TRUE, uiOutput('run_out',style = 'overflow-y: scroll; height: 600px')),actionButton("unlock", "Unlock the directory in case of previous failure"))
# Sidebar definition (MenuGauche)
source("./R/menugauche.R", local=T)
# Extra CSS rules, inlined in the page head
style <- tags$style(HTML(readLines("www/added_styles.css")))
# Dashboard layout: header, sidebar, and one tabItem per page. Each tabName
# matches the tabName of a menuItem in the sidebar.
UI <- dashboardPage(
  skin="blue",
  dashboardHeader(title="Interop_report", titleWidth=230),
  dashboardSidebar(width=230, MenuGauche),
  dashboardBody(
    shinyjs::useShinyjs(),
    tags$head(tags$link(rel="stylesheet", type="text/css", href="bootstrap.min.readable.css")),
    tags$head(style),
    tabItems(
      tabItem(tabName = "global_params", tabglobal_params),
      tabItem(tabName = "interop_read_metrics", tabinterop_read_metrics)
      ,tabItem(tabName = "RULEGRAPH", tabRULEGRAPH)
      ,tabItem(tabName = "Report", tabReport)
      ,tabItem(tabName = "run_out", tabRUN)
    )
  )
)
reload = function(dossierAnalyse,session,output){
# if params exists reload them
if (file.exists(paste0(dossierAnalyse,"/params.yml"))){
params = read_yaml(paste0(dossierAnalyse,"/params.yml"))
for (param in names(params$params_info)){
if (params$params_info[[param]]$type == "text" || params$params_info[[param]]$type == "input_dir" || params$params_info[[param]]$type == "output_dir"){
updateTextInput(session, param, value = params[["params"]][[param]])
}
if (params$params_info[[param]]$type == "textArea"){
updateTextAreaInput(session, paste0(param,"_server"), value = params[["params"]][[param]])
}
if (params$params_info[[param]]$type == "input_file" && params[["params"]][[paste0(param,"_select")]] == "server"){
updateTextInput(session, paste0(param,"_server"), value = params[["params"]][[param]])
}
if (params$params_info[[param]]$type == "numeric"){
updateNumericInput(session, param, value = params[["params"]][[param]])
}
if (params$params_info[[param]]$type == "radio"){
updateRadioButtons(session, param, selected = params[["params"]][[param]])