Commit e1c8a657 authored by mmassaviol

Update radseqref (update stacks to 2.5)

parent 9a9ad1c4
FROM mmassaviol/mbb_workflows_base:latest as alltools
FROM mbbteam/mbb_workflows_base:latest as alltools
RUN apt install -y fastqc=0.11.5+dfsg-6
RUN cd /opt/biotools \
&& wget http://catchenlab.life.illinois.edu/stacks/source/stacks-2.3b.tar.gz \
&& tar -zxvf stacks-2.3b.tar.gz \
&& cd stacks-2.3b/ \
&& wget http://catchenlab.life.illinois.edu/stacks/source/stacks-2.5.tar.gz \
&& tar -zxvf stacks-2.5.tar.gz \
&& cd stacks-2.5/ \
&& ./configure \
&& make -j 10 \
&& make install \
&& mv -t ../bin sstacks kmer_filter gstacks tsv2bam process_shortreads populations ustacks clone_filter phasedstacks cstacks process_radtags \
&& cd .. && rm -r stacks-2.3b stacks-2.3b.tar.gz
&& mv -t ../bin sstacks kmer_filter gstacks tsv2bam process_shortreads populations ustacks phasedstacks cstacks process_radtags \
&& cd .. && rm -r stacks-2.5 stacks-2.5.tar.gz
RUN wget -O bowtie-1.2.3-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/1.2.3/bowtie-1.2.3-linux-x86_64.zip/download \
&& unzip bowtie-1.2.3-linux-x86_64.zip \
......@@ -35,9 +35,11 @@ RUN cd /opt/biotools \
&& cd .. \
&& rm -r bwa-0.7.17 bwa-0.7.17.tar.bz2
RUN Rscript -e 'install.packages("calibrate",repos="https://cloud.r-project.org/",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'install.packages("calibrate",repos="https://cloud.r-project.org/",Ncpus=8, clean=TRUE);library("calibrate")'
RUN Rscript -e 'BiocManager::install("SNPRelate", version = "3.8",Ncpus=8, clean=TRUE)'
RUN Rscript -e 'BiocManager::install("SNPRelate", version = "3.8",Ncpus=8, clean=TRUE);library("SNPRelate")'
RUN Rscript -e 'library(devtools);install_github("jokergoo/ComplexHeatmap");library("ComplexHeatmap")'
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
......
......@@ -89,7 +89,7 @@ then
echo " "
echo Results will be written to : $2
echo " "
hostname -I | awk -v port=$APP_PORT '{print "You can access the workflow interface at : http://"$1":"port}'
hostname -I | grep -E -o "162.38.181.[0-9]{1,3}" | awk -v port=$APP_PORT '{print "You can access the workflow interface at : http://"$1":"port}'
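# NOTE: the grep keeps only the address in the 162.38.181.x range (presumably the interface reachable by users); adapt the pattern for other deployments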
echo " "
echo To start a Bash session inside the container : docker exec -it $CONTAINER_ID /bin/bash
else
......
from tools import *
from raw_reads import raw_reads
import os
import re
import snakemake.utils
......@@ -7,7 +9,6 @@ import csv
# Wildcards #
#############
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -29,43 +30,40 @@ individus = get_individus()
# Inputs #
##########
# Generic input functions
## get raw_reads
def raw_reads():
inputs = dict()
if (config["SeOrPe"] == "PE"):
inputs["read"] = config['sample_dir']+'/{sample}_R1'+config["sample_suffix"]
inputs["read2"] = config['sample_dir']+'/{sample}_R2'+config["sample_suffix"]
elif (config["SeOrPe"] == "SE"):
inputs["read"] = config['sample_dir']+'/{sample}'+config["sample_suffix"]
else:
sys.exit("SeOrPe should be SE or PE")
return inputs
# raw_inputs function call
raw_reads = raw_reads(config['results_dir'], config['sample_dir'], config['SeOrPe'])
config.update(raw_reads)
SAMPLES = raw_reads['samples']
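# raw_reads() scans sample_dir, writes <results_dir>/samples.tsv and returns a dict with
# 'samples', 'sample_suffix', 'dico' and the 'read'/'read2' path patterns;
# config.update() exposes those entries as config keys for the rules below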
## get reads (trimmed or raw)
## get reads
def reads():
inputs = dict()
if (config["trimming"] == "null"):
return raw_reads()
elif (config["trimming"] == "trimmomatic"):
if (config["SeOrPe"] == "SE"):
inputs["read"] = rules.trimmomatic_SE.output.read
elif (config["SeOrPe"] == "PE"):
inputs["read"] = rules.trimmomatic_PE.output.readFP
inputs["read2"] = rules.trimmomatic_PE.output.readRP
return inputs
if (config["SeOrPe"] == "SE"):
inputs["read"] = raw_reads['read']
elif (config["SeOrPe"] == "PE"):
inputs["read"] = raw_reads['read']
inputs["read2"] = raw_reads['read2']
return inputs
# Tools inputs functions
def fastqc_inputs():
return raw_reads()
def fastqc_SE_inputs():
inputs = dict()
inputs["read"] = raw_reads["read"]
return inputs
def fastqc_PE_inputs():
inputs = dict()
inputs["read"] = raw_reads["read"]
inputs["read2"] = raw_reads["read2"]
return inputs
def process_radtags_inputs():
return raw_reads()
return reads()
def bwa_mem_inputs():
if (config["demultiplexing"] == "null"):
return raw_reads()
return reads()
else:
inputs = dict()
if (config["SeOrPe"] == "PE"):
......@@ -211,7 +209,7 @@ def workflow_outputs(step):
rule fastqc_SE:
input:
**fastqc_inputs()
**fastqc_SE_inputs()
output:
html = config["results_dir"]+'/'+config["fastqc_SE_output_dir"]+'/{sample}'+config["sample_suffix"].split(".")[0]+'_fastqc.html',
zip = config["results_dir"]+'/'+config["fastqc_SE_output_dir"]+'/{sample}'+config["sample_suffix"].split(".")[0]+'_fastqc.zip',
......@@ -220,11 +218,16 @@ rule fastqc_SE:
params:
output_dir = config["results_dir"]+'/'+config["fastqc_SE_output_dir"]
shell:
"fastqc {input.read} -t {threads} -o {params.output_dir} |& tee {log}"
"fastqc "
"{input.read} "
"-t {threads} "
"-o {params.output_dir} "
"--quiet "
"|& tee {log}"
rule fastqc_PE:
input:
**fastqc_inputs()
**fastqc_PE_inputs()
output:
html1 = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]+'/{sample}_R1'+config["sample_suffix"].split(".")[0]+'_fastqc.html',
zip1 = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]+'/{sample}_R1'+config["sample_suffix"].split(".")[0]+'_fastqc.zip',
......@@ -235,7 +238,13 @@ rule fastqc_PE:
params:
output_dir = config["results_dir"]+'/'+config["fastqc_PE_output_dir"]
shell:
"fastqc {input.read} {input.read2} -t {threads} -o {params.output_dir} |& tee {log}"
"fastqc "
"{input.read} "
"{input.read2} "
"-t {threads} "
"-o {params.output_dir} "
"--quiet "
"|& tee {log}"
ruleorder: fastqc_PE > fastqc_SE
......@@ -379,7 +388,8 @@ rule bowtie_PE:
"-1 {input.read} "
"-2 {input.read2} 2> {log} "
"| samtools view -b 2>> {log} "
"| samtools sort -@ {threads} > {output.bam} 2>> {log}"
"| samtools sort -@ {threads} > {output.bam} 2>> {log} &&"
"samtools index -@ {threads} {output.bam} 2>> {log}"
rule bowtie_SE:
input:
......@@ -409,7 +419,8 @@ rule bowtie_SE:
"-S "#output in sam format
"{input.read} 2> {log} "
"| samtools view -b 2>> {log} "
"| samtools sort -@ {threads} > {output.bam} 2>> {log}"
"| samtools sort -@ {threads} > {output.bam} 2>> {log} &&"
"samtools index -@ {threads} {output.bam} 2>> {log}"
ruleorder: bowtie_PE > bowtie_SE
......@@ -461,7 +472,8 @@ rule bwa_mem_PE:
"{input.read2} 2> {log} "
"{params.quality0_multimapping} "
"| samtools view -b 2>> {log} "
"| samtools sort -@ {threads} > {output.bam} 2>> {log}"
"| samtools sort -@ {threads} > {output.bam} 2>> {log} &&"
"samtools index -@ {threads} {output.bam} 2>> {log}"
rule bwa_mem_SE:
input:
......@@ -490,7 +502,8 @@ rule bwa_mem_SE:
"{input.read} 2> {log} "
"{params.quality0_multimapping} "
"| samtools view -b 2>> {log} "
"| samtools sort -@ {threads} > {output.bam} 2>> {log}"
"| samtools sort -@ {threads} > {output.bam} 2>> {log} &&"
"samtools index -@ {threads} {output.bam} 2>> {log}"
ruleorder: bwa_mem_PE > bwa_mem_SE
......@@ -566,8 +579,8 @@ rule populations:
"-M {input.popmap} "
"-O {params.output_dir} "
"-r {params.r} "
"--max_obs_het {params.max_obs_het} "
"--min_maf {params.min_maf} "
"--max-obs-het {params.max_obs_het} "
"--min-maf {params.min_maf} "
"-p {params.p} "
"--vcf "
"--fstats "
......@@ -588,9 +601,10 @@ rule prepare_report:
output:
*prepare_report_outputs(),
config_multiqc = config["results_dir"] + "/config_multiqc.yaml",
params_tab = config["results_dir"] + "/params_tab_mqc.csv"
params_tab = config["results_dir"] + "/params_tab_mqc.csv",
versions_tab = config["results_dir"] + "/Tools_version_mqc.csv"
params:
params_file = workflow.overwrite_configfile,
params_file = workflow.overwrite_configfiles,
results_dir = config["results_dir"]
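# newer Snakemake replaces the singular workflow.overwrite_configfile with
# workflow.overwrite_configfiles, a list of the paths given via --configfile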
log:
config["results_dir"]+"/logs/prepare_report_log.txt"
......@@ -611,7 +625,7 @@ rule prepare_report:
tool = config[step["name"]]
i=1
for command in OUTPUTS[tool]:
if ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if ("SeOrPe" not in config.keys() or (config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
outputs = OUTPUTS[tool][command]
for files in outputs:
name = files["file"] if 'file' in files.keys() else files["directory"]
......@@ -627,7 +641,7 @@ rule prepare_report:
for step in STEPS:
tool = config[step["name"]]
for key, value in config.items():
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ((config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if (tool in key and tool != "null") or (key in ["results_dir","sample_dir","sample_suffix","SeOrPe"]) and ("SeOrPe" not in config.keys() or (config["SeOrPe"] == "SE" and not("_PE" in command)) or (config["SeOrPe"] == "PE" and not("_SE" in command))):
if (key in PARAMS_INFO.keys() and "label" in PARAMS_INFO[key].keys()):
description = PARAMS_INFO[key]["label"]
else:
......@@ -638,6 +652,19 @@ rule prepare_report:
out.write(head)
out.write(params_list)
# Tools version
with open(output.versions_tab,"w") as out:
versions = read_yaml("/workflow/versions.yaml")
head = """# description: 'This is the list of the tools used and their version'
# section_name: 'Tools version'
"""
out.write(head)
out.write("Tool\tVersion"+"\n")
for step in STEPS:
tool = config[step["name"]]
if (tool in versions.keys()):
out.write(tool+"\t"+versions[tool]+"\n")
# Config for Multiqc report
shell("python3 /workflow/generate_multiqc_config.py {params.params_file} {output.config_multiqc}")
......@@ -665,7 +692,7 @@ rule all:
scripts = directory(config["results_dir"]+"/workflow/scripts"),
params = config["results_dir"]+"/workflow/params.yml"
params:
params_file = workflow.overwrite_configfile,
params_file = workflow.overwrite_configfiles,
shell:
"cp /workflow/Snakefile {output.Snakefile} && "
"cp /workflow/get_samples.py {output.get_samples} && "
......@@ -673,11 +700,11 @@ rule all:
"cp {params.params_file} {output.params}"
onsuccess:
print("Workflow finished, no error")
print("Workflow finished with SUCCESS")
shell("touch "+config["results_dir"]+"/logs/workflow_end.ok")
onerror:
print("An error occurred")
print("An ERROR occurred")
shell("cat {log} > "+config["results_dir"]+"/logs/workflow_end.error")
#shell("mail -s "an error occurred" youremail@provider.com < {log}")
import re
import sys
from tools import *
from tools import read_yaml
config = read_yaml(sys.argv[1])
......@@ -13,7 +13,9 @@ def report_section_order():
res += " order: 980\n"
res += " outputs:\n"
res += " order: 970\n"
cpt = 960
res += " Tools_version:\n"
res += " order: 960\n"
cpt = 950
for step in config["steps"]:
tool = config["params"][step["name"]]
if (config["multiqc"][tool] != "custom"):
......@@ -21,7 +23,7 @@ def report_section_order():
res += " " + "order: " + str(cpt) + "\n"
cpt += -10
for rule in config["outputs"][tool]:
if ((config["params"]["SeOrPe"] == "SE" and not("_PE" in rule)) or (config["params"]["SeOrPe"] == "PE" and not("_SE" in rule))):
if ("SeOrPe" not in config.keys() or (config["params"]["SeOrPe"] == "SE" and not("_PE" in rule)) or (config["params"]["SeOrPe"] == "PE" and not("_SE" in rule))):
for output in config["outputs"][tool][rule]:
if("file" in output.keys() and "mqc" in output["file"] and '{' not in output["file"]): # case of dynamic files ({wildcard}_mqc.png) to deal with
section = re.sub('\_mqc.*$', '', output["file"])
......
pipeline: RADseq_ref
params:
results_dir: /Results
sample_dir: /Data
SeOrPe: PE
quality_check: fastqc
fastqc_SE_output_dir: fastqc_SE
fastqc_PE_output_dir: fastqc_PE
fastqc_threads: 4
null_output_dir: ''
demultiplexing: process_radtags
process_radtags_SE_output_dir: process_radtags/SE
......@@ -105,11 +102,6 @@ params_info:
type: input_dir
SeOrPe:
type: radio
fastqc_threads:
tool: fastqc
rule: fastqc_PE
type: numeric
label: Number of threads to use
process_radtags_barcode_file_select:
tool: process_radtags
rule: process_radtags_PE
......
import os
import re
import sys
def raw_reads(results_dir, sample_dir, SeOrPe):
samples = list()
PE_mark = "" # _R or _
suffixes = list()
dicoSamples = dict() # sample_name: file(s)
files = os.listdir(sample_dir)
if SeOrPe == "PE":
regex = re.compile(r"^(.+)(_R1|_R2|_1|_2)(.+)")
else:
regex = re.compile(r"^(.+?)(\..*)")
for file in files:
res = re.match(regex, file)
if res:
if res.group(1) not in samples:
samples.append(res.group(1))
if SeOrPe == "PE":
suffixes.append(res.group(3))
if len(res.group(2)) == 3:
PE_mark = "_R"
else:
PE_mark = "_"
else:
suffixes.append(res.group(2))
if res.group(1) not in dicoSamples.keys():
dicoSamples[res.group(1)] = list()
dicoSamples[res.group(1)].append(file)
if (len(set(suffixes)) == 1 ):
suffix = list(set(suffixes))[0]
with open(results_dir+"/samples.tsv","w") as sampleTab:
if SeOrPe == "PE":
sampleTab.write("sample\tfile_read_1\tfile_read_2")
else:
sampleTab.write("sample\tfile_read_1")
for sample in sorted(samples):
sampleTab.write("\n"+sample+"\t"+"\t".join(sorted(dicoSamples[sample])))
out = {'samples': sorted(samples), 'sample_suffix': suffix, 'dico': dicoSamples}
if SeOrPe == "SE":
out ["read"] = os.path.join(sample_dir,"{sample}"+suffix)
out ["read2"] = ""
else:
out ["read"] = os.path.join(sample_dir,"{sample}"+PE_mark+"1"+suffix)
out ["read2"] = os.path.join(sample_dir,"{sample}"+PE_mark+"2"+suffix)
return out
else:
exit("Files have different suffixes:" + ','.join(suffixes))
#print(raw_reads(sys.argv[1],sys.argv[2],sys.argv[3]))
\ No newline at end of file
library(SNPRelate)
library(RColorBrewer)
library(gplots)
library(ComplexHeatmap)
library(yaml)
palette = "Accent" #"Spectral"
......@@ -104,9 +104,9 @@ tryCatch({
colnames(ibs$ibs)=ibs$sample.id
rownames(ibs$ibs)=ibs$sample.id
png(filename = paste(parameters$results_dir,parameters$populations_output_dir,"IBS_mqc.png",sep = "/"), height=800, width=800)
heatmap.2(ibs$ibs, col=terrain.colors(20),RowSideColors = pop_color1[ibs$sample.id], ColSideColors = pop_color1[ibs$sample.id], trace="none", cexRow = 1, cexCol = 1)
par(lend = 1) # square line ends for the color legend
legend(0.0, 1.0, ncol=2, legend = names(popc), col = popc, lty= 1,lwd = 5, bty="n")
topAnnot = HeatmapAnnotation(Population = pop_code1, col = list(Population=popc))
leftAnnot = HeatmapAnnotation(Population = pop_code1, col = list(Population=popc),which = "row",show_legend = FALSE)
Heatmap(ibs$ibs, top_annotation=topAnnot, left_annotation=leftAnnot)
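# NOTE: inside tryCatch() the Heatmap object is not auto-printed; wrapping the call in
# draw() (e.g. draw(Heatmap(...))) may be needed for the heatmap to actually be written to the png device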
dev.off()
}, error = function(e) {
png(filename = paste(parameters$results_dir,parameters$populations_output_dir,"IBS_mqc.png",sep = "/"), height=800, width=800)
......
......@@ -85,14 +85,14 @@ singularity run --app appName this_container.sif
apt install -y fastqc=0.11.5+dfsg-6
cd /opt/biotools
wget http://catchenlab.life.illinois.edu/stacks/source/stacks-2.3b.tar.gz
tar -zxvf stacks-2.3b.tar.gz
cd stacks-2.3b/
wget http://catchenlab.life.illinois.edu/stacks/source/stacks-2.5.tar.gz
tar -zxvf stacks-2.5.tar.gz
cd stacks-2.5/
./configure
make -j 10
make install
mv -t ../bin sstacks kmer_filter gstacks tsv2bam process_shortreads populations ustacks clone_filter phasedstacks cstacks process_radtags
cd .. && rm -r stacks-2.3b stacks-2.3b.tar.gz
mv -t ../bin sstacks kmer_filter gstacks tsv2bam process_shortreads populations ustacks phasedstacks cstacks process_radtags
cd .. && rm -r stacks-2.5 stacks-2.5.tar.gz
wget -O bowtie-1.2.3-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/1.2.3/bowtie-1.2.3-linux-x86_64.zip/download
unzip bowtie-1.2.3-linux-x86_64.zip
......@@ -117,9 +117,11 @@ singularity run --app appName this_container.sif
cd ..
rm -r bwa-0.7.17 bwa-0.7.17.tar.bz2
Rscript -e 'install.packages("calibrate",repos="https://cloud.r-project.org/",Ncpus=8, clean=TRUE)'
Rscript -e 'install.packages("calibrate",repos="https://cloud.r-project.org/",Ncpus=8, clean=TRUE);library("calibrate")'
Rscript -e 'BiocManager::install("SNPRelate", version = "3.8",Ncpus=8, clean=TRUE)'
Rscript -e 'BiocManager::install("SNPRelate", version = "3.8",Ncpus=8, clean=TRUE);library("SNPRelate")'
Rscript -e 'library(devtools);install_github("jokergoo/ComplexHeatmap");library("ComplexHeatmap")'
mkdir -p /share/apps/bin
mkdir -p /share/apps/lib
......
......@@ -4,7 +4,7 @@ import shutil
def read_yaml(filepath):
try:
with open(filepath, 'r') as file:
data = yaml.load(file)
data = yaml.load(file, Loader=yaml.FullLoader)
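# PyYAML >= 5.1 warns when yaml.load() is called without an explicit Loader;
# FullLoader parses the full YAML language while avoiding arbitrary object construction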
return data
except IOError as e:
print("Error in file opening:", e)
......
fastqc: 0.11.5
process_radtags: 2.3b
bowtie: 1.2.3
bwa: 0.7.17
samtools_stats: '1.9'
gstacks: 2.3b
populations: 2.3b
......@@ -10,14 +10,14 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
)
,
tags$label("Data directory: "),
tags$label("Directory containing the fastq files: "),
fluidRow(
column(4,shinyDirButton("shinydir_sample_dir",label="Please select a directory", title="Data directory: ")),
column(4,shinyDirButton("shinydir_sample_dir",label="Please select a directory", title="Directory containing the fastq files: ")),
column(8,textInput("sample_dir",label=NULL,value=""))
)
,
radioButtons("SeOrPe", label = "Single end reads (SE) or Paired end reads (PE): ", choices = list("Single end" = "SE", "Paired end" = "PE"), selected = "PE", width = "auto"),
radioButtons("SeOrPe", label = "Single end reads (SE) or Paired end reads (PE): ", choices = list("Single end" = "SE", "Paired end" = "PE"), selected = "SE", width = "auto"),
textAreaInput("memo", label = "Text area for the user", value = "")
......
......@@ -5,8 +5,6 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
selectInput("selectquality_check", label = "Select the tool to use : ", selected = "fastqc", choices = list("fastqc" = "fastqc", "null" = "null")),
conditionalPanel(condition = "input.selectquality_check == 'fastqc'",box(title = "FastQC", width = 12, status = "success", collapsible = TRUE, solidHeader = TRUE,
numericInput("fastqc_threads", label = "Number of threads to use", min = 1, max = NA, step = 1, width = "auto", value = 4),
p("FastQC: A quality control tool for high throughput raw sequence data."),
p("Website : ",a(href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/","https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",target="_blank")),
......
......@@ -27,12 +27,6 @@ save_params <- function(path_param){
# Page : quality_check
res = paste0(res , paste("quality_check:", paste0('"', input$selectquality_check, '"'), "\n", sep = " "))
if(!is.na(as.numeric(input$fastqc_threads))) {
res = paste0(res, paste("fastqc_threads:", input$fastqc_threads, "\n", sep = " "))
} else {
res = paste0(res, paste("fastqc_threads:", paste0('"', input$fastqc_threads, '"'), "\n", sep = " "))
}
# Page : demultiplexing
res = paste0(res , paste("demultiplexing:", paste0('"', input$selectdemultiplexing, '"'), "\n", sep = " "))
res = paste0(res, paste("process_radtags_barcode_file_select:", paste0('"', input$process_radtags_barcode_file_select, '"'), "\n", sep = " "))
......@@ -262,9 +256,6 @@ save_params <- function(path_param){
return(result)
}
d = c(d,a)
get_samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," ",input$SeOrPe),intern = T))
d$samples = get_samples$samples
d$params$sample_suffix = get_samples$suffix
write_yaml(d,path_param,handlers=list(logical = logical))
}
......