Commit b66226e9 authored by mmassaviol's avatar mmassaviol
Browse files

Add Variant_calling workflow

parent 2abd0932
......@@ -29,6 +29,7 @@ Liste des workflows existants:
- RADseq_denovo (stacks)
- Virus_Assembler_Megahit (Megahit + Blast + BWA)
- RNAseq (pseudoalign (kallisto, Salmon) + expression différentielle (Edger, DESeq2))
- Variant_calling (mapping: bwa or bowtie + variant calling: gatk or bcftools)
## 1 Construire un conteneur
......
This diff is collapsed.
import re
import sys
from tools import *
config = read_yaml(sys.argv[1])
def _order_entry(section, order):
    """Return the two YAML lines that give *section* its position in the report."""
    return "  {}:\n    order: {}\n".format(section, order)


def report_section_order(cfg=None):
    """Build the MultiQC configuration text that fixes the report section order.

    The three fixed sections (Rule_graph, params_tab, outputs) come first with
    orders 990, 980 and 970. Then, for every workflow step, the tool selected
    in ``params`` contributes:

    * its MultiQC module name, unless that name is ``"custom"``;
    * one section per static ``*_mqc*`` output file of its rules that match
      the sequencing layout (``SeOrPe``).

    Orders are handed out from 960 downwards in steps of 10.

    Args:
        cfg: Workflow configuration mapping with the keys ``steps``,
            ``params``, ``multiqc`` and ``outputs``. Defaults to the
            module-level ``config``.

    Returns:
        The configuration as a YAML-formatted string.

    Raises:
        KeyError: If the configuration lacks an entry for a selected tool.
    """
    if cfg is None:
        cfg = config  # module-level configuration read from sys.argv[1]

    params = cfg["params"]
    # Rules carrying the marker of the *other* layout are ignored; any layout
    # other than "SE"/"PE" selects no rule at all.
    other_layout = {"SE": "_PE", "PE": "_SE"}.get(params.get("SeOrPe"))

    parts = [
        "skip_generalstats: true\n\n",
        "report_section_order:\n",
        _order_entry("Rule_graph", 990),
        _order_entry("params_tab", 980),
        _order_entry("outputs", 970),
    ]

    cpt = 960
    for step in cfg["steps"]:
        tool = params[step["name"]]

        module = cfg["multiqc"][tool]
        if module != "custom":
            parts.append(_order_entry(module, cpt))
            cpt -= 10

        for rule, outputs in cfg["outputs"][tool].items():
            if other_layout is None or other_layout in rule:
                continue
            for output in outputs:
                filename = output["file"]
                # Dynamic files ({wildcard}_mqc.png) still have to be dealt with.
                if "mqc" in filename and "{" not in filename:
                    # Section id = file name without its "_mqc..." tail.
                    section = re.sub(r"_mqc.*$", "", filename)
                    parts.append(_order_entry(section, cpt))
                    cpt -= 10

    return "".join(parts)
def main():
    """Write the report section-order configuration to the file named by sys.argv[2]."""
    # Build the whole text first so the output file is only opened once the
    # content is ready.
    content = "" + report_section_order()
    with open(sys.argv[2], "w") as out:
        out.write(content)


if __name__ == "__main__":
    # execute only if run as a script
    main()
\ No newline at end of file
#!/usr/bin/env python3
# This script will take a directory and a parameter to tell if the reads are paired end or single end and return the sample list and the suffix
# Needs 2 arguments: reads_directory, SeOrPe
# SeOrPe is SE for single end reads and PE for paired end reads
# Usage: ./get_samples.py reads_directory SeOrPe
import os
import re
import csv
import sys
def sample_list(dir, SeOrPe):
    """List the samples found in a reads directory and their common file suffix.

    For paired-end data (``SeOrPe == "PE"``) a file name is split as
    ``<sample>(_R1|_R2)<suffix>``; for any other value it is split at the
    first dot as ``<sample><suffix>``. The suffix of a sample is recorded the
    first time that sample is seen.

    Args:
        dir: Directory containing the read files.
        SeOrPe: ``"PE"`` for paired-end reads, ``"SE"`` for single-end reads.

    Returns:
        A dict ``{'samples': <sorted sample names>, 'suffix': <shared suffix>}``.

    Raises:
        SystemExit: If no file matches the expected naming scheme, or if the
            samples do not all share the same suffix.
    """
    if SeOrPe == "PE":
        regex = re.compile(r"^(.+?)(_R1|_R2)(.+)")
        suffix_group = 3
    else:
        regex = re.compile(r"^(.+?)(\..*)")
        suffix_group = 2

    samples = set()
    suffixes = set()
    for filename in os.listdir(dir):
        found = regex.match(filename)
        if found is None or found.group(1) in samples:
            continue
        samples.add(found.group(1))
        suffixes.add(found.group(suffix_group))

    if not suffixes:
        sys.exit("No read files found in {}".format(dir))
    if len(suffixes) > 1:
        # Join the suffixes: concatenating the collection itself to the
        # message would raise TypeError instead of reporting the problem.
        sys.exit("Files have different suffixes: " + ", ".join(sorted(suffixes)))
    return {'samples': sorted(samples), 'suffix': suffixes.pop()}
def main():
    """Print the sample list for the directory and layout given on the command line.

    Expects exactly two arguments (reads_directory, SeOrPe); otherwise exits
    with a usage message.
    """
    if len(sys.argv) != 3:
        exit("""Needs 2 arguments: reads_directory, SeOrPe
Usage: ./get_samples.py reads_directory SeOrPe""")
    reads_directory, layout = sys.argv[1:]
    print(sample_list(reads_directory, layout))


if __name__ == "__main__":
    # execute only if run as a script
    main()
pipeline: Variant_calling
params:
results_dir: /Results
sample_dir: /Data
SeOrPe: PE
preprocess: fastp
fastp_PE_output_dir: fastp_PE
fastp_threads: 4
fastp_complexity_threshold: 30
fastp_report_title: fastp report
fastp_adapter_sequence: ''
fastp_adapter_sequence_R2_PE: ''
fastp_P: 20
fastp_correction_PE: true
fastp_low_complexity_filter: true
fastp_overrepresentation_analysis: true
fastp_SE_output_dir: fastp_SE
null_output_dir: ''
mapping: bwa
bwa_index_output_dir: bwa/index
bwa_index_genome_fasta: ''
bwa_index_path: ''
bwa_index_genome_fasta_select: server
bwa_index_algorithm: bwtsw
bwa_mem_PE_output_dir: bwa/mem/PE
index: bowtie/index/index
bwa_mem_threads: 4
bwa_mem_quality0_multimapping: true
bwa_mem_SE_output_dir: bwa/mem/SE
bowtie_index_output_dir: bowtie/index
bowtie_index_genome_fasta: ''
bowtie_index_path: ''
bowtie_index_genome_fasta_select: server
bowtie_index_threads: 4
bowtie_PE_output_dir: bowtie/PE
bowtie_threads: 4
bowtie_minins_PE: 0
bowtie_maxins_PE: 250
bowtie_orientation_PE: ''
bowtie_mult_align_limit: 1
bowtie_best: true
bowtie_strata: true
bowtie_SE_output_dir: bowtie/SE
mark_duplicates: Picard_MarkDuplicates
Picard_MarkDuplicates_output_dir: Picard_MarkDuplicates
Picard_MarkDuplicates_threads: 4
Picard_MarkDuplicates_remove_all_duplicates: true
Picard_MarkDuplicates_samtools_memory: 2
indel_realign: gatk_IndelRealigner
gatk_IndelRealigner_output_dir: gatk_IndelRealigner
gatk_IndelRealigner_threads: 4
gatk_IndelRealigner_samtools_memory: 2
variant_calling: gatk_haplotype_caller
gatk_haplotype_caller_output_dir: gatk_haplotype_caller
gatk_haplotype_caller_threads: 4
bcftools_mpileup_and_call_output_dir: bcftools_call
reference: ''
bam: align.bam
bcftools_mpileup_and_call_threads: 4
samples: []
groups: []
final_step: all
steps:
- title: Preprocessing
name: preprocess
tools:
- fastp
- 'null'
default: fastp
- title: Mapping
name: mapping
tools:
- bwa
- bowtie
default: bwa
- title: Mark Duplicates
name: mark_duplicates
tools:
- Picard_MarkDuplicates
- 'null'
default: Picard_MarkDuplicates
- title: Indel Realign
name: indel_realign
tools:
- gatk_IndelRealigner
- 'null'
default: gatk_IndelRealigner
- title: Variant calling
name: variant_calling
tools:
- gatk_haplotype_caller
- bcftools_mpileup
default: gatk_haplotype_caller
params_info:
results_dir:
type: output_dir
sample_dir:
type: input_dir
SeOrPe:
type: radio
fastp_threads:
tool: fastp
rule: fastp_SE
type: numeric
fastp_complexity_threshold:
tool: fastp
rule: fastp_SE
type: numeric
fastp_report_title:
tool: fastp
rule: fastp_SE
type: text
fastp_adapter_sequence:
tool: fastp
rule: fastp_SE
type: text
fastp_adapter_sequence_R2_PE:
tool: fastp
rule: fastp_PE
type: text
fastp_P:
tool: fastp
rule: fastp_SE
type: numeric
fastp_correction_PE:
tool: fastp
rule: fastp_PE
type: checkbox
fastp_low_complexity_filter:
tool: fastp
rule: fastp_SE
type: checkbox
fastp_overrepresentation_analysis:
tool: fastp
rule: fastp_SE
type: checkbox
bwa_index_path:
tool: bwa
rule: bwa_index
type: input_dir
bwa_index_genome_fasta_select:
tool: bwa
rule: bwa_index
type: select
bwa_index_genome_fasta:
tool: bwa
rule: bwa_index
type: input_file
bwa_index_algorithm:
tool: bwa
rule: bwa_index
type: radio
bwa_mem_threads:
tool: bwa
rule: bwa_mem_SE
type: numeric
bwa_mem_quality0_multimapping:
tool: bwa
rule: bwa_mem_SE
type: checkbox
bowtie_index_path:
tool: bowtie
rule: bowtie_index
type: input_dir
bowtie_index_genome_fasta_select:
tool: bowtie
rule: bowtie_index
type: select
bowtie_index_genome_fasta:
tool: bowtie
rule: bowtie_index
type: input_file
bowtie_index_threads:
tool: bowtie
rule: bowtie_index
type: numeric
bowtie_threads:
tool: bowtie
rule: bowtie_SE
type: numeric
bowtie_minins_PE:
tool: bowtie
rule: bowtie_PE
type: numeric
bowtie_maxins_PE:
tool: bowtie
rule: bowtie_PE
type: numeric
bowtie_orientation_PE:
tool: bowtie
rule: bowtie_PE
type: radio
bowtie_mult_align_limit:
tool: bowtie
rule: bowtie_SE
type: numeric
bowtie_best:
tool: bowtie
rule: bowtie_SE
type: checkbox
bowtie_strata:
tool: bowtie
rule: bowtie_SE
type: checkbox
Picard_MarkDuplicates_threads:
tool: Picard_MarkDuplicates
rule: Picard_MarkDuplicates
type: numeric
Picard_MarkDuplicates_remove_all_duplicates:
tool: Picard_MarkDuplicates
rule: Picard_MarkDuplicates
type: checkbox
Picard_MarkDuplicates_samtools_memory:
tool: Picard_MarkDuplicates
rule: Picard_MarkDuplicates
type: numeric
gatk_IndelRealigner_threads:
tool: gatk_IndelRealigner
rule: gatk_IndelRealigner
type: numeric
gatk_IndelRealigner_samtools_memory:
tool: gatk_IndelRealigner
rule: gatk_IndelRealigner
type: numeric
gatk_haplotype_caller_threads:
tool: gatk_haplotype_caller
rule: gatk_haplotype_caller
type: numeric
bcftools_mpileup_and_call_threads:
tool: bcftools_mpileup
rule: bcftools_mpileup_and_call
type: numeric
prepare_report_scripts: []
prepare_report_outputs: {}
outputs:
fastp:
fastp_PE:
- name: report_html
file: fastp_report_{sample}.html
description: "Rapport HTML du pr\xE9processing effectu\xE9"
- name: report_json
file: fastp_report_{sample}.json
description: "Rapport JSON du pr\xE9processing effectu\xE9"
- name: read
file: '{sample}_R1.fq.gz'
description: "Reads R1 pr\xE9process\xE9s"
- name: read2
file: '{sample}_R2.fq.gz'
description: "Reads R2 pr\xE9process\xE9s"
fastp_SE:
- name: report_html
file: fastp_report_{sample}.html
description: "Rapport HTML du pr\xE9processing effectu\xE9"
- name: report_json
file: fastp_report_{sample}.json
description: "Rapport JSON du pr\xE9processing effectu\xE9"
- name: read
file: '{sample}.fq.gz'
description: "Reads pr\xE9process\xE9s"
'null':
'null': []
bwa:
bwa_index:
- name: index
file: index
description: Index files for bwa alignment
bwa_mem_PE:
- name: bam
file: '{sample}.bam'
description: Alignment files
bwa_mem_SE:
- name: bam
file: '{sample}.bam'
description: Alignment files
bowtie:
bowtie_index:
- name: index
file: index
description: Index files for bowtie alignment
bowtie_PE:
- name: bam
file: '{sample}.bam'
description: Alignment files
bowtie_SE:
- name: bam
file: '{sample}.bam'
description: Alignment files
Picard_MarkDuplicates:
Picard_MarkDuplicates: []
gatk_IndelRealigner:
gatk_IndelRealigner: []
gatk_haplotype_caller:
gatk_haplotype_caller: []
bcftools_mpileup:
bcftools_mpileup_and_call:
- name: VCF
file: variants.vcf.gz
description: Variant file (vcf)
multiqc:
fastp: fastp
'null': custom
bwa: custom
bowtie: bowtie1
Picard_MarkDuplicates: custom
gatk_IndelRealigner: custom
gatk_haplotype_caller: bcftools
bcftools_mpileup: bcftools
Bootstrap: localimage
From: ../base.sif
%environment
    export PATH=/opt/biotools/bin:$PATH
    export ROOTSYS=/opt/biotools/root
    # Double quotes so the variables are expanded (single quotes would store
    # the literal text "$LD_LIBRARY_PATH:$ROOTSYS/lib"). The ${VAR:+...} form
    # avoids a leading empty entry when LD_LIBRARY_PATH is unset.
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$ROOTSYS/lib"
%labels
Author YourName
Version v0.0.1
build_date 2018 déc. 07
%runscript
    # Default message when the container is run without --app.
    # One echo per line: whether echo interprets "\n" depends on the shell.
    echo "This container contains four apps (UI, Snakemake, getConfigfile and getSamples)."
    echo "UI is a user interface to set up the workflow and launch it."
    echo "Snakemake let you provide your configfile and other parameters to the snakemake command and launch it."
    echo "getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app."
    echo "getSamples gives you the list of samples detected in a given directory and their file suffix."
    echo "To get help for an app :"
    echo "singularity help --app appName this_container.sif"
    echo "To run an app :"
    echo "singularity run --app appName this_container.sif"
%apprun UI
# Launch the Shiny app stored at /sagApp/app.R.
# $1: host address passed to shiny::runApp, $2: port.
exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
%apphelp UI
To run the UI app you should bind data and results directories like in the following example.
You must also provide the host address and port where the shiny app will be launched
example : singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
%apprun Snakemake
    # $1: snakemake configfile, $2: number of cores; any further arguments
    # are forwarded unchanged to snakemake.
    if [ "$#" -lt 2 ]; then
        echo "Usage: singularity run --app Snakemake this_container.sif configfile cores [snakemake options]" >&2
        exit 1
    fi
    configfile=$1
    cores=$2
    shift
    shift
    # Quote everything so paths and options containing spaces survive intact.
    exec snakemake -s /workflow/Snakefile all --configfile "$configfile" --cores "$cores" "$@"
%apphelp Snakemake
To run the Snakemake app you should bind data and results directories like in the following example.
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
example : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
%apprun getConfigfile
# Copy the parameters file shipped in the container (/workflow/params.total.yml)
# to ./params.yml in the current working directory.
exec cp /workflow/params.total.yml ./params.yml
%apphelp getConfigfile
To run the getConfigfile app you don't need to bind directories. This app will only copy the default parameters file from the container to your local disk.
example : singularity run --app getConfigfile this_container.sif
%apprun getSamples
    # Arguments: reads_directory SeOrPe. Forward them quoted so a directory
    # path containing spaces is not split; get_samples.py checks the count.
    exec python3 /workflow/get_samples.py "$@"
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app will give you the list of samples detected in a given directory and their file suffix.
example : singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake lets you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (useful for filling samples and sample_suffix in parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
singularity run --app appName this_container.sif
%files
./files /workflow
./sagApp /sagApp
%post
    # Bind-mount targets used by the apps (-B ...:/Data, -B ...:/Results).
    # -p: do not fail when the base image already provides them.
    mkdir -p /Data
    mkdir -p /Results

    apt-get update -y

    # fastp: prebuilt binary.
    # NOTE(review): unversioned download over plain HTTP -- consider pinning a release.
    wget http://opengene.org/fastp/fastp
    chmod a+x ./fastp
    mv fastp /opt/biotools/bin/fastp

    # bwa 0.7.17: built from source, binary moved to /opt/biotools/bin.
    cd /opt/biotools
    wget https://github.com/lh3/bwa/releases/download/v0.7.17/bwa-0.7.17.tar.bz2
    tar -xvjf bwa-0.7.17.tar.bz2
    cd bwa-0.7.17
    make
    mv bwa ../bin/
    cd ..
    rm -r bwa-0.7.17 bwa-0.7.17.tar.bz2

    # samtools 1.9: built from source, binary moved to /opt/biotools/bin.
    cd /opt/biotools
    wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2
    tar -xvjf samtools-1.9.tar.bz2
    cd samtools-1.9
    ./configure && make
    cd ..
    mv samtools-1.9/samtools bin/samtools
    rm -r samtools-1.9 samtools-1.9.tar.bz2

    # bowtie 1.2.2: prebuilt binaries copied to /usr/bin.
    wget https://vorboss.dl.sourceforge.net/project/bowtie-bio/bowtie/1.2.2/bowtie-1.2.2-linux-x86_64.zip
    unzip bowtie-1.2.2-linux-x86_64.zip
    cp bowtie-1.2.2-linux-x86_64/bowtie* /usr/bin
    rm -rf bowtie-1.2.2*

    # Picard 2.20.8: jar stored in /opt/biotools/bin.
    cd /opt/biotools/bin
    wget https://github.com/broadinstitute/picard/releases/download/2.20.8/picard.jar

    # GATK 3.6 archive, unpacked into /opt/biotools/gatk3.
    cd /opt/biotools
    wget -O GenomeAnalysisTK-3.6-0.tar.bz2 'https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.6-0-g89b7209'
    mkdir gatk3
    tar -C gatk3 -xjf GenomeAnalysisTK-3.6-0.tar.bz2
    rm GenomeAnalysisTK-3.6-0.tar.bz2
    rm -r gatk3/resources

    # GATK 4.1.1.0: unpacked in /opt/biotools, launcher linked into bin.
    cd /opt/biotools
    wget https://github.com/broadinstitute/gatk/releases/download/4.1.1.0/gatk-4.1.1.0.zip
    unzip gatk-4.1.1.0.zip
    # The archive is not needed once extracted; drop it like the other downloads.
    rm gatk-4.1.1.0.zip
    cd bin
    ln -s /opt/biotools/gatk-4.1.1.0/gatk gatk

    # bcftools 1.9: built from source and installed under /opt/biotools.
    cd /opt/biotools
    wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2
    tar -xvjf bcftools-1.9.tar.bz2
    cd bcftools-1.9
    ./configure --prefix=/opt/biotools
    make
    make install
    cd .. && rm bcftools-1.9.tar.bz2

    # apt-get rather than apt: the apt front end does not guarantee a stable
    # command-line interface for scripts.
    apt-get install -y tabix
import oyaml as yaml
def read_yaml(filepath):
    """Load a YAML file and return its content.

    Uses ``safe_load``: calling ``load`` without an explicit ``Loader`` can
    build arbitrary Python objects from the file and is rejected by recent
    PyYAML releases.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The parsed document, or ``None`` when the file cannot be opened or
        parsed (the error is printed).
    """
    try:
        with open(filepath, 'r') as stream:
            return yaml.safe_load(stream)
    except IOError as e:
        print("Error in file opening:", e)
    except yaml.YAMLError as exc:
        print("Error in yaml loading:", exc)
    # Errors are reported above; callers get None in that case.
    return None
def write_yaml(filepath, data):
    """Dump *data* to *filepath* as block-style YAML.

    An IOError raised while opening or writing the file is printed rather
    than propagated.
    """
    try:
        with open(filepath, 'w') as stream:
            yaml.dump(data, stream, default_flow_style=False)
    except IOError as err:
        print("Error in file opening:", err)
\ No newline at end of file
# Build a "chooser" input: two <select> lists (left and right) separated by a
# column holding a right-arrow and a left-arrow icon.
#   inputId                   id given to the enclosing div (class "chooser")
#   leftLabel, rightLabel     accepted but not used in the body
#   leftChoices, rightChoices values turned into <option> tags for each list
#   size                      `size` attribute of both <select> elements
#   multiple                  TRUE to set the `multiple` attribute on both lists
chooserInput <- function(inputId, leftLabel, rightLabel, leftChoices, rightChoices,
                         size = 5, multiple = FALSE) {

  leftOptions <- lapply(leftChoices, tags$option)
  rightOptions <- lapply(rightChoices, tags$option)

  # The attribute is either "multiple" or left out entirely (NULL).
  multipleAttr <- if (multiple) "multiple" else NULL

  # Script and style are wrapped in singleton() so they are emitted only once.
  headAssets <- singleton(tags$head(
    tags$script(src="chooser-binding.js"),
    tags$style(type="text/css",
      HTML(".chooser-container { display: inline-block; }")
    )
  ))

  leftPane <- div(class="chooser-container chooser-left-container",
    tags$select(class="left", size=size, multiple=multipleAttr, leftOptions)
  )
  centerPane <- div(class="chooser-container chooser-center-container",
    icon("arrow-circle-o-right", "right-arrow fa-3x text-primary"),
    tags$br(),
    icon("arrow-circle-o-left", "left-arrow fa-3x text-primary")
  )
  rightPane <- div(class="chooser-container chooser-right-container",
    tags$select(class="right", size=size, multiple=multipleAttr, rightOptions)
  )

  tagList(
    headAssets,
    div(id=inputId, class="chooser", leftPane, centerPane, rightPane)
  )
}
# Input handler for the "shinyjsexamples.chooser" type: NULL stays NULL,
# otherwise the `left` and `right` fields are returned as character vectors.
# force = TRUE is passed so an existing registration for this type is replaced.
registerInputHandler("shinyjsexamples.chooser", function(data, ...) {
  if (is.null(data)) {
    return(NULL)
  }
  list(left=as.character(data$left), right=as.character(data$right))
}, force = TRUE)
\ No newline at end of file
MenuGauche = sidebarMenu(id="sidebarmenu",
menuItem("Global parameters", tabName="global_params", icon=icon("pencil", lib="font-awesome"), newtab=FALSE),