Commit b0f119e5 authored by mmassaviol's avatar mmassaviol
Browse files

Update workflow Mito_Assembler_Megahit

(multiple mitochondrion case)
parent 35bf0bcd
......@@ -282,13 +282,14 @@ rule mitoz_annotate_SE:
**mitoz_annotate_inputs()
output:
annotate = config["results_dir"]+"/mitoz_annotate_SE.result/mitoz_annotate_SE_mitoscaf.fa.gbf",
circos_png = config["results_dir"]+"/mitoz_annotate_SE/circos_mqc.png",
#circos_png = config["results_dir"]+"/mitoz_annotate_SE/circos_mqc.png",
circos_bam = config["results_dir"]+"/mitoz_annotate_SE/circos.bam"
log: config["results_dir"]+'/logs/mitoz_annotate/mitoz_annotate_log.txt'
threads: config["mitoz_annotate_threads"]
params:
tmp_circos = config["results_dir"]+"/mitoz_annotate_SE.tmp/mitoz_annotate_SE.annotation/visualization/circos.sorted.bam",
circos_png = config["results_dir"]+"/mitoz_annotate_SE.result/mitoz_annotate_SE.circos.png",
circos_dir = config["results_dir"]+"/mitoz_annotate_SE.tmp/mitoz_annotate_SE.annotation/visualization/",
results_dir = config["results_dir"]+"/mitoz_annotate_SE",
prefix = "mitoz_annotate_SE",
clade = config["clade"]
shell:
......@@ -298,8 +299,28 @@ rule mitoz_annotate_SE:
"--thread_number {threads} "
"--outprefix {params.prefix} "
"--clade {params.clade} "
"|& tee {log} && mv {params.tmp_circos} {output.circos_bam} "
"&& mv {params.circos_png} {output.circos_png}"
"|& tee {log} && mv {params.tmp_circos} {output.circos_bam} && "
# multiple mito case
"cd {params.circos_dir} && "
"sed -i \"s/circos.png/circos_mqc.png/g\" circos.conf && "
"sed -i \"s|{params.circos_dir}||g\" circos.conf && "
"if [ $(grep \"mt[0-9]\{{1,\}}\" circos.dep | cut -f 1 | uniq | wc -l) -gt 1 ] \n"
"then \n"
" for mito in $(grep \"mt[0-9]\{{1,\}}\" circos.dep | cut -f 1 | uniq) \n"
" do \n"
" sed \"s/circos/circos$mito/g\" circos.conf > circos$mito.conf && "
" grep $mito circos.depth.txt > circos$mito.depth.txt && "
" grep $mito circos.karyotype.txt > circos$mito.karyotype.txt && "
" grep $mito circos.gene.text.txt > circos$mito.gene.text.txt && "
" grep $mito circos.strand.text.txt > circos$mito.strand.text.txt && "
" grep $mito circos.fa.gc.txt > circos$mito.fa.gc.txt && "
" grep $mito circos.features.txt > circos$mito.features.txt && "
" cd .. && "
" circos -conf visualization/circos$mito.conf \n"
" done \n"
"else \n"
" mv circos.png circos_mqc.png \n"
"fi \n"
rule mitoz_annotate_PE:
input:
......@@ -307,13 +328,13 @@ rule mitoz_annotate_PE:
output:
annotate = config["results_dir"]+"/mitoz_annotate_PE.result/mitoz_annotate_PE_mitoscaf.fa.gbf",
circos_bam = config["results_dir"]+"/mitoz_annotate_PE/circos.bam",
circos_png = config["results_dir"]+"/mitoz_annotate_PE/circos_mqc.png",
#circos_png = config["results_dir"]+"/mitoz_annotate_PE/circos_mqc.png",
log: config["results_dir"]+'/logs/mitoz_annotate/mitoz_annotate_log.txt'
threads: config["mitoz_annotate_threads"]
params:
tmp_circos = config["results_dir"]+"/mitoz_annotate_PE.tmp/mitoz_annotate_PE.annotation/visualization/circos.sorted.bam",
circos_png = config["results_dir"]+"/mitoz_annotate_PE.result/mitoz_annotate_PE.circos.png",
result_dir = config["results_dir"]+"/mitoz_annotate_PE.result",
circos_dir = config["results_dir"]+"/mitoz_annotate_PE.tmp/mitoz_annotate_PE.annotation/visualization/",
results_dir = config["results_dir"]+"/mitoz_annotate_PE",
prefix = "mitoz_annotate_PE",
clade = config["clade"]
shell:
......@@ -325,8 +346,28 @@ rule mitoz_annotate_PE:
"--thread_number {threads} "
"--outprefix {params.prefix} "
"--clade {params.clade} "
"|& tee {log} && mv {params.tmp_circos} {output.circos_bam} "
"&& mv {params.circos_png} {output.circos_png} "
"|& tee {log} && mv {params.tmp_circos} {output.circos_bam} && "
# multiple mito case
"cd {params.circos_dir} && "
"sed -i \"s/circos.png/circos_mqc.png/g\" circos.conf && "
"sed -i \"s|{params.circos_dir}||g\" circos.conf && "
"if [ $(grep \"mt[0-9]\{{1,\}}\" circos.dep | cut -f 1 | uniq | wc -l) -gt 1 ] \n"
"then \n"
" for mito in $(grep \"mt[0-9]\{{1,\}}\" circos.dep | cut -f 1 | uniq) \n"
" do \n"
" sed \"s/circos/circos$mito/g\" circos.conf > circos$mito.conf && "
" grep $mito circos.depth.txt > circos$mito.depth.txt && "
" grep $mito circos.karyotype.txt > circos$mito.karyotype.txt && "
" grep $mito circos.gene.text.txt > circos$mito.gene.text.txt && "
" grep $mito circos.strand.text.txt > circos$mito.strand.text.txt && "
" grep $mito circos.fa.gc.txt > circos$mito.fa.gc.txt && "
" grep $mito circos.features.txt > circos$mito.features.txt && "
" cd .. && "
" circos -conf visualization/circos$mito.conf \n"
" done \n"
"else \n"
" mv circos.png circos_mqc.png \n"
"fi \n"
ruleorder: mitoz_annotate_PE > mitoz_annotate_SE
......
#!/usr/bin/env python3
# This script will take a directory and a parameter to tell if the reads are paired end or single end and return the sample list and the suffix
# Needs 2 arguments: reads_directory, SeOrPe
# SeOrPe is SE for single end reads and PE for paired end reads
# Usage: ./get_samples.py reads_directory SeOrPe
import os
import re
import csv
import sys
def sample_list(dir, filesuffix=".fastq.gz"):
    """Return the sorted, de-duplicated sample names found in a directory.

    A file contributes a sample name when its name has the form
    ``<sample><read tag><filesuffix>``, where the read tag is ``_R1``,
    ``_R2`` or nothing at all. The sample name is the part before the
    read tag.

    Args:
        dir: Path of the directory whose entries are listed.
        filesuffix: Literal file-name suffix that read files must end with.

    Returns:
        Alphabetically sorted list of unique sample names (empty when no
        file matches).
    """
    # The suffix is user-supplied literal text, not a regex: escape it so
    # that "." only matches a dot, and require it to end the file name so
    # that e.g. "x.fastq.gz.md5" is not mistaken for a read file.
    pattern = re.compile("(.+?)(_R1|_R2|)" + re.escape(filesuffix))
    samples = set()
    for filename in os.listdir(dir):
        match = pattern.fullmatch(filename)
        if match:
            samples.add(match.group(1))
    return sorted(samples)
def sample_list2(dir, SeOrPe):
def sample_list(dir, SeOrPe):
samples = list()
suffixes = list()
files = os.listdir(dir)
if SeOrPe == "PE":
regex = re.compile("^(.+?)(_R1|_R2)(.+)")
regex = re.compile(r"^(.+?)(_R1|_R2)(.+)")
else:
regex = re.compile("^(.+?)(\..*)")
regex = re.compile(r"^(.+?)(\..*)")
for file in files:
res = re.match(regex, file)
if res:
......@@ -34,25 +27,17 @@ def sample_list2(dir, SeOrPe):
suffixes.append(res.group(2))
if (len(set(suffixes)) == 1 ):
return {'samples': samples, 'suffix': set(suffixes)}
return {'samples': sorted(samples), 'suffix': list(set(suffixes))[0]}
else:
pass
exit("Files have different suffixes:" + suffixes)
def group_list(dir, groupfile, filesuffix=".fastq.gz"):
samples = sample_list(dir,filesuffix)
if os.path.isfile(groupfile):
groups = dict()
with open(groupfile, mode="r") as infile:
reader = csv.reader(infile)
groups = {row[0]: row[1] for row in reader}
return {sample: groups[sample] for sample in samples}
def main():
if len(sys.argv) == 3:
print(sample_list(sys.argv[1],sys.argv[2]))
else:
return {sample: "group" for sample in samples}
#print(sample_list2(sys.argv[1],sys.argv[2]))
exit("""Need two parameters: workflow name, output dir, (local_config)
Usage: ./generate_workflow.py workflow_name output_dir (local_config)""")
if len(sys.argv) == 4:
print(group_list(sys.argv[1], sys.argv[2], sys.argv[3]))
else:
print(group_list(sys.argv[1], sys.argv[2]))
if __name__ == "__main__":
# execute only if run as a script
main()
pipeline: Mito_Assembler_Megahit
params:
results_dir: /Results
help_sample: ''
sample_dir: /Data
sample_suffix: .fastq.gz
SeOrPe: PE
clade: Arthropoda
quality_check: fastqc
......@@ -57,10 +57,10 @@ steps:
params_info:
results_dir:
type: output_dir
help_sample:
type: help
sample_dir:
type: input_dir
sample_suffix:
type: text
SeOrPe:
type: radio
clade:
......
......@@ -21,9 +21,6 @@ echo "To get help for an app :\nsingularity help --app appName this_container.si
echo "To run an app :\nsingularity run --app appName this_container.sif"
########################
# App UI
########################
%apprun UI
exec Rscript -e "shiny::runApp('/sagApp/app.R',host='$1',port=$2)"
......@@ -33,9 +30,6 @@ You must also provide the host address and port where the shiny app will be laun
example: singularity run --app UI -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif 127.0.0.1 1234
########################
# App Snakemake
########################
%apprun Snakemake
configfile=$1
cores=$2
......@@ -48,9 +42,7 @@ To run the Snakemake app you should bind data and results directories like in th
You must also provide the configfile and the number of cores provided to snakemake command (you can add other parameters after these two)
example: singularity run --app Snakemake -B /path/to/data/directory:/Data -B /path/to/store/Results:/Results this_container.sif myconfig.yml 16 otherparams
########################
# App getConfigfile
########################
%apprun getConfigfile
exec cp /workflow/params.total.yml ./params.yml
......@@ -58,11 +50,21 @@ exemple : singularity run --app Snakemake -B /path/to/data/directory:/Data -B /p
To run the getConfigfile app you don't need to bind directories. This app only copies the default parameters file from the container to your local disk.
example: singularity run --app getConfigfile this_container.sif
%apprun getSamples
exec python3 /workflow/get_samples.py $1 $2
%apphelp getSamples
To run the getSamples app you need to bind the data directory. This app gives you the list of samples detected in a given directory and their file suffix.
example: singularity run --app getSamples -B /path/to/data/directory:/Data this_container.sif /Data PE
%help
This container contains three apps (UI, Snakemake and getConfigfile).
This container contains four apps (UI, Snakemake, getConfigfile and getSamples).
* UI is a user interface to set up the workflow and launch it.
* Snakemake lets you provide your configfile and other parameters to the snakemake command and launch it.
* getConfigfile give you a copy of a default parameters file to fill and use with the Snakemake app
* getConfigfile gives you a copy of a default parameters file to fill and use with the Snakemake app.
* getSamples gives you the list of samples detected in a given directory and their file suffix (useful for filling samples and sample_suffix in the parameters file).
To get help for an app :
singularity help --app appName this_container.sif
To run an app :
......
......@@ -10,6 +10,8 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
)
,
p("The Data directory must only contain the reads files (fastq). In case of paired end reads, the files names must contain '_R1' or '_R2'"),
tags$label("Data directory: "),
fluidRow(
column(4,shinyDirButton("shinydir_sample_dir",label="Please select a directory", title="Data directory: ")),
......@@ -17,8 +19,6 @@ box(title = "Parameters :", width = 12, status = "primary", collapsible = TRUE,
)
,
textInput("sample_suffix", label = "Samples suffix: ", value = ".fastq.gz", width = "auto"),
radioButtons("SeOrPe", label = "Single end reads (SE) or Paired end reads (PE): ", choices = list("Single end" = "SE", "Paired end" = "PE"), selected = "PE", width = "auto"),
radioButtons("clade", label = "Taxa group for MitoZ : ", choices = list("Arthropoda" = "Arthropoda", "Chordata" = "Chordata", "Echinodermata" = "Echinodermata", "Annelida-segmented-worms" = "Annelida-segmented-worms", "Bryozoa" = "Bryozoa", "Mollusca" = "Mollusca", "Nematoda" = "Nematoda", "Nemertea-ribbon-worms" = "Nemertea-ribbon-worms", "Porifera-sponges" = "Porifera-sponges"), selected = "Arthropoda", width = "auto"),
......
......@@ -13,12 +13,6 @@ save_params <- function(path_param){
res = paste0(res, paste("sample_dir:", paste0('"', input$sample_dir, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$sample_suffix))) {
res = paste0(res, paste("sample_suffix:", input$sample_suffix, "\n", sep = " "))
} else {
res = paste0(res, paste("sample_suffix:", paste0('"', input$sample_suffix, '"'), "\n", sep = " "))
}
if(!is.na(as.numeric(input$SeOrPe))) {
res = paste0(res, paste("SeOrPe:", input$SeOrPe, "\n", sep = " "))
} else {
......@@ -92,10 +86,9 @@ save_params <- function(path_param){
return(result)
}
d = c(d,a)
samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," /Data/groups.csv ", input$sample_suffix),intern = T))
d$samples = names(samples)
names(samples) = NULL
d$groups = unlist(samples)
get_samples = yaml.load(system(paste0("python3 /workflow/get_samples.py ",input$sample_dir," ",input$SeOrPe),intern = T))
d$samples = get_samples$samples
d$params$sample_suffix = get_samples$suffix
write_yaml(d,path_param,handlers=list(logical = logical))
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment