Commit 0b96bd1a authored by mmassaviol
Browse files

Update long_read_assembly (add raw_long_reads)

parent a5097c0a
import os
import re
import sys
def raw_long_reads(results_dir, sample_dir):
    """Scan ``sample_dir`` for read files and write a sample sheet.

    Each directory entry whose name matches ``^(.+?)(\\..*)`` is split at
    the first dot that follows at least one character: the part before it
    is the sample name, the remainder (dot included) is the suffix. Entries
    that do not match (e.g. names without such a dot) are ignored.

    A tab-separated file ``samples.tsv`` is written into ``results_dir``
    with a header line followed by one line per sample listing its file
    names. No trailing newline is written.

    Args:
        results_dir: Existing directory that receives ``samples.tsv``.
        sample_dir: Directory whose entries are scanned.

    Returns:
        A dict with the keys:
        ``samples`` - sorted list of sample names,
        ``suffix``  - the suffix shared by all samples,
        ``dico``    - mapping of sample name to its list of file names,
        ``read``    - path pattern ``<sample_dir>/{sample}<suffix>``.

    Raises:
        SystemExit: If no entry of ``sample_dir`` matches, or if the
            samples do not all share the same suffix.
    """
    samples = []
    suffixes = []
    dicoSamples = {}  # sample_name: file(s)
    regex = re.compile(r"^(.+?)(\..*)")

    # os.listdir() returns entries in arbitrary order. Only the first file
    # seen for a sample contributes its suffix, so iterate in sorted order
    # to make the recorded suffix (and the mismatch check) deterministic.
    for file_name in sorted(os.listdir(sample_dir)):
        res = regex.match(file_name)
        if not res:
            continue
        sample, suffix = res.group(1), res.group(2)
        if sample not in dicoSamples:
            samples.append(sample)
            suffixes.append(suffix)
            dicoSamples[sample] = []
        dicoSamples[sample].append(file_name)

    # An empty/unmatched directory used to be reported as a suffix
    # mismatch; give it its own message instead.
    if not suffixes:
        sys.exit("No sample files found in " + sample_dir)
    if len(set(suffixes)) != 1:
        sys.exit("Files have different suffixes:" + ','.join(suffixes))

    suffix = suffixes[0]
    samples.sort()
    with open(os.path.join(results_dir, "samples.tsv"), "w") as sampleTab:
        sampleTab.write("sample\tfile_reads")
        for sample in samples:
            sampleTab.write(
                "\n" + sample + "\t" + "\t".join(sorted(dicoSamples[sample]))
            )

    out = {'samples': samples, 'suffix': suffix, 'dico': dicoSamples}
    out["read"] = os.path.join(sample_dir, "{sample}" + suffix)
    return out
#print(raw_reads(sys.argv[1],sys.argv[2])
\ No newline at end of file
{
name: raw_long_reads,
function_call: "raw_long_reads(config['results_dir'], config['sample_dir'])",
options: [
{
name: "sample_dir",
type: "input_dir",
value: "/Data",
label: "Directory containing the fastq files: ",
volumes: [Data: "/Data", Results: "/Results"]
},
]
}
\ No newline at end of file
......@@ -7,7 +7,6 @@ import csv
# Wildcards #
#############
# Workflow-level settings read from the top-level config mapping.
# NOTE(review): SAMPLES is assigned again further down from
# raw_long_reads['samples']; the diff markers are missing in this rendering,
# so this first assignment may be the line the commit removed - confirm.
SAMPLES = config["samples"]
STEPS = config["steps"]
PREPARE_REPORT_OUTPUTS = config["prepare_report_outputs"]
PREPARE_REPORT_SCRIPTS = config["prepare_report_scripts"]
......@@ -19,41 +18,38 @@ config = config["params"]
# Inputs #
##########
# Generic input functions
## get raw_reads
# Returns a dict whose "read" entry is the pattern
# config['sample_dir'] + '/{sample}' + config["sample_suffix"] expanded over SAMPLES.
# NOTE(review): diff +/- markers are missing in this rendering; this helper
# looks like the pre-commit code that raw_long_reads replaces - confirm.
def raw_reads():
inputs = dict()
inputs["read"] = expand(config['sample_dir']+'/{sample}'+config["sample_suffix"],sample=SAMPLES)
return inputs
# raw_inputs function call
# The name raw_long_reads is rebound here from the function to the dict it
# returns, so the function itself is no longer reachable under this name.
raw_long_reads = raw_long_reads(config['results_dir'], config['sample_dir'])
# SAMPLES is taken from the returned dict's 'samples' entry.
SAMPLES = raw_long_reads['samples']
# Tools inputs functions
# Returns the inputs dict for minimap2_overlap_self; its "read" entry is
# raw_long_reads["read"] expanded over SAMPLES.
def minimap2_overlap_self_inputs():
inputs = dict()
# NOTE(review): diff +/- markers are missing in this rendering; the next line
# appears to be the removed version, superseded by the assignment after it - confirm.
inputs = raw_reads()
inputs["read"] = expand(raw_long_reads["read"],sample=SAMPLES)
return inputs
# Returns the inputs dict for miniasm: "reads" (raw_long_reads["read"] expanded
# over SAMPLES) and "paf" (the reads_overlaps output of rule minimap2_overlap_self).
def miniasm_inputs():
inputs = dict()
# NOTE(review): diff +/- markers are missing in this rendering; the next line
# appears to be the removed version and is overwritten by the line after it - confirm.
inputs["reads"] = raw_reads()["read"]
inputs["reads"] = expand(raw_long_reads["read"],sample=SAMPLES)
inputs["paf"] = rules.minimap2_overlap_self.output.reads_overlaps
return inputs
# Returns the inputs dict for minimap2_reference: "reads" (raw_long_reads["read"]
# expanded over SAMPLES) and "fasta" (the assembly_fasta output of rule miniasm).
def minimap2_reference_inputs():
inputs = dict()
# NOTE(review): diff +/- markers are missing in this rendering; the next line
# appears to be the removed version and is overwritten by the line after it - confirm.
inputs["reads"] = raw_reads()["read"]
inputs["reads"] = expand(raw_long_reads["read"],sample=SAMPLES)
inputs["fasta"] = rules.miniasm.output.assembly_fasta
return inputs
# Returns the inputs dict for racon: "reads" (raw_long_reads["read"] expanded
# over SAMPLES), "assembly" (assembly_fasta output of rule miniasm) and
# "overlaps" (reads_mapping output of rule minimap2_reference).
def racon_inputs():
inputs = dict()
# NOTE(review): diff +/- markers are missing in this rendering; the next line
# appears to be the removed version and is overwritten by the line after it - confirm.
inputs["reads"] = raw_reads()["read"]
inputs["reads"] = expand(raw_long_reads["read"],sample=SAMPLES)
inputs["assembly"] = rules.miniasm.output.assembly_fasta
inputs["overlaps"] = rules.minimap2_reference.output.reads_mapping
return inputs
# Returns the inputs dict for medaka: "reads" (raw_long_reads["read"] expanded
# over SAMPLES) and "assembly_fasta" (the assembly_corrected output of rule racon).
def medaka_inputs():
inputs = dict()
# NOTE(review): diff +/- markers are missing in this rendering; the next line
# appears to be the removed version and is overwritten by the line after it - confirm.
inputs["reads"] = raw_reads()["read"]
inputs["reads"] = expand(raw_long_reads["read"],sample=SAMPLES)
inputs["assembly_fasta"] = rules.racon.output.assembly_corrected
return inputs
......
......@@ -4,6 +4,7 @@
description: "",
version: "0.0.1",
author: "MBB",
input: raw_long_reads,
steps:
[
{ title: Find overlaps, name: find_overlaps, tools: [minimap2_overlap_self], default: minimap2_overlap_self },
......@@ -22,13 +23,6 @@
label: "Results directory: ",
volumes: [Results: "/Results"]
},
{
name: "sample_dir",
type: "input_dir",
value: "/Data",
label: "Data directory: ",
volumes: [Data: "/Data", Results: "/Results"]
},
{
name: "pacbio_oxfordNanopore",
type: "radio",
......@@ -36,45 +30,38 @@
choices: [Pacbio: pb, Oxford Nanopore: ont],
label: "Sequencing used to produce reads",
},
{
name: "SeOrPe",
type: "radio",
value: "SE",
choices: [Single end: SE, Paired end: PE],
label: "Single end reads (SE) or Paired end reads (PE): ",
},
],
steps_in: [
{ step_name: find_overlaps, tool_name: minimap2_overlap_self, rule_name: minimap2_overlap_self,
params: [
{input_name: reads, origin_command: raw_reads, origin_name: read }
{input_name: reads, origin_command: raw_long_reads, origin_name: read }
]
},
{ step_name: assembly, tool_name: miniasm, rule_name: miniasm,
params: [
{ input_name: reads, origin_command: raw_reads, origin_name: read },
{ input_name: reads, origin_command: raw_long_reads, origin_name: read },
{ input_name: paf, origin_command: minimap2_overlap_self, origin_name: reads_overlaps }
]
},
{ step_name: mapping, tool_name: minimap2_reference, rule_name: minimap2_reference,
params: [
{ input_name: reads, origin_command: raw_reads, origin_name: read },
{ input_name: reads, origin_command: raw_long_reads, origin_name: read },
{ input_name: fasta, origin_command: miniasm, origin_name: assembly_fasta }
]
},
{ step_name: correction, tool_name: racon, rule_name: racon,
params: [
{ input_name: reads, origin_command: raw_reads, origin_name: read },
{ input_name: reads, origin_command: raw_long_reads, origin_name: read },
{ input_name: assembly, origin_command: miniasm, origin_name: assembly_fasta },
{ input_name: overlaps, origin_command: minimap2_reference, origin_name: reads_mapping }
]
},
{ step_name: polishing, tool_name: medaka, rule_name: medaka,
params: [
{ input_name: reads, origin_command: raw_reads, origin_name: read },
{ input_name: reads, origin_command: raw_long_reads, origin_name: read },
{ input_name: assembly_fasta, origin_command: racon, origin_name: assembly_corrected },
]
},
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment