Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
edna
nextflow_obitools
Commits
6897e7a1
Commit
6897e7a1
authored
Nov 29, 2018
by
peguerin
Browse files
step1 nf
parent
37d004e5
Changes
3
Hide whitespace changes
Inline
Side-by-side
main.sh
deleted
100644 → 0
View file @
37d004e5
scripts/step1.nf
View file @
6897e7a1
// Pipeline parameters. Values assigned to `params` are defaults.
params.str = 'Hello world!'
// Directory the pipeline is launched from. The previous value "$(pwd)" was
// shell command substitution inside a Groovy double-quoted string, which
// Groovy rejects ("illegal string body character after dollar sign").
params.workingfolder = System.getProperty('user.dir')
// Folder searched below for paired FASTQ files and barcode (.dat) files.
params.datafolder = "/media/superdisk/edna/donnees/rhone_test"

// Paired-end reads matched on the *_R{1,2}.fastq.gz pattern; `flat: true`
// makes each item a flat (id, R1, R2) tuple.
sequences = Channel.fromFilePairs(params.datafolder + "/*_R{1,2}.fastq.gz", flat: true)
// Every .dat file found in the data folder.
barcodes = Channel.fromPath(params.datafolder + "/*.dat")

// Thresholds referenced by the filtering/cleaning processes
// (obigrep count / seq_length predicates and obiclean -r).
params.count = 10
params.seq_length = 20
params.obiclean_r = 0.05
// Merge the forward and reverse reads of one run with `illuminapairedend`.
// Receives (run, R1, R2) tuples from `sequences` and emits
// (run, "<run>.merged") on `fastqMergeds`.
process illuminapairedend {
    """
    [t=2h]paired end alignment then keep reads with quality > 40
    """

    input:
    set val(run), file(R1_fastq), file(R2_fastq) from sequences

    output:
    set val(run), file("${run}.merged") into fastqMergeds

    script:
    // The reverse read is passed through -r, the forward read positionally;
    // stdout is captured in the per-run output file.
    """
    illuminapairedend -r $R2_fastq $R1_fastq --score-min=40 > ${run}.merged
    """
}
// Filter the merged reads of a run with obigrep, keeping records whose
// `mode` attribute is not "joined".
// Receives (run, "<run>.merged") from `fastqMergeds` and emits
// (run, "<run>.merg.aln") on `mergedAligneds`.
process remove_unaligned {
    """
    [t=1h]remove unaligned sequence records
    """

    input:
    set val(run), file("${run}.merged") from fastqMergeds

    output:
    set val(run), file("${run}.merg.aln") into mergedAligneds

    script:
    """
    obigrep -p 'mode!="joined"' ${run}.merged > ${run}.merg.aln
    """
}
...
...
@@ -38,41 +41,87 @@ process assign_sequences {
[t=6h]assign each sequence record to the corresponding sample/marker combination
"""
input:
file mergedAligned
from mergedAligneds
set val(run), file("${run}.merg.aln")
from mergedAligneds
file barcode from barcodes
output:
file
assigned
Merged
into assigedMergeds
file
unassigned
Merged
into unassignedMergeds
set val(run), file("${run}.m.aln.
assigned
")
into assigedMergeds
set val(run), file("${run}.m.aln.
unassigned
")
into unassignedMergeds
script:
"""
ngsfilter -t $barcode -u
unassignedMerged $mergedAligned
--fasta-output > assigned
Merged
ngsfilter -t $barcode -u
${run}.m.aln.unassigned ${run}.merg.aln
--fasta-output >
${run}.m.aln.
assigned
"""
}
// Split the assigned reads of a run into one FASTA file per value of the
// "sample" attribute using obisplit.
// Receives (run, "<run>.m.aln.assigned") from `assigedMergeds`; every
// produced "sample_<run>_*.fasta" file is emitted on `demultiplexed`
// (declared with `mode flatten`).
process split_sequences {
    """
    split the input sequence file in a set of subfiles according to the values of attribute "sample"
    """

    input:
    set val(run), file("${run}.m.aln.assigned") from assigedMergeds

    output:
    set val(run), file("sample_${run}_*.fasta") into demultiplexed mode flatten

    script:
    // -p sets the output file name prefix, so the generated files match the
    // glob declared in the output section.
    """
    obisplit -p "sample_${run}_" -t sample --fasta ${run}.m.aln.assigned
    """
}
// Collapse the reads of one demultiplexed FASTA file into unique sequences.
// Receives (sample, "<sample>.fasta") from `demultiplexed` and emits
// (sample, "<sample>.uniq.fa") on `dereplicateds`.
// NOTE(review): split_sequences declares val(run) as the first tuple element
// of `demultiplexed`, so `sample` here appears to receive the run id - confirm.
process dereplicate {
    """
    dereplicate reads into uniq sequences
    """

    input:
    set val(sample), file("${sample}.fasta") from demultiplexed

    output:
    set val(sample), file("${sample}.uniq.fa") into dereplicateds

    script:
    """
    obiuniq -m sample ${sample}.fasta > ${sample}.uniq.fa
    """
}
// Filter dereplicated sequences with obigrep using three criteria:
//   - count      > params.count
//   - sequence matches ^[ACGT]+$ (no ambiguity codes)
//   - seq_length > params.seq_length
// Receives (sample, "<sample>.uniq.fa") from `dereplicateds` and emits
// (sample, "<sample>.u.filtered.fa") on `goodlength_goodcounts`.
process seq_count_filter {
    """
    keep only sequence more than 20bp with no ambiguity IUAPC with total coverage greater than 10 reads
    """

    input:
    set val(sample), file("${sample}.uniq.fa") from dereplicateds

    output:
    set val(sample), file("${sample}.u.filtered.fa") into goodlength_goodcounts

    script:
    // `\$` keeps a literal dollar sign (regex end anchor) in the generated
    // command instead of starting a Groovy interpolation.
    """
    obigrep -p 'count>${params.count}' -s '^[ACGT]+\$' -p 'seq_length>${params.seq_length}' ${sample}.uniq.fa > ${sample}.u.filtered.fa
    """
}
// Annotate the filtered sequences of a sample with obiclean, using
// params.obiclean_r as the -r value.
// Receives (sample, "<sample>.u.filtered.fa") from `goodlength_goodcounts`
// and emits (sample, "<sample>.u.f.pcr_annotated.fa") on `pcrerr_annotateds`.
process annotate_pcrerr {
    """
    Clean the sequences for PCR/sequencing errors (sequence variants)
    """

    input:
    set val(sample), file("${sample}.u.filtered.fa") from goodlength_goodcounts

    output:
    set val(sample), file("${sample}.u.f.pcr_annotated.fa") into pcrerr_annotateds

    script:
    // NOTE(review): there is no else branch, so no script is produced when the
    // check fails; also confirm that file() with a relative name resolves to
    // the staged task input rather than another directory.
    if (!file("${sample}.u.filtered.fa").isEmpty()) {
        """
        obiclean -r ${params.obiclean_r} ${sample}.u.filtered.fa > ${sample}.u.f.pcr_annotated.fa
        """
    }
}
// Keep only the records whose obiclean_internalcount attribute equals 0.
// Receives (sample, "<sample>.u.f.pcr_annotated.fa") from `pcrerr_annotateds`
// and emits (sample, "<sample>.u.f.p.cleaned.fa") on `cleaned_samples`.
process remove_internal {
    """
    Remove sequence which are classified as 'internal' by obiclean
    """

    input:
    set val(sample), file("${sample}.u.f.pcr_annotated.fa") from pcrerr_annotateds

    output:
    set val(sample), file("${sample}.u.f.p.cleaned.fa") into cleaned_samples

    script:
    """
    obigrep -p 'obiclean_internalcount == 0' ${sample}.u.f.pcr_annotated.fa > ${sample}.u.f.p.cleaned.fa
    """
}
scripts/step2.nf
0 → 100644
View file @
6897e7a1
// Concatenate the cleaned per-sample FASTA files of a run into "<run>.fasta".
// Emits (run, "<run>.fasta") on `fastaruns`.
// NOTE(review): neither `sequences` nor `cleaned_samples` is defined in this
// file as shown; they are created in step1.nf - confirm how the two scripts
// are meant to be combined.
process cat_samples {
    """
    Concatenate sequences from each sample of the same run
    """

    input:
    set val(run) from sequences
    // Inputs are read `from` a channel; `into` is only valid in output
    // declarations, so the original `into cleaned_samples` was corrected.
    set val(sample), file("${sample}.u.f.p.cleaned.fa") from cleaned_samples

    output:
    set val(run), file("${run}.fasta") into fastaruns

    script:
    """
    cat sample_${run}_*.u.f.p.cleaned.fa > ${run}.fasta
    """
}
// WORK IN PROGRESS!
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment