Commit 6897e7a1 authored by peguerin's avatar peguerin
Browse files

step1 nf

parent 37d004e5
// ---- Pipeline parameters ----
params.str = 'Hello world!'
// Directory the pipeline is launched from.
// The previous value "$(pwd)" does not compile: inside a double-quoted Groovy string a `$`
// must be followed by an identifier or `{`. The JVM working directory is used instead.
// NOTE(review): assumes the launch directory was the intent - confirm.
params.workingfolder = System.getProperty('user.dir')
// Folder holding the paired FASTQ files (*_R1/_R2.fastq.gz) and the barcode files (*.dat).
params.datafolder = "/media/superdisk/edna/donnees/rhone_test"
// Threshold for the obigrep 'count>' predicate in seq_count_filter.
params.count = 10
// Threshold for the obigrep 'seq_length>' predicate in seq_count_filter.
params.seq_length = 20
// Value passed to `obiclean -r` in annotate_pcrerr.
params.obiclean_r = 0.05

// ---- Input channels ----
// flat:true makes each emitted item a flat tuple: (pair id, R1 file, R2 file).
sequences = Channel.fromFilePairs(params.datafolder + "/*_R{1,2}.fastq.gz", flat: true)
barcodes = Channel.fromPath(params.datafolder + "/*.dat")
// illuminapairedend: merges the R1/R2 reads of one run with `illuminapairedend --score-min=40`.
// Input : (run, R1 file, R2 file) tuples from `sequences`.
// Output: (run, "<run>.merged") tuples into `fastqMergeds`.
// Only the run-keyed declarations are kept: declaring a second input from `sequences` and a
// second output into `fastqMergeds` is not valid, and downstream processes expect the run id.
process illuminapairedend {
"""
[t=2h]paired end alignment then keep reads with quality > 40
"""
input:
set val(run), file(R1_fastq), file(R2_fastq) from sequences
output:
set val(run), file("${run}.merged") into fastqMergeds
script:
"""
illuminapairedend -r $R2_fastq $R1_fastq --score-min=40 > ${run}.merged
"""
}
// remove_unaligned: filters one merged file with obigrep, keeping records matching mode!="joined".
// Input : (run, "<run>.merged") tuples from `fastqMergeds`.
// Output: (run, "<run>.merg.aln") tuples into `mergedAligneds`.
// Only the run-keyed declarations are kept; the un-keyed ones read the same channel a second
// time and referenced `$fastqMerged`, which is no longer bound.
process remove_unaligned {
"""
[t=1h]remove unaligned sequence records
"""
input:
set val(run), file("${run}.merged") from fastqMergeds
output:
set val(run), file("${run}.merg.aln") into mergedAligneds
script:
"""
obigrep -p 'mode!="joined"' ${run}.merged > ${run}.merg.aln
"""
}
......@@ -38,41 +41,87 @@ process assign_sequences {
[t=6h]assign each sequence record to the corresponding sample/marker combination
"""
input:
file mergedAligned from mergedAligneds
set val(run), file("${run}.merg.aln") from mergedAligneds
file barcode from barcodes
output:
file assignedMerged into assigedMergeds
file unassignedMerged into unassignedMergeds
set val(run), file("${run}.m.aln.assigned") into assigedMergeds
set val(run), file("${run}.m.aln.unassigned") into unassignedMergeds
script:
"""
ngsfilter -t $barcode -u unassignedMerged $mergedAligned --fasta-output > assignedMerged
ngsfilter -t $barcode -u ${run}.m.aln.unassigned ${run}.merg.aln --fasta-output > ${run}.m.aln.assigned
"""
}
// split_sequences: runs obisplit on one assigned file, producing one FASTA per value of the
// "sample" attribute, each named with the prefix "sample_<run>_".
// Input : (run, "<run>.m.aln.assigned") tuples from `assigedMergeds`.
// Output: `demultiplexed` in flatten mode.
// NOTE(review): flatten mode presumably emits one (run, fasta) tuple per file - confirm.
// Only the run-keyed declarations are kept; the un-keyed ones read the same channel twice and
// referenced `$assignedMerged`, which is no longer bound.
process split_sequences {
"""
split the input sequence file in a set of subfiles according to the values of attribute "sample"
"""
input:
set val(run), file("${run}.m.aln.assigned") from assigedMergeds
output:
set val(run), file("sample_${run}_*.fasta") into demultiplexed mode flatten
script:
"""
obisplit -p "sample_${run}_" -t sample --fasta ${run}.m.aln.assigned
"""
}
// dereplicate: runs `obiuniq -m sample` on one demultiplexed FASTA file.
// Input : (sample, fasta) tuples from `demultiplexed`; the file is staged as "<sample>.fasta".
// Output: (sample, "<sample>.uniq.fa") tuples into `dereplicateds`.
process dereplicate {
"""
dereplicate reads into uniq sequences
"""
input:
set val(sample), file("${sample}.fasta") from demultiplexed
output:
set val(sample), file("${sample}.uniq.fa") into dereplicateds
script:
// Names mirror the input/output declarations above.
def readsFasta = "${sample}.fasta"
def uniqFasta = "${sample}.uniq.fa"
"""
obiuniq -m sample ${readsFasta} > ${uniqFasta}
"""
}
// seq_count_filter: obigrep filter on one dereplicated file. A record is kept when
//   - count      > params.count
//   - seq_length > params.seq_length
//   - the sequence matches ^[ACGT]+$ (no IUPAC ambiguity codes)
// Input : (sample, "<sample>.uniq.fa") tuples from `dereplicateds`.
// Output: (sample, "<sample>.u.filtered.fa") tuples into `goodlength_goodcounts`.
process seq_count_filter {
"""
keep only sequence more than 20bp with no ambiguity IUAPC with total coverage greater than 10 reads
"""
input:
set val(sample), file("${sample}.uniq.fa") from dereplicateds
output:
set val(sample), file("${sample}.u.filtered.fa") into goodlength_goodcounts
script:
// Thresholds come from the pipeline parameters.
def minCount = params.count
def minLength = params.seq_length
"""
obigrep -p 'count>${minCount}' -s '^[ACGT]+\$' -p 'seq_length>${minLength}' ${sample}.uniq.fa > ${sample}.u.filtered.fa
"""
}
// annotate_pcrerr: runs `obiclean -r params.obiclean_r` on one filtered file.
// Input : (sample, "<sample>.u.filtered.fa") tuples from `goodlength_goodcounts`.
// Output: (sample, "<sample>.u.f.pcr_annotated.fa") tuples into `pcrerr_annotateds`.
//
// The emptiness check is done in the shell (`[ -s file ]`) rather than with a Groovy
// `file(...).isEmpty()` guard. The script block is evaluated before the input is staged into
// the task directory, and `file()` resolves against the launch directory, so the Groovy guard
// never inspected the real input and could leave the task without a script.
// When the input is empty, an empty output file is created so the declared output exists.
// NOTE(review): confirm that downstream steps should receive an empty file in that case.
process annotate_pcrerr {
"""
Clean the sequences for PCR/sequencing errors (sequence variants)
"""
input:
set val(sample), file("${sample}.u.filtered.fa") from goodlength_goodcounts
output:
set val(sample), file("${sample}.u.f.pcr_annotated.fa") into pcrerr_annotateds
script:
"""
if [ -s ${sample}.u.filtered.fa ]; then
obiclean -r ${params.obiclean_r} ${sample}.u.filtered.fa > ${sample}.u.f.pcr_annotated.fa
else
touch ${sample}.u.f.pcr_annotated.fa
fi
"""
}
// remove_internal: obigrep filter keeping records where obiclean_internalcount == 0.
// Input : (sample, "<sample>.u.f.pcr_annotated.fa") tuples from `pcrerr_annotateds`.
// Output: (sample, "<sample>.u.f.p.cleaned.fa") tuples into `cleaned_samples`.
process remove_internal {
"""
Remove sequence which are classified as 'internal' by obiclean
"""
input:
set val(sample), file("${sample}.u.f.pcr_annotated.fa") from pcrerr_annotateds
output:
set val(sample), file("${sample}.u.f.p.cleaned.fa") into cleaned_samples
script:
// Names mirror the input/output declarations above.
def annotatedFasta = "${sample}.u.f.pcr_annotated.fa"
def cleanedFasta = "${sample}.u.f.p.cleaned.fa"
"""
obigrep -p 'obiclean_internalcount == 0' ${annotatedFasta} > ${cleanedFasta}
"""
}
// cat_samples: concatenates all cleaned per-sample files that share the same key into one
// "<run>.fasta".
// Input : `cleaned_samples` grouped by its first tuple element with groupTuple(), so one task
//         receives every file of a run. The files are staged under the pattern
//         'cleaned_*.fa' so that same-named sources do not collide in the task directory.
// Output: (run, "<run>.fasta") tuples into `fastaruns`.
//
// Changes from the previous draft:
//   - the input was declared with `into`; process inputs must use `from`
//   - `sequences` is no longer read here, since illuminapairedend already consumes it
//   - the `cat` glob now matches the staged names instead of files from other tasks
// NOTE(review): assumes the first element of each `cleaned_samples` tuple is the run id
// propagated from split_sequences - confirm.
process cat_samples {
"""
Concatenate sequences from each sample of the same run
"""
input:
set val(run), file('cleaned_*.fa') from cleaned_samples.groupTuple()
output:
set val(run), file("${run}.fasta") into fastaruns
script:
"""
cat cleaned_*.fa > ${run}.fasta
"""
}
// WORK IN PROGRESS!
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment