Commit 6897e7a1 authored by peguerin's avatar peguerin
Browse files

step1 nf

parent 37d004e5
// Pipeline parameters (each overridable on the command line with --<name>).
params.str = 'Hello world!'
params.workingfolder="$(pwd)"
// Folder holding raw paired-end reads (*_R{1,2}.fastq.gz) and ngsfilter barcode files (*.dat).
params.datafolder="/media/superdisk/edna/donnees/rhone_test"
// Emits one (run_id, R1, R2) tuple per sequencing run (flat:true unpacks the file pair).
sequences= Channel.fromFilePairs(params.datafolder+"/*_R{1,2}.fastq.gz",flat:true)
// One barcode/tag description file per path matched.
barcodes=Channel.fromPath(params.datafolder+"/*.dat")
// Filtering thresholds used downstream: minimum total read count,
// minimum sequence length (bp), and obiclean error-ratio threshold.
params.count=10
params.seq_length=20
params.obiclean_r=0.05
// [t=2h] Merge each R1/R2 pair with illuminapairedend, keeping only
// merged reads whose alignment score is > 40.
process illuminapairedend {
"""
[t=2h]paired end alignment then keep reads with quality > 40
"""
input:
set val(run), file(R1_fastq), file(R2_fastq) from sequences
output:
// One merged-read file per run, tagged with its run id.
set val(run), file("${run}.merged") into fastqMergeds
script:
"""
illuminapairedend -r $R2_fastq $R1_fastq --score-min=40 > ${run}.merged
"""
}
// [t=1h] Drop sequence records that were not actually merged
// (obigrep keeps records whose 'mode' attribute is not "joined").
process remove_unaligned {
"""
[t=1h]remove unaligned sequence records
"""
input:
set val(run), file("${run}.merged") from fastqMergeds
output:
set val(run), file("${run}.merg.aln") into mergedAligneds
script:
"""
obigrep -p 'mode!="joined"' ${run}.merged > ${run}.merg.aln
"""
}
// [t=6h] Assign each merged read to its sample/marker combination
// with ngsfilter, using the barcode description file; unassigned
// reads are kept in a separate output.
process assign_sequences {
"""
[t=6h]assign each sequence record to the corresponding sample/marker combination
"""
input:
set val(run), file("${run}.merg.aln") from mergedAligneds
file barcode from barcodes
output:
// NOTE(review): channel name keeps the original "assigedMergeds" spelling
// (missing 'n') because split_sequences consumes it under that name.
set val(run), file("${run}.m.aln.assigned") into assigedMergeds
set val(run), file("${run}.m.aln.unassigned") into unassignedMergeds
script:
"""
ngsfilter -t $barcode -u ${run}.m.aln.unassigned ${run}.merg.aln --fasta-output > ${run}.m.aln.assigned
"""
}
// Split the assigned file into one FASTA per value of the "sample"
// attribute; 'mode flatten' emits each per-sample file as its own item.
process split_sequences {
"""
split the input sequence file in a set of subfiles according to the values of attribute "sample"
"""
input:
set val(run), file("${run}.m.aln.assigned") from assigedMergeds
output:
set val(run), file("sample_${run}_*.fasta") into demultiplexed mode flatten
script:
"""
obisplit -p "sample_${run}_" -t sample --fasta ${run}.m.aln.assigned
"""
}
// Collapse identical reads into unique sequences (obiuniq), keeping
// per-sample counts via the 'sample' attribute.
process dereplicate {
"""
dereplicate reads into uniq sequences
"""
input:
set val(sample), file("${sample}.fasta") from demultiplexed
output:
set val(sample), file("${sample}.uniq.fa") into dereplicateds
script:
"""
obiuniq -m sample ${sample}.fasta > ${sample}.uniq.fa
"""
}
// Abundance/quality filter (obigrep), keeping sequences that satisfy all of:
//   - count > params.count (10) total reads,
//   - composed only of A/C/G/T (no IUPAC ambiguity codes),
//   - seq_length > params.seq_length (20) bp.
process seq_count_filter {
"""
keep only sequence more than 20bp with no ambiguity IUAPC with total coverage greater than 10 reads
"""
input:
set val(sample), file("${sample}.uniq.fa") from dereplicateds
output:
set val(sample), file("${sample}.u.filtered.fa") into goodlength_goodcounts
script:
"""
obigrep -p 'count>${params.count}' -s '^[ACGT]+\$' -p 'seq_length>${params.seq_length}' ${sample}.uniq.fa > ${sample}.u.filtered.fa
"""
}
// Annotate likely PCR/sequencing error variants with obiclean, using
// the params.obiclean_r error-ratio threshold.
process annotate_pcrerr {
"""
Clean the sequences for PCR/sequencing errors (sequence variants)
"""
input:
set val(sample), file("${sample}.u.filtered.fa") from goodlength_goodcounts
output:
set val(sample), file("${sample}.u.f.pcr_annotated.fa") into pcrerr_annotateds
script:
// Guard: only run obiclean when the filtered FASTA is non-empty —
// presumably to avoid failing on empty inputs.
// NOTE(review): when the guard is false the task has no script and no
// declared output file, which Nextflow treats as a failure; consider an
// else branch that creates an empty output instead — TODO confirm.
if (!file("${sample}.u.filtered.fa").isEmpty()){
"""
obiclean -r ${params.obiclean_r} ${sample}.u.filtered.fa > ${sample}.u.f.pcr_annotated.fa
"""
}
}
// Keep only sequences obiclean did not classify as 'internal' in any
// sample (obiclean_internalcount == 0), i.e. the putative true variants.
process remove_internal {
"""
Remove sequence which are classified as 'internal' by obiclean
"""
input:
set val(sample), file("${sample}.u.f.pcr_annotated.fa") from pcrerr_annotateds
output:
set val(sample), file("${sample}.u.f.p.cleaned.fa") into cleaned_samples
script:
"""
obigrep -p 'obiclean_internalcount == 0' ${sample}.u.f.pcr_annotated.fa > ${sample}.u.f.p.cleaned.fa
"""
}
// Concatenate the cleaned per-sample FASTA files belonging to one run
// into a single per-run FASTA.
process cat_samples {
"""
Concatenate sequences from each sample of the same run
"""
input:
set val(run) from sequences
// FIX(review): input declarations bind with 'from', not 'into' as the
// original had — 'into' is only valid in output declarations.
set val(sample), file("${sample}.u.f.p.cleaned.fa") from cleaned_samples
output:
set val(run), file("${run}.fasta") into fastaruns
script:
// NOTE(review): 'sequences' is already consumed by illuminapairedend and
// emits (run, R1, R2) tuples, so 'set val(run) from sequences' cannot work
// as written; grouping cleaned_samples by run (e.g. groupTuple) is likely
// needed instead — TODO confirm (file is marked WORK IN PROGRESS).
"""
cat sample_${run}_*.u.f.p.cleaned.fa > ${run}.fasta
"""
}
// WORK IN PROGRESS!
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment