Skip to content
Commits on Source (3)
......@@ -8,11 +8,11 @@ rule illuminapairedend:
R1=config['fastqFolderPath']+'{run}_R1.fastq.gz',
R2=config['fastqFolderPath']+'{run}_R2.fastq.gz'
output:
fq='assembled/{run}/{run}.fastq'
fq='01-assembly/{run}/{run}.fastq'
singularity:
config["container"]
log:
'log/illuminapairedend/{run}.log'
'99-log/illuminapairedend/{run}.log'
params:
s_min=config["illuminapairedend"]["s_min"]
shell:
......@@ -21,42 +21,13 @@ rule illuminapairedend:
### Remove unaligned sequence records
rule remove_unaligned:
    # Filter the assembled reads of one run, keeping only the records whose
    # `mode` attribute is not "joined" (see the obigrep predicate below).
    # Each directive holds exactly one value: two adjacent path strings in a
    # directive would either be a syntax error (repeated keyword without a
    # comma) or be silently concatenated into a single bogus path.
    input:
        fq='01-assembly/{run}/{run}.fastq'
    output:
        ali='01-assembly/{run}/{run}.ali.fastq'
    singularity:
        config["container"]
    log:
        '99-log/remove_unaligned/{run}.log'
    # stdout of obigrep becomes the filtered FASTQ; stderr goes to the rule log.
    shell:
        '''obigrep -p 'mode!=\"joined\"' {input.fq} > {output.ali} 2> {log}'''
### Assign each sequence record to the corresponding sample/marker combination
rule assign_sequences:
    # Run ngsfilter on the aligned reads of one run.
    #   -t  : barcode description file, built as barcodeFolder + barcodeFile
    #   -u  : path given to ngsfilter for the `unid` output
    #   stdout is redirected to the `assign` output, stderr to the rule log.
    input:
        'assembled/{run}/{run}.ali.fastq',
    output:
        assign='assembled/{run}/{run}.ali.assigned.fastq',
        unid='assembled/{run}/{run}.unidentified.fastq'
    singularity:
        config["container"]
    params:
        # The barcode file is looked up per run from config["barcodeFiles"].
        # The comma after the lambda is required: without it the next keyword
        # is parsed as part of the lambda body and the rule fails to parse.
        barcodeFile=lambda wcs: config["barcodeFiles"][wcs.run],
        barcodeFolder=config['fastqFolderPath']
    log:
        'log/assign_sequences/{run}.log'
    shell:
        '''ngsfilter -t {params.barcodeFolder}{params.barcodeFile} -u {output.unid} {input[0]} --fasta-output > {output.assign} 2> {log}'''
### Split the input sequence file in a set of subfiles according to the values of attribute `sample`
rule split_sequences:
    # Run obisplit on the assigned reads of one run, splitting on the `sample`
    # attribute (-t sample). The single unnamed param is handed to obisplit
    # as the -p value. This rule declares no `output:` directive.
    input:
        "assembled/{run}/{run}.ali.assigned.fastq"
    params:
        "samples/{run}_sample_"
    singularity:
        config["container"]
    log:
        "log/split_sequences/{run}.log"
    # Only stderr is captured; obisplit's stdout is not redirected.
    shell:
        '''obisplit -p "{params}" -t sample --fasta {input} 2> {log}'''
### Assign each sequence record to the corresponding sample/marker combination
rule assign_sequences:
    # Run ngsfilter on the aligned reads of one run (01-assembly layout).
    # The barcode file path is barcodeFolder + barcodeFile; the `unid` output
    # path is passed with -u, stdout is written to the `assign` output and
    # stderr to the rule log.
    input:
        "01-assembly/{run}/{run}.ali.fastq",
    output:
        assign="01-assembly/{run}/{run}.ali.assigned.fastq",
        unid="01-assembly/{run}/{run}.unidentified.fastq"
    params:
        # Per-run barcode file name, resolved from the `run` wildcard.
        barcodeFile=lambda wcs: config["barcodeFiles"][wcs.run],
        barcodeFolder=config["fastqFolderPath"]
    log:
        "99-log/assign_sequences/{run}.log"
    singularity:
        config["container"]
    shell:
        '''ngsfilter -t {params.barcodeFolder}{params.barcodeFile} -u {output.unid} {input} --fasta-output > {output.assign} 2> {log}'''
### Split the input sequence file in a set of subfiles according to the values of attribute `sample`
rule split_sequences:
    # Run obisplit on the assigned reads of one run, splitting on the `sample`
    # attribute (-t sample). The single unnamed param is handed to obisplit
    # as the -p value. This rule declares no `output:` directive.
    input:
        '01-assembly/{run}/{run}.ali.assigned.fastq'
    params:
        '02-demultiplex/{run}_sample_'
    singularity:
        config["container"]
    # Logs live under 99-log/ like those of the other rules in this layout.
    log:
        '99-log/split_sequences/{run}.log'
    shell:
        '''obisplit -p "{params}" -t sample --fasta {input} 2> {log}'''
### Dereplicate reads into unique sequences
rule dereplicate_samples:
    # Run obiuniq on one sample FASTA with `-m sample`; stdout becomes the
    # .uniq.fasta output and stderr is written to the rule log.
    input:
        "samples/{sample}.fasta"
    output:
        "samples/{sample}.uniq.fasta"
    log:
        "log/dereplicate_samples/{sample}.log"
    shell:
        '''obiuniq -m sample {input} > {output} 2> {log}'''
### Keep only sequences longer than 20 bp, with no IUPAC ambiguity codes, and with total coverage greater than 10 reads
rule goodlength_samples:
    # Filter a dereplicated sample with obigrep. Three conditions are passed:
    #   -p 'count>...'       count above config["good_length_samples"]["count"]
    #   -s '^[ACGT]+$'       sequence made only of A, C, G and T
    #   -p 'seq_length>...'  length above config["good_length_samples"]["seq_length"]
    input:
        "samples/{sample}.uniq.fasta"
    output:
        "samples/{sample}.l.u.fasta"
    params:
        # Both thresholds are read from the workflow configuration.
        count=config["good_length_samples"]["count"],
        seq_length=config["good_length_samples"]["seq_length"]
    log:
        "log/goodlength_samples/{sample}.log"
    shell:
        '''obigrep -p 'count>{params.count}' -s '^[ACGT]+$' -p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}'''
### Clean the sequences for PCR/sequencing errors (sequence variants)
rule clean_pcrerr_samples:
    # Run obiclean on the length/count-filtered sample. The -r value comes
    # from config["clean_pcrerr_samples"]["r"]; stdout becomes the
    # .r.l.u.fasta output and stderr is written to the rule log.
    input:
        "samples/{sample}.l.u.fasta"
    output:
        "samples/{sample}.r.l.u.fasta"
    params:
        r=config["clean_pcrerr_samples"]["r"]
    log:
        "log/clean_pcrerr/{sample}.log"
    shell:
        '''obiclean -r {params.r} {input} > {output} 2> {log}'''
### Remove sequences that are classified as 'internal' by obiclean
rule rm_internal_samples:
    # Keep only the records whose `obiclean_internalcount` attribute equals 0,
    # using an obigrep predicate; stdout becomes the .c.r.l.u.fasta output and
    # stderr is written to the rule log.
    input:
        "samples/{sample}.r.l.u.fasta"
    output:
        "samples/{sample}.c.r.l.u.fasta"
    log:
        "log/rm_internal_samples/{sample}.log"
    shell:
        '''obigrep -p 'obiclean_internalcount == 0' {input} > {output} 2> {log}'''