Commit 55ae0d82 authored by peguerin
Browse files

new rules with new path wildcards

parent 8ff01002
### Clean the sequences of PCR/sequencing errors (sequence variants) with obiclean
rule clean_pcrerr_samples:
    """
    Run obiclean on the length-filtered sequences of one demultiplexed sample.

    The value of config["clean_pcrerr_samples"]["r"] is passed to obiclean's
    -r option. When the input file has zero size, obiclean is skipped and an
    empty output file is created with touch instead.
    """
    input:
        "../results/04_filter_samples/02_goodlength/{demultiplexs}.l.u.fasta"
    output:
        "../results/04_filter_samples/03_clean_pcrerr/{demultiplexs}.r.l.u.fasta"
    log:
        "../logs/04_filter_samples/03_clean_pcrerr/{demultiplexs}.log"
    params:
        r=config["clean_pcrerr_samples"]["r"]
    singularity:
        config["singularity"]["obitools"]
    shell:
        # Adjacent literals are concatenated into a single one-line command.
        "if [[ -s {input} ]]; "
        "then obiclean -r {params.r} {input} > {output} 2> {log} ; "
        "else touch {output} 2> {log} ; fi"
\ No newline at end of file
### Dereplicate reads into unique sequences
rule dereplicate_samples:
    """
    Dereplicate the raw reads of one demultiplexed sample with
    `obiuniq -m sample`, writing the result to the 01_dereplicated folder.

    No explicit `mkdir` is needed: Snakemake creates the parent directories
    of the declared output and log files before running the command. An
    earlier version ran `mkdir -p 01_dereplicated/<projmarkrun>` relative to
    the working directory, which only produced a stray directory unrelated
    to the output path, and required a dfMultiChecked lookup that raised
    IndexError when no row matched the wildcard.
    """
    input:
        '../results/03_demultiplex/02_raw/{demultiplexs}.fasta'
    output:
        '../results/04_filter_samples/01_dereplicated/{demultiplexs}.uniq.fasta'
    singularity:
        config["singularity"]["obitools"]
    log:
        '../logs/04_filter_samples/01_dereplicated/{demultiplexs}.log'
    shell:
        '''obiuniq -m sample {input} > {output} 2> {log}'''
\ No newline at end of file
### Keep only unambiguous sequences (A/C/G/T only, no IUPAC ambiguity codes)
### whose length and read count exceed the configured thresholds
rule goodlength_samples:
    """
    Filter the dereplicated sequences of one sample with obigrep.

    A sequence is kept when all three conditions hold:
      * count      > config["good_length_samples"]["seq_count"]
      * seq_length > config["good_length_samples"]["seq_length"]
      * the sequence matches ^[ACGT]+$
    """
    input:
        "../results/04_filter_samples/01_dereplicated/{demultiplexs}.uniq.fasta"
    output:
        "../results/04_filter_samples/02_goodlength/{demultiplexs}.l.u.fasta"
    log:
        "../logs/04_filter_samples/02_goodlength/{demultiplexs}.log"
    params:
        seq_length=config["good_length_samples"]["seq_length"],
        seq_count=config["good_length_samples"]["seq_count"]
    singularity:
        config["singularity"]["obitools"]
    shell:
        # Adjacent literals are concatenated into a single one-line command.
        "obigrep -p 'count>{params.seq_count}' -s '^[ACGT]+$' "
        "-p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}"
\ No newline at end of file
### Remove sequences which are classified as 'internal' by obiclean
rule rm_internal_samples:
    """
    Keep only the sequences whose `obiclean_internalcount` attribute is 0,
    using obigrep on the obiclean output of one demultiplexed sample.

    When the input file has zero size, obigrep is skipped and an empty
    output file is created with touch instead.

    No explicit `mkdir` is needed: Snakemake creates the parent directories
    of the declared output and log files before running the command. An
    earlier version ran `mkdir -p 04_filtered/<projmarkrun>` relative to the
    working directory, which only produced a stray directory unrelated to
    the output path, and required a dfMultiChecked lookup that raised
    IndexError when no row matched the wildcard.
    """
    input:
        '../results/04_filter_samples/03_clean_pcrerr/{demultiplexs}.r.l.u.fasta'
    output:
        '../results/04_filter_samples/04_filtered/{demultiplexs}.c.r.l.u.fasta'
    singularity:
        config["singularity"]["obitools"]
    log:
        '../logs/04_filter_samples/04_filtered/{demultiplexs}.log'
    shell:
        '''if [[ -s {input} ]]; then obigrep -p "obiclean_internalcount == 0" {input} > {output} 2> {log} ; else touch {output} 2> {log} ; fi'''
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment