Commit 44a3e14e authored by peguerin's avatar peguerin
Browse files

remove folder rules

parent b0ca6bc6
### Assign each sequence to a taxon
# Runs `ecotag` on the dereplicated sequences of one run and writes the
# result to 03_assigned/. The values given to `-d` and `-R` are the `bdr` and
# `fasta` fields of the `dfpmr` row whose `projmarkrun` equals `{run}`.
rule assign_taxon:
    input:
        "02_dereplicated/{run}.uniq.fasta"
    output:
        "03_assigned/{run}.tag.u.fasta"
    params:
        # First dfpmr record matching the run, as a dict; the `[0]` lookup
        # fails if no row matches the wildcard.
        drun=lambda wildcards: dfpmr[
            dfpmr.projmarkrun == wildcards.run
        ].to_dict('records')[0]
    log:
        "../99_log/05_assignment/03_assign_taxon/{run}.log"
    singularity:
        # Image taken from the workflow configuration.
        config["singularity"]["obitools"]
    shell:
        "ecotag -d {params.drun[bdr]} -R {params.drun[fasta]} {input} > {output} 2> {log}"
### Assign each sequence record to the corresponding sample/marker combination
# Runs `ngsfilter` on the aligned reads of the sequencing run attached to the
# `{demultiplex}` wildcard. The `dfRunMarker` row whose `projMarkRun` equals
# the wildcard supplies `run` (base name of the reads file) and `dat` (value
# passed to `-t`).
#
# The reads file is declared as `input:` rather than being hard-coded in the
# shell command, so Snakemake checks that it exists before launching the job
# and re-runs the rule when it changes.
#
# NOTE(review): both outputs are named `.fastq` although `--fasta-output` is
# passed to ngsfilter; the names are kept unchanged because another rule
# consumes `{demultiplex}.ali.assigned.fastq`.
rule assign_sequences:
    input:
        # Aligned reads of the run that this project/marker/run belongs to.
        lambda wildcards: '../02_assembly/02_remove_unaligned/{}.ali.fastq'.format(
            dfRunMarker[dfRunMarker.projMarkRun == wildcards.demultiplex].to_dict('records')[0]['run']
        )
    output:
        assign='01_assign_sequences/{demultiplex}.ali.assigned.fastq',
        unid='01_assign_sequences/{demultiplex}.unidentified.fastq'
    singularity:
        config["singularity"]["obitools"]
    params:
        # First matching dfRunMarker record as a dict; the `[0]` lookup fails
        # if no row matches the wildcard.
        dmulti= lambda wildcards: dfRunMarker[dfRunMarker.projMarkRun == wildcards.demultiplex].to_dict('records')[0],
    log:
        '../99_log/03_demultiplex/01_assign_sequences/{demultiplex}.log'
    shell:
        '''ngsfilter -t {params.dmulti[dat]} -u {output.unid} {input} --fasta-output > {output.assign} 2> {log}'''
### Split the input sequence file in a set of subfiles according to the values of attribute `sample`
# Creates the directory 02_raw/{demultiplex}/ and runs `obisplit` with that
# directory as prefix (`-p`) and `sample` as the splitting attribute (`-t`).
# NOTE(review): no `output:` is declared, so the files written by obisplit
# are not tracked by Snakemake — confirm how this rule is requested.
rule split_sequences:
    input:
        "01_assign_sequences/{demultiplex}.ali.assigned.fastq"
    log:
        "../99_log/03_demultiplex/02_split_sequences/{demultiplex}.log"
    params:
        # Trailing slash matters: the value is used verbatim as file prefix.
        dir="02_raw/{demultiplex}/"
    singularity:
        config["singularity"]["obitools"]
    shell:
        'mkdir -p {params.dir}; obisplit -p "{params.dir}" -t sample --fasta {input} 2> {log}'
### Dereplicate and merge samples together
# Runs `obiuniq -m sample` on the FASTA file of one run and stores the
# result under 02_dereplicated/.
rule dereplicate_runs:
    input:
        "01_runs/{run}.fasta"
    output:
        "02_dereplicated/{run}.uniq.fasta"
    log:
        "../99_log/05_assignment/02_dereplicated/{run}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "obiuniq -m sample {input} > {output} 2> {log}"
### Dereplicate reads into unique sequences
# Runs `obiuniq -m sample` on one demultiplexed sample file. Before that, the
# shell command creates 01_dereplicated/<projmarkrun>/, where `projmarkrun`
# comes from the `dfMultiChecked` row whose `demultiplex` column equals the
# `{demultiplexs}` wildcard.
rule dereplicate_samples:
    input:
        "../03_demultiplex/02_raw/{demultiplexs}.fasta"
    output:
        "01_dereplicated/{demultiplexs}.uniq.fasta"
    params:
        # First matching dfMultiChecked record as a dict; the `[0]` lookup
        # fails if no row matches the wildcard.
        dmulti=lambda wildcards: dfMultiChecked[
            dfMultiChecked.demultiplex == wildcards.demultiplexs
        ].to_dict('records')[0]
    log:
        "../99_log/04_filter_samples/01_dereplicated/{demultiplexs}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "mkdir -p 01_dereplicated/{params.dmulti[projmarkrun]}; obiuniq -m sample {input} > {output} 2> {log}"
### Keep only sequences that are long enough, unambiguous and sufficiently covered
# `obigrep` retains a sequence when all three conditions hold:
#   - its `count` is greater than config["good_length_samples"]["seq_count"],
#   - it matches '^[ACGT]+$' (only A/C/G/T, i.e. no IUPAC ambiguity code),
#   - its `seq_length` is greater than config["good_length_samples"]["seq_length"].
# NOTE(review): the previous comment quoted 20 bp and 10 reads; the actual
# thresholds are read from the configuration — confirm the configured values.
rule goodlength_samples:
    input:
        "01_dereplicated/{demultiplexs}.uniq.fasta"
    output:
        "02_goodlength/{demultiplexs}.l.u.fasta"
    params:
        seq_length=config["good_length_samples"]["seq_length"],
        seq_count=config["good_length_samples"]["seq_count"]
    log:
        "../99_log/04_filter_samples/02_goodlength/{demultiplexs}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "obigrep -p 'count>{params.seq_count}' -s '^[ACGT]+$' -p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}"
### Clean the sequences for PCR/sequencing errors (sequence variants)
# When the input file is non-empty (`-s` test), runs `obiclean` with the `-r`
# value taken from config["clean_pcrerr_samples"]["r"]. Otherwise the output
# is simply created with `touch`, so an output file exists in both cases.
rule clean_pcrerr_samples:
    input:
        "02_goodlength/{demultiplexs}.l.u.fasta"
    output:
        "03_clean_pcrerr/{demultiplexs}.r.l.u.fasta"
    params:
        r=config["clean_pcrerr_samples"]["r"]
    log:
        "../99_log/04_filter_samples/03_clean_pcrerr/{demultiplexs}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "if [[ -s {input} ]]; then obiclean -r {params.r} {input} > {output} 2> {log} ; else touch {output} 2> {log} ; fi"
### Remove sequences which are classified as 'internal' by obiclean
# When the input file is non-empty, creates 04_filtered/<projmarkrun>/ and
# keeps, via `obigrep`, the sequences for which `obiclean_internalcount == 0`.
# When the input is empty, the output is created with `touch`.
# `projmarkrun` comes from the `dfMultiChecked` row whose `demultiplex`
# column equals the `{demultiplexs}` wildcard.
rule rm_internal_samples:
    input:
        "03_clean_pcrerr/{demultiplexs}.r.l.u.fasta"
    output:
        "04_filtered/{demultiplexs}.c.r.l.u.fasta"
    log:
        "../99_log/04_filter_samples/04_filtered/{demultiplexs}.log"
    params:
        # First matching dfMultiChecked record as a dict; the `[0]` lookup
        # fails if no row matches the wildcard.
        dmulti=lambda wildcards: dfMultiChecked[
            dfMultiChecked.demultiplex == wildcards.demultiplexs
        ].to_dict('records')[0]
    singularity:
        config["singularity"]["obitools"]
    shell:
        'if [[ -s {input} ]]; then mkdir -p 04_filtered/{params.dmulti[projmarkrun]}; obigrep -p "obiclean_internalcount == 0" {input} > {output} 2> {log} ; else touch {output} 2> {log} ; fi'
### Attributes that are no longer useful are removed at this stage
# Runs `obiannotate` with one `--delete-tag` option per attribute to drop
# from the assigned sequences of a run. The command is written as adjacent
# string literals that concatenate into a single command line.
rule rm_attributes:
    input:
        "03_assigned/{run}.tag.u.fasta"
    output:
        "04_formated/{run}.a.t.u.fasta"
    log:
        "../99_log/05_assignment/04_rm_attributes/{run}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "obiannotate --delete-tag=scientific_name_by_db --delete-tag=obiclean_samplecount "
        "--delete-tag=obiclean_count --delete-tag=obiclean_singletoncount "
        "--delete-tag=obiclean_cluster --delete-tag=obiclean_internalcount "
        "--delete-tag=obiclean_head --delete-tag=obiclean_headcount "
        "--delete-tag=id_status --delete-tag=rank_by_db --delete-tag=obiclean_status "
        "--delete-tag=seq_length_ori --delete-tag=sminL --delete-tag=sminR "
        "--delete-tag=reverse_score --delete-tag=reverse_primer --delete-tag=reverse_match --delete-tag=reverse_tag "
        "--delete-tag=forward_tag --delete-tag=forward_score --delete-tag=forward_primer --delete-tag=forward_match "
        "--delete-tag=tail_quality {input} > {output} 2> {log}"
### The sequences can be sorted by decreasing order of count
# Runs `obisort -k count -r` on the cleaned-up sequences of one run; input
# and output both live in 04_formated/.
rule sort_runs:
    input:
        "04_formated/{run}.a.t.u.fasta"
    output:
        "04_formated/{run}.s.a.t.u.fasta"
    log:
        "../99_log/05_assignment/05_sort_runs/{run}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "obisort -k count -r {input} > {output} 2> {log}"
### Generate the final results table
# Creates ../06_final_tables/<projet>/<marker>/ and then runs `obitab -o` on
# the sorted sequences of one run, writing ../06_final_tables/{run}.csv.
# `projet` and `marker` come from the `dfpmr` row whose `projmarkrun` equals
# the `{run}` wildcard.
rule table_runs:
    input:
        "04_formated/{run}.s.a.t.u.fasta"
    output:
        "../06_final_tables/{run}.csv"
    params:
        # First dfpmr record matching the run, as a dict; the `[0]` lookup
        # fails if no row matches the wildcard.
        drun=lambda wildcards: dfpmr[
            dfpmr.projmarkrun == wildcards.run
        ].to_dict('records')[0]
    log:
        "../99_log/05_assignment/06_table_runs/{run}.log"
    singularity:
        config["singularity"]["obitools"]
    shell:
        "mkdir -p ../06_final_tables/{params.drun[projet]}/{params.drun[marker]}; obitab -o {input} > {output} 2> {log}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment