Commit 46bcf015 authored by peguerin's avatar peguerin
Browse files

write new rules

parent 2bbeb9a7
### Assign each sequence to a taxon
# Runs `ecotag` on the dereplicated FASTA file of one run.
#   - `-d` receives config["assign_taxon"]["bdr"]
#   - `-R` receives config["assign_taxon"]["fasta"]
# Standard output becomes the tagged FASTA file; standard error is captured
# in the per-run log file.
rule assign_taxon:
    input:
        "02-dereplicated/{run}_run.uniq.fasta"
    output:
        "03-assigned/{run}_run.tag.u.fasta"
    log:
        "../99-log/10-assign_taxon/{run}.log"
    params:
        bdr=config["assign_taxon"]["bdr"],
        fasta=config["assign_taxon"]["fasta"]
    singularity:
        config["container"]
    shell:
        "ecotag -d {params.bdr} -R {params.fasta} {input} > {output} 2> {log}"
### Concatenate sequences from each sample of the same run
# The shell command first lists the input file into the log, then concatenates
# every `*.c.r.l.u.fasta` file found under `params.dir` into `params.merged`.
# NOTE(review): this rule declares no `output:` directive; the merged file is
# only named in `params.merged`, so it is presumably not tracked as a product
# of the rule -- confirm this is intended.
# NOTE(review): the `cat` uses a glob over `params.dir` rather than `{input}`,
# and `params.merged` depends only on {run}; if the rule runs once per
# {sample}, each job would rewrite the same merged file -- verify.
rule cat_samples:
input:
'03-cleaned/{run}/{sample}.c.r.l.u.fasta'
params:
dir='03-cleaned/{run}/',
merged='../03-filtered/{run}.fasta'
log:
'../99-log/09-cat_samples/{run}/{sample}.log'
shell:
'''ls {input} > {log}; cat {params.dir}*.c.r.l.u.fasta > {params.merged}'''
\ No newline at end of file
### Dereplicate and merge samples together
# Runs `obiuniq -m sample` on the FASTA file of one run. Standard output
# becomes the dereplicated FASTA file; standard error is captured in the
# per-run log file.
rule dereplicate_runs:
    input:
        "01-runs/{run}_run.fasta"
    output:
        "02-dereplicated/{run}_run.uniq.fasta"
    log:
        "../99-log/09-dereplicate_runs/{run}.log"
    singularity:
        config["container"]
    shell:
        "obiuniq -m sample {input} > {output} 2> {log}"
### Dereplicate reads into unique sequences
rule dereplicate_samples:
input:
'01-raw/{run}/{sample}.fasta'
'01-runs/{run}/{sample}.fasta'
output:
'02-filtered/{run}/{sample}.uniq.fasta'
singularity:
......
### Attributes that are no longer useful can be removed at this stage
# Runs `obiannotate` with one `--delete-tag=<name>` option per attribute to
# drop, reading the tagged FASTA file of one run. Standard output becomes the
# formatted FASTA file; standard error is captured in the per-run log file.
rule rm_attributes:
    input:
        "03-assigned/{run}_run.tag.u.fasta"
    output:
        "04-formated/{run}_run.a.t.u.fasta"
    log:
        "../99-log/11-rm_attributes/{run}.log"
    singularity:
        config["container"]
    # The adjacent string literals below are concatenated into one command
    # line; every piece except the last ends with a separating space.
    shell:
        "obiannotate --delete-tag=scientific_name_by_db --delete-tag=obiclean_samplecount "
        "--delete-tag=obiclean_count --delete-tag=obiclean_singletoncount "
        "--delete-tag=obiclean_cluster --delete-tag=obiclean_internalcount "
        "--delete-tag=obiclean_head --delete-tag=obiclean_headcount "
        "--delete-tag=id_status --delete-tag=rank_by_db --delete-tag=obiclean_status "
        "--delete-tag=seq_length_ori --delete-tag=sminL --delete-tag=sminR "
        "--delete-tag=reverse_score --delete-tag=reverse_primer --delete-tag=reverse_match --delete-tag=reverse_tag "
        "--delete-tag=forward_tag --delete-tag=forward_score --delete-tag=forward_primer --delete-tag=forward_match "
        "--delete-tag=tail_quality {input} > {output} 2> {log}"
### The sequences can be sorted by decreasing order of count
# Runs `obisort -k count -r` on the formatted FASTA file of one run. Standard
# output becomes the sorted FASTA file; standard error is captured in the
# per-run log file.
rule sort_runs:
    input:
        "04-formated/{run}_run.a.t.u.fasta"
    output:
        "04-formated/{run}_run.s.a.t.u.fasta"
    log:
        "../99-log/12-sort_runs/{run}.log"
    singularity:
        config["container"]
    shell:
        "obisort -k count -r {input} > {output} 2> {log}"
### Generate the final results table
# Runs `obitab -o` on the sorted FASTA file of one run. Standard output is
# written to the per-run CSV file; standard error is captured in the per-run
# log file.
rule table_runs:
    input:
        "04-formated/{run}_run.s.a.t.u.fasta"
    output:
        "../04-final_tables/{run}.csv"
    log:
        "../99-log/13-table_runs/{run}.log"
    singularity:
        config["container"]
    shell:
        "obitab -o {input} > {output} 2> {log}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment