Commit 780f0026 authored by peguerin's avatar peguerin
Browse files

run sample

parent 1d522d24
### dereplicate reads into unique sequences
rule dereplicate_samples:
# Runs `obiuniq -m sample` on one sample FASTA and writes the result to a ".uniq.fasta" file.
# NOTE(review): this text is a rendered diff. Where two near-identical lines follow each other,
# the first appears to be the pre-commit line and the second (with the extra {run} directory
# level) its replacement; only one of each pair belongs in the real rule file.
input:
'01-raw/{sample}.fasta'
'01-raw/{run}/{sample}.fasta'
output:
'02-filtered/{sample}.uniq.fasta'
'02-filtered/{run}/{sample}.uniq.fasta'
# Container image is taken from the "container" key of the workflow config.
singularity:
config["container"]
log:
'../99-log/05-dereplicate_samples/{sample}.log'
'../99-log/05-dereplicate_samples/{run}/{sample}.log'
# Per-run output directory, used only by the `mkdir -p` call in the shell command below.
params:
dir='02-filtered/{run}/'
# obiuniq's stdout is redirected to {output} and its stderr to {log}.
# The second variant first creates {params.dir} with `mkdir -p`.
shell:
'''obiuniq -m sample {input} > {output} 2> {log}'''
'''mkdir -p {params.dir}; obiuniq -m sample {input} > {output} 2> {log}'''
### keep only sequences longer than 20 bp, with no ambiguous IUPAC bases, and with a total coverage greater than 10 reads
rule goodlength_samples:
# Filters the dereplicated FASTA of one sample with obigrep and writes the result to a ".l.u.fasta" file.
# NOTE(review): this text is a rendered diff. Where two near-identical lines follow each other,
# the first appears to be the pre-commit line and the second (with the extra {run} directory
# level) its replacement.
input:
'02-filtered/{sample}.uniq.fasta'
'02-filtered/{run}/{sample}.uniq.fasta'
output:
'02-filtered/{sample}.l.u.fasta'
'02-filtered/{run}/{sample}.l.u.fasta'
# Container image is taken from the "container" key of the workflow config.
singularity:
config["container"]
log:
'../99-log/06-goodlength_samples/{sample}.log'
'../99-log/06-goodlength_samples/{run}/{sample}.log'
# Both thresholds are read from the "good_length_samples" section of the config.
# NOTE(review): the two seq_length lines are textually identical here, presumably a
# whitespace-only change in the commit; keep a single one in the real file.
params:
count=config["good_length_samples"]["count"],
seq_length=config["good_length_samples"]["seq_length"]
seq_length=config["good_length_samples"]["seq_length"]
# Three obigrep filters are combined: count > {params.count}, sequence made only of A/C/G/T
# (regex ^[ACGT]+$), and seq_length > {params.seq_length}. stdout goes to {output}, stderr to {log}.
shell:
'''obigrep -p 'count>{params.count}' -s '^[ACGT]+$' -p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}'''
### Clean the sequences for PCR/sequencing errors (sequence variants)
rule clean_pcrerr_samples:
input:
'02-filtered/{sample}.l.u.fasta'
'02-filtered/{run}/{sample}.l.u.fasta'
output:
'02-filtered/{sample}.r.l.u.fasta'
'02-filtered/{run}/{sample}.r.l.u.fasta'
singularity:
config["container"]
log:
'../99-log/07-clean_pcrerr/{sample}.log'
'../99-log/07-clean_pcrerr/{run}/{sample}.log'
params:
r=config["clean_pcrerr_samples"]["r"]
r=config["clean_pcrerr_samples"]["r"]
shell:
'''if [ -s {input} ]
then
......@@ -50,13 +52,15 @@ fi'''
### Remove sequences that are classified as 'internal' by obiclean
rule rm_internal_samples:
# Keeps only the records whose `obiclean_internalcount` equals 0 and writes them to a
# ".c.r.l.u.fasta" file under 03-cleaned/.
# NOTE(review): this text is a rendered diff. Where two near-identical lines follow each other,
# the first appears to be the pre-commit line and the second (with the extra {run} directory
# level) its replacement.
input:
'02-filtered/{sample}.r.l.u.fasta'
'02-filtered/{run}/{sample}.r.l.u.fasta'
output:
'03-cleaned/{sample}.c.r.l.u.fasta'
'03-cleaned/{run}/{sample}.c.r.l.u.fasta'
# Per-run output directory, used only by the `mkdir -p` call in the shell command below.
params:
dir='03-cleaned/{run}/'
# Container image is taken from the "container" key of the workflow config.
singularity:
config["container"]
log:
'../99-log/08-rm_internal_samples/{sample}.log'
'../99-log/08-rm_internal_samples/{run}/{sample}.log'
# obigrep's stdout is redirected to {output} and its stderr to {log}.
# The second variant first creates {params.dir} with `mkdir -p`. It must open with exactly
# three quotes: a fourth quote becomes a literal "'" at the start of the command and
# unbalances the shell quoting around the obigrep predicate.
shell:
'''obigrep -p 'obiclean_internalcount == 0' {input} > {output} 2> {log}'''
'''mkdir -p {params.dir}; obigrep -p 'obiclean_internalcount == 0' {input} > {output} 2> {log}'''
......@@ -11,15 +11,15 @@ print(RUNS)
rule all:
# Aggregate target rule: lists every filtered/cleaned FASTA and every log file the workflow
# is asked to produce.
# NOTE(review): this text is a rendered diff. The expand() lines keyed only on `sample`
# appear to be the pre-commit targets; the lines that follow them, keyed on `run` and `sample`,
# appear to be their replacements.
# NOTE(review): expand() with run=RUNS and sample=SAMPLES builds every (run, sample)
# combination by default. Confirm that every sample exists in every run; otherwise targets
# with no matching input file will be requested.
input:
expand('02-filtered/{sample}.uniq.fasta',sample=SAMPLES),
expand('02-filtered/{sample}.l.u.fasta',sample=SAMPLES),
expand('02-filtered/{sample}.r.l.u.fasta',sample=SAMPLES),
expand('03-cleaned/{sample}.c.r.l.u.fasta',sample=SAMPLES),
expand('02-filtered/{run}/{sample}.uniq.fasta',run=RUNS, sample=SAMPLES),
expand('02-filtered/{run}/{sample}.l.u.fasta',run=RUNS, sample=SAMPLES),
expand('02-filtered/{run}/{sample}.r.l.u.fasta',run=RUNS, sample=SAMPLES),
expand('03-cleaned/{run}/{sample}.c.r.l.u.fasta',run=RUNS, sample=SAMPLES),
# Per-run FASTA targets; run names come from the "fastqFiles" entry of the config.
expand('../03-filtered/{run}.fasta',run=config["fastqFiles"]),
# Log files of the four per-sample rules are requested as targets as well.
expand('../99-log/05-dereplicate_samples/{sample}.log',sample=SAMPLES),
expand('../99-log/06-goodlength_samples/{sample}.log',sample=SAMPLES),
expand('../99-log/07-clean_pcrerr/{sample}.log',sample=SAMPLES),
expand('../99-log/08-rm_internal_samples/{sample}.log',sample=SAMPLES)
expand('../99-log/05-dereplicate_samples/{run}/{sample}.log',run=RUNS, sample=SAMPLES),
expand('../99-log/06-goodlength_samples/{run}/{sample}.log',run=RUNS, sample=SAMPLES),
expand('../99-log/07-clean_pcrerr/{run}/{sample}.log',run=RUNS, sample=SAMPLES),
expand('../99-log/08-rm_internal_samples/{run}/{sample}.log',run=RUNS, sample=SAMPLES)
include: "../00-rules/filtered.smk"
include: "../00-rules/filter_samples.smk"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment