# step2.sf -- per-sample dereplication, filtering and cleaning workflow.

# Rule parameters are looked up in this file through the `config` dictionary.
configfile: "config.yaml"

# Suffixes this workflow appends to a sample name when it writes its own
# FASTA files into samples/ ('.l.u' also covers '.r.l.u' and '.c.r.l.u').
_DERIVED_SUFFIXES = ('.uniq', '.l.u')

# Discover the samples from the FASTA files present in samples/.
# The workflow writes its outputs into the same directory, so the glob also
# matches files such as 'X.uniq.fasta'; without the filter below a second run
# would treat 'X.uniq', 'X.l.u', ... as new samples and reprocess them.
SAMPLES = [
    sample
    for sample in glob_wildcards('samples/{sample}.fasta').sample
    if not sample.endswith(_DERIVED_SUFFIXES)
]

rule all:
    """Default target: request every per-sample FASTA stage and every step log."""
    input:
        # FASTA files written by the four processing rules
        expand(
            [
                'samples/{sample}.uniq.fasta',
                'samples/{sample}.l.u.fasta',
                'samples/{sample}.r.l.u.fasta',
                'samples/{sample}.c.r.l.u.fasta',
            ],
            sample=SAMPLES,
        ),
        # log files written by the same rules
        expand(
            [
                'log/dereplicate_samples/{sample}.log',
                'log/goodlength_samples/{sample}.log',
                'log/clean_pcrerr/{sample}.log',
                'log/rm_internal_samples/{sample}.log',
            ],
            sample=SAMPLES,
        )

### dereplicate reads into unique sequences
rule dereplicate_samples:
    """Run `obiuniq -m sample` on one sample's FASTA file.

    stdout becomes the '.uniq.fasta' output; stderr is written to the rule log.
    """
    input: "samples/{sample}.fasta"
    output: "samples/{sample}.uniq.fasta"
    log: "log/dereplicate_samples/{sample}.log"
    shell: "obiuniq -m sample {input} > {output} 2> {log}"

### keep only sequences longer than 20bp, with no IUPAC ambiguity codes, and with a total coverage greater than 10 reads (the actual thresholds are read from the config file)
rule goodlength_samples:
    """Filter the dereplicated sequences of one sample with obigrep.

    A sequence is kept when its `count` exceeds params.count, it matches
    '^[ACGT]+$' (only A/C/G/T characters), and its `seq_length` exceeds
    params.seq_length. stdout becomes the '.l.u.fasta' output; stderr is
    written to the rule log.
    """
    input:
        'samples/{sample}.uniq.fasta'
    output:
        'samples/{sample}.l.u.fasta'
    log:
        'log/goodlength_samples/{sample}.log'
    params:
        # Both thresholds are read from the config section named after this rule.
        # NOTE(review): `count` was previously read from "good_length_samples";
        # confirm that config.yaml defines it under "goodlength_samples".
        count=config["goodlength_samples"]["count"],
        seq_length=config["goodlength_samples"]["seq_length"]
    shell:
        '''obigrep  -p 'count>{params.count}' -s '^[ACGT]+$' -p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}'''

### Clean the sequences for PCR/sequencing errors (sequence variants)
rule clean_pcrerr_samples:
    """Run obiclean on the length/count-filtered sequences of one sample.

    The value passed to obiclean's -r option comes from
    config["clean_pcrerr_samples"]["r"]. stdout becomes the '.r.l.u.fasta'
    output; stderr is written to the rule log.
    """
    input:
        'samples/{sample}.l.u.fasta'
    output:
        'samples/{sample}.r.l.u.fasta'
    log:
        'log/clean_pcrerr/{sample}.log'
    params:
        r=config["clean_pcrerr_samples"]["r"]
    shell:
        '''obiclean -r {params.r} {input} > {output} 2> {log}'''

### Remove sequences that are classified as 'internal' by obiclean
rule rm_internal_samples:
    """Keep only the sequences for which `obiclean_internalcount == 0`.

    obigrep reads the obiclean output of one sample; stdout becomes the
    '.c.r.l.u.fasta' output and stderr is written to the rule log.
    """
    input: "samples/{sample}.r.l.u.fasta"
    output: "samples/{sample}.c.r.l.u.fasta"
    log: "log/rm_internal_samples/{sample}.log"
    shell: "obigrep -p 'obiclean_internalcount == 0' {input} > {output} 2> {log}"