### filter_samples.smk

### Dereplicate reads into unique sequences
rule dereplicate_samples:
    # Dereplicates one sample's raw FASTA into unique sequence records by
    # running `obiuniq -m sample` inside the container named in the config.
    # Wildcards: {run} and {sample} select the input file under 01-raw/.
    input:
        '01-raw/{run}/{sample}.fasta'
    output:
        '02-filtered/{run}/{sample}.uniq.fasta'
    singularity:
        config["container"]
    log:
        '../99-log/05-dereplicate_samples/{run}/{sample}.log'
    params:
        # Directory that receives the output; created by the shell command.
        dir='02-filtered/{run}/'
    shell:
        # obiuniq writes its result to stdout (redirected to the output file);
        # only obiuniq's stderr is captured in the log.
        '''mkdir -p {params.dir}; obiuniq -m sample {input} > {output} 2> {log}'''

### Keep only sequences longer than 20 bp, with no IUPAC ambiguity codes, and with a total coverage greater than 10 reads
rule goodlength_samples:
    # Filters the dereplicated sequences of one sample with obigrep, keeping
    # records that pass the count, alphabet and length filters given in the
    # shell command. Runs inside the container named in the config.
    input:
        '02-filtered/{run}/{sample}.uniq.fasta'
    output:
        '02-filtered/{run}/{sample}.l.u.fasta'
    singularity:
        config["container"]
    log:
        '../99-log/06-goodlength_samples/{run}/{sample}.log'
    params:
        # Both thresholds are read from the "good_length_samples" config
        # section and are used as strict lower bounds (`>`).
        count=config["good_length_samples"]["count"],
        seq_length=config["good_length_samples"]["seq_length"]
    shell:
        # -p 'count>N'       : predicate on the record's count attribute
        # -s '^[ACGT]+$'     : sequence must consist of A, C, G, T only
        # -p 'seq_length>N'  : predicate on the sequence length
        # Filtered records go to the output file; stderr goes to the log.
        '''obigrep  -p 'count>{params.count}' -s '^[ACGT]+$' -p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}'''

### Clean the sequences for PCR/sequencing errors (sequence variants)
rule clean_pcrerr_samples:
    # Runs obiclean on the length/count-filtered sequences of one sample,
    # inside the container named in the config. The annotated records are
    # written to the output file for the downstream filtering rule.
    input:
        '02-filtered/{run}/{sample}.l.u.fasta'
    output:
        '02-filtered/{run}/{sample}.r.l.u.fasta'
    singularity:
        config["container"]
    log:
        '../99-log/07-clean_pcrerr/{run}/{sample}.log'
    params:
        # Value passed to obiclean's -r option, read from the
        # "clean_pcrerr_samples" config section.
        r=config["clean_pcrerr_samples"]["r"]
    shell:
        # stdout is the result file; stderr is captured in the log.
        '''obiclean -r {params.r} {input} > {output} 2> {log}'''

### Remove sequences that are classified as 'internal' by obiclean
rule rm_internal_samples:
    # Keeps only the records whose `obiclean_internalcount` attribute equals 0
    # in the obiclean output of one sample, and writes them under 03-cleaned/.
    # Runs inside the container named in the config.
    input:
        '02-filtered/{run}/{sample}.r.l.u.fasta'
    output:
        '03-cleaned/{run}/{sample}.c.r.l.u.fasta'
    singularity:
        config["container"]
    log:
        '../99-log/08-rm_internal_samples/{run}/{sample}.log'
    params:
        # Directory that receives the output; created by the shell command.
        dir='03-cleaned/{run}/'
    shell:
        # The command string must open with exactly three quotes: an extra
        # quote would become part of the command and leave bash with an
        # unterminated single-quoted string.
        # stdout is the result file; only obigrep's stderr goes to the log.
        '''mkdir -p {params.dir}; obigrep -p "obiclean_internalcount == 0" {input} > {output} 2> {log}'''