Commit b67c7e4d authored by peguerin's avatar peguerin
Browse files

format snakemake

parent c28064aa
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@ configfile: "config.yaml"
rule all:
    input:
        expand("{folder}/{fastqf}_R1.fastq.gz", fastqf=config["fastqFiles"],folder=config["fastqFolderPath"]),        
        expand("{folder}/{barcode}.dat", barcode=config["barcodeFiles"],folder=config["fastqFolderPath"]),
        expand('assembled/{run}/{run}.fastq', run=RUNS),
        expand('assembled/{run}/{run}.ali.fastq', run=RUNS),
        expand('assembled/{run}/{run}.ali.assigned.fastq', run=RUNS),
+11 −11
Original line number Diff line number Diff line
@@ -15,17 +15,17 @@ fastqFiles:
  - 161124_SND393_A_L005_GWM-858
  - 161124_SND393_A_L005_GWM-859
barcodeFiles:
  - MB1016K_Teleo
  - MB1016L_Teleo
  - MB1016M_Teleo
  - MB1016N_Teleo
  - MB1016O_Teleo
  - MB1016P_Teleo
  - MB1016Q_Teleo
  - MB1016R_Teleo
  - MB1016S_Teleo
  - MB1016T_Teleo
  - MB1016U_Teleo
  - 161124_SND393_A_L005_GWM-849 : MB1016K_Teleo
  - 161124_SND393_A_L005_GWM-850 : MB1016L_Teleo
  - 161124_SND393_A_L005_GWM-851 : MB1016M_Teleo
  - 161124_SND393_A_L005_GWM-852 : MB1016N_Teleo
  - 161124_SND393_A_L005_GWM-853 : MB1016O_Teleo
  - 161124_SND393_A_L005_GWM-854 : MB1016P_Teleo
  - 161124_SND393_A_L005_GWM-855 : MB1016Q_Teleo
  - 161124_SND393_A_L005_GWM-856 : MB1016R_Teleo
  - 161124_SND393_A_L005_GWM-857 : MB1016S_Teleo
  - 161124_SND393_A_L005_GWM-858 : MB1016T_Teleo
  - 161124_SND393_A_L005_GWM-859 : MB1016U_Teleo
illuminapairedend:
  s_min : 40
good_length_samples:

rules/step1.sf

0 → 100644
+68 −0
Original line number Diff line number Diff line
configfile: "config.yaml"

# Run names come from the R1 archives in raw/, barcode names from barcodes/.
RUNS, = glob_wildcards('raw/{run}_R1.fastq.gz')
BARCODES, = glob_wildcards('barcodes/{barcode}.dat')

# Runs and barcode files are associated by position. Pairing two independent
# directory listings by index is only meaningful when both have the same
# number of entries, so refuse to continue otherwise instead of failing later
# with an IndexError/KeyError that hides the cause.
if len(RUNS) != len(BARCODES):
    raise ValueError(
        "Cannot pair runs with barcode files: found {} run(s) in raw/ "
        "but {} barcode file(s) in barcodes/".format(len(RUNS), len(BARCODES))
    )

# Map each run name to the path of its barcode file.
# Both lists are sorted before pairing so the association does not depend on
# the order in which the filesystem happens to list the two directories.
# NOTE(review): assumes the alphabetical order of run names matches the
# alphabetical order of barcode file names -- confirm for new datasets.
DICBARCODES = {
    run: "barcodes/" + barcode + ".dat"
    for run, barcode in zip(sorted(RUNS), sorted(BARCODES))
}
# Default target: request, for every discovered run, the four FASTQ products
# under assembled/<run>/ and the log file written by each of the four steps.
rule all:
    input:
        expand(
            [
                "assembled/{run}/{run}.fastq",
                "assembled/{run}/{run}.ali.fastq",
                "assembled/{run}/{run}.ali.assigned.fastq",
                "assembled/{run}/{run}.unidentified.fastq"
            ],
            run=RUNS
        ),
        expand(
            [
                "log/remove_unaligned/{run}.log",
                "log/illuminapairedend/{run}.log",
                "log/assign_sequences/{run}.log",
                "log/split_sequences/{run}.log"
            ],
            run=RUNS
        )

### Paired-end alignment of the R1/R2 reads of one run.
### The threshold given to --score-min is read from config.yaml
### (illuminapairedend -> s_min); R2 is passed through -r, R1 positionally.
### NOTE(review): presumably --score-min is a minimum alignment score rather
### than a read quality -- confirm against the OBITools documentation.
rule illuminapairedend:
    log:
        "log/illuminapairedend/{run}.log"
    params:
        s_min=config["illuminapairedend"]["s_min"]
    input:
        R1="raw/{run}_R1.fastq.gz",
        R2="raw/{run}_R2.fastq.gz"
    output:
        fq="assembled/{run}/{run}.fastq"
    shell:
        '''illuminapairedend -r {input.R2} {input.R1} --score-min={params.s_min} > {output.fq} 2> {log}'''

### Drop unaligned sequence records: obigrep keeps only the records for which
### the predicate mode!="joined" holds, and its stderr goes to the rule's log.
rule remove_unaligned:
    log:
        "log/remove_unaligned/{run}.log"
    input:
        fq="assembled/{run}/{run}.fastq"
    output:
        ali="assembled/{run}/{run}.ali.fastq"
    shell:
        '''obigrep -p 'mode!=\"joined\"' {input.fq} > {output.ali} 2> {log}'''

def _barcode_file_for_run(wildcards):
    """Return the barcode file registered in DICBARCODES for the current ``run`` wildcard."""
    return DICBARCODES[wildcards.run]


### Assign each sequence record to the corresponding sample/marker combination.
### input[0] is the aligned FASTQ of the run, input[1] its barcode file (-t);
### records ngsfilter cannot assign are written to the `unid` output (-u).
rule assign_sequences:
    log:
        "log/assign_sequences/{run}.log"
    input:
        "assembled/{run}/{run}.ali.fastq",
        _barcode_file_for_run
    output:
        assign="assembled/{run}/{run}.ali.assigned.fastq",
        unid="assembled/{run}/{run}.unidentified.fastq"
    shell:
        '''ngsfilter -t {input[1]} -u {output.unid} {input[0]} --fasta-output > {output.assign} 2> {log}'''

### Split the input sequence file in a set of subfiles according to the values of attribute `sample`.
### The subfiles are named from the prefix samples/<run>_sample_ and are not
### declared as outputs, so the rule is only triggered through its log file.
rule split_sequences:
    input:
        'assembled/{run}/{run}.ali.assigned.fastq'
    params:
        # Path prefix handed to obisplit (-p) for the per-sample files.
        prefix='samples/{run}_sample_'
    log:
        'log/split_sequences/{run}.log'
    shell:
        # Snakemake only creates the parent directories of declared output and
        # log files; since this rule declares no output, the directory holding
        # the per-sample files must be created explicitly before obisplit runs.
        '''mkdir -p "$(dirname "{params.prefix}")" && obisplit -p "{params.prefix}" -t sample --fasta {input} 2> {log}'''
+1 −1
Original line number Diff line number Diff line
@@ -31,7 +31,7 @@ rule remove_unaligned:
rule assign_sequences:
    input:
        'assembled/{run}/{run}.ali.fastq',
        lambda wildcards: DICBARCODES[wildcards.run]
        lambda wildcards: config["barcodeFiles"][wildcards.run]
    output:
        assign='assembled/{run}/{run}.ali.assigned.fastq',
        unid='assembled/{run}/{run}.unidentified.fastq'