Loading Snakefile +1 −2 Original line number Diff line number Diff line Loading @@ -7,7 +7,6 @@ configfile: "config.yaml" rule all: input: expand("{folder}/{fastqf}_R1.fastq.gz", fastqf=config["fastqFiles"],folder=config["fastqFolderPath"]), expand("{folder}/{barcode}.dat", barcode=config["barcodeFiles"],folder=config["fastqFolderPath"]), expand('assembled/{run}/{run}.fastq', run=RUNS), expand('assembled/{run}/{run}.ali.fastq', run=RUNS), expand('assembled/{run}/{run}.ali.assigned.fastq', run=RUNS), Loading config.yaml +11 −11 Original line number Diff line number Diff line Loading @@ -15,17 +15,17 @@ fastqFiles: - 161124_SND393_A_L005_GWM-858 - 161124_SND393_A_L005_GWM-859 barcodeFiles: - MB1016K_Teleo - MB1016L_Teleo - MB1016M_Teleo - MB1016N_Teleo - MB1016O_Teleo - MB1016P_Teleo - MB1016Q_Teleo - MB1016R_Teleo - MB1016S_Teleo - MB1016T_Teleo - MB1016U_Teleo - 161124_SND393_A_L005_GWM-849 : MB1016K_Teleo - 161124_SND393_A_L005_GWM-850 : MB1016L_Teleo - 161124_SND393_A_L005_GWM-851 : MB1016M_Teleo - 161124_SND393_A_L005_GWM-852 : MB1016N_Teleo - 161124_SND393_A_L005_GWM-853 : MB1016O_Teleo - 161124_SND393_A_L005_GWM-854 : MB1016P_Teleo - 161124_SND393_A_L005_GWM-855 : MB1016Q_Teleo - 161124_SND393_A_L005_GWM-856 : MB1016R_Teleo - 161124_SND393_A_L005_GWM-857 : MB1016S_Teleo - 161124_SND393_A_L005_GWM-858 : MB1016T_Teleo - 161124_SND393_A_L005_GWM-859 : MB1016U_Teleo illuminapairedend: s_min : 40 good_length_samples: Loading rules/step1.sf 0 → 100644 +68 −0 Original line number Diff line number Diff line configfile: "config.yaml" RUNS, = glob_wildcards('raw/{run}_R1.fastq.gz') BARCODES, = glob_wildcards('barcodes/{barcode}.dat') DICBARCODES={} i=0 for bc in BARCODES: DICBARCODES[RUNS[i]]="barcodes/"+bc+".dat" i=i+1 #print(DICBARCODES) rule all: input: expand('assembled/{run}/{run}.fastq', run=RUNS), expand('assembled/{run}/{run}.ali.fastq', run=RUNS), expand('assembled/{run}/{run}.ali.assigned.fastq', run=RUNS), expand('assembled/{run}/{run}.unidentified.fastq', 
run=RUNS), expand('log/remove_unaligned/{run}.log',run=RUNS), expand('log/illuminapairedend/{run}.log',run=RUNS), expand('log/assign_sequences/{run}.log',run=RUNS), expand('log/split_sequences/{run}.log',run=RUNS) ### Paired end alignment then keep reads with quality > 40 rule illuminapairedend: input: R1='raw/{run}_R1.fastq.gz', R2='raw/{run}_R2.fastq.gz' output: fq='assembled/{run}/{run}.fastq' log: 'log/illuminapairedend/{run}.log' params: s_min=config["illuminapairedend"]["s_min"] shell: '''illuminapairedend -r {input.R2} {input.R1} --score-min={params.s_min} > {output.fq} 2> {log}''' ### Remove unaligned sequence records rule remove_unaligned: input: fq='assembled/{run}/{run}.fastq' output: ali='assembled/{run}/{run}.ali.fastq' log: 'log/remove_unaligned/{run}.log' shell: '''obigrep -p 'mode!=\"joined\"' {input.fq} > {output.ali} 2> {log}''' ### Assign each sequence record to the corresponding sample/marker combination rule assign_sequences: input: 'assembled/{run}/{run}.ali.fastq', lambda wildcards: DICBARCODES[wildcards.run] output: assign='assembled/{run}/{run}.ali.assigned.fastq', unid='assembled/{run}/{run}.unidentified.fastq' log: 'log/assign_sequences/{run}.log' shell: '''ngsfilter -t {input[1]} -u {output.unid} {input[0]} --fasta-output > {output.assign} 2> {log}''' ### Split the input sequence file in a set of subfiles according to the values of attribute `sample` rule split_sequences: input: 'assembled/{run}/{run}.ali.assigned.fastq' params: 'samples/{run}_sample_' log: 'log/split_sequences/{run}.log' shell: '''obisplit -p "{params}" -t sample --fasta {input} 2> {log}''' rules/step1.smk +1 −1 Original line number Diff line number Diff line Loading @@ -31,7 +31,7 @@ rule remove_unaligned: rule assign_sequences: input: 'assembled/{run}/{run}.ali.fastq', lambda wildcards: DICBARCODES[wildcards.run] lambda wildcards: config["barcodeFiles"][wildcards.run] output: assign='assembled/{run}/{run}.ali.assigned.fastq', 
unid='assembled/{run}/{run}.unidentified.fastq' Loading Loading
Snakefile +1 −2 Original line number Diff line number Diff line Loading @@ -7,7 +7,6 @@ configfile: "config.yaml" rule all: input: expand("{folder}/{fastqf}_R1.fastq.gz", fastqf=config["fastqFiles"],folder=config["fastqFolderPath"]), expand("{folder}/{barcode}.dat", barcode=config["barcodeFiles"],folder=config["fastqFolderPath"]), expand('assembled/{run}/{run}.fastq', run=RUNS), expand('assembled/{run}/{run}.ali.fastq', run=RUNS), expand('assembled/{run}/{run}.ali.assigned.fastq', run=RUNS), Loading
config.yaml +11 −11 Original line number Diff line number Diff line Loading @@ -15,17 +15,17 @@ fastqFiles: - 161124_SND393_A_L005_GWM-858 - 161124_SND393_A_L005_GWM-859 barcodeFiles: - MB1016K_Teleo - MB1016L_Teleo - MB1016M_Teleo - MB1016N_Teleo - MB1016O_Teleo - MB1016P_Teleo - MB1016Q_Teleo - MB1016R_Teleo - MB1016S_Teleo - MB1016T_Teleo - MB1016U_Teleo - 161124_SND393_A_L005_GWM-849 : MB1016K_Teleo - 161124_SND393_A_L005_GWM-850 : MB1016L_Teleo - 161124_SND393_A_L005_GWM-851 : MB1016M_Teleo - 161124_SND393_A_L005_GWM-852 : MB1016N_Teleo - 161124_SND393_A_L005_GWM-853 : MB1016O_Teleo - 161124_SND393_A_L005_GWM-854 : MB1016P_Teleo - 161124_SND393_A_L005_GWM-855 : MB1016Q_Teleo - 161124_SND393_A_L005_GWM-856 : MB1016R_Teleo - 161124_SND393_A_L005_GWM-857 : MB1016S_Teleo - 161124_SND393_A_L005_GWM-858 : MB1016T_Teleo - 161124_SND393_A_L005_GWM-859 : MB1016U_Teleo illuminapairedend: s_min : 40 good_length_samples: Loading
# rules/step1.sf
#
# Step 1 of the workflow: pair-end assembly, removal of unaligned records,
# sample/marker assignment and per-sample splitting of each sequencing run.
configfile: "config.yaml"


def _pair_runs_with_barcodes(runs, barcodes):
    """Return a dict mapping each run name to its barcode file path.

    Runs and barcodes are discovered by two independent globs whose result
    order follows the filesystem listing and is therefore arbitrary. Pairing
    them by raw position can silently attach a run to the wrong barcode file,
    so both lists are sorted before being zipped together.

    Raises:
        ValueError: if the number of runs and barcode files differ, since a
            positional pairing is meaningless in that case.
    """
    if len(runs) != len(barcodes):
        raise ValueError(
            "cannot pair runs with barcode files: found {} run(s) in raw/ "
            "but {} barcode file(s) in barcodes/".format(len(runs), len(barcodes))
        )
    return {
        run: "barcodes/" + barcode + ".dat"
        for run, barcode in zip(sorted(runs), sorted(barcodes))
    }


# One run per forward-read archive found in raw/.
RUNS, = glob_wildcards('raw/{run}_R1.fastq.gz')
# One barcode description file per run is expected in barcodes/.
BARCODES, = glob_wildcards('barcodes/{barcode}.dat')
# run name -> path of the barcode file handed to ngsfilter.
DICBARCODES = _pair_runs_with_barcodes(RUNS, BARCODES)
#print(DICBARCODES)


# Default target: every per-run product of this step, log files included.
# NOTE(review): split_sequences declares no `output`, so it appears to be
# scheduled only because its log file is requested here - confirm.
rule all:
    input:
        expand('assembled/{run}/{run}.fastq', run=RUNS),
        expand('assembled/{run}/{run}.ali.fastq', run=RUNS),
        expand('assembled/{run}/{run}.ali.assigned.fastq', run=RUNS),
        expand('assembled/{run}/{run}.unidentified.fastq', run=RUNS),
        expand('log/remove_unaligned/{run}.log', run=RUNS),
        expand('log/illuminapairedend/{run}.log', run=RUNS),
        expand('log/assign_sequences/{run}.log', run=RUNS),
        expand('log/split_sequences/{run}.log', run=RUNS)


### Paired end alignment; the minimum alignment score is read from
### config["illuminapairedend"]["s_min"]
rule illuminapairedend:
    input:
        R1='raw/{run}_R1.fastq.gz',
        R2='raw/{run}_R2.fastq.gz'
    output:
        fq='assembled/{run}/{run}.fastq'
    log:
        'log/illuminapairedend/{run}.log'
    params:
        s_min=config["illuminapairedend"]["s_min"]
    shell:
        '''illuminapairedend -r {input.R2} {input.R1} --score-min={params.s_min} > {output.fq} 2> {log}'''


### Remove unaligned sequence records (keeps records whose mode is not "joined")
rule remove_unaligned:
    input:
        fq='assembled/{run}/{run}.fastq'
    output:
        ali='assembled/{run}/{run}.ali.fastq'
    log:
        'log/remove_unaligned/{run}.log'
    shell:
        '''obigrep -p 'mode!=\"joined\"' {input.fq} > {output.ali} 2> {log}'''


### Assign each sequence record to the corresponding sample/marker combination
rule assign_sequences:
    input:
        'assembled/{run}/{run}.ali.fastq',
        # Barcode file for this run; a run missing from DICBARCODES raises KeyError.
        lambda wildcards: DICBARCODES[wildcards.run]
    output:
        assign='assembled/{run}/{run}.ali.assigned.fastq',
        unid='assembled/{run}/{run}.unidentified.fastq'
    log:
        'log/assign_sequences/{run}.log'
    shell:
        '''ngsfilter -t {input[1]} -u {output.unid} {input[0]} --fasta-output > {output.assign} 2> {log}'''


### Split the input sequence file in a set of subfiles according to the values of attribute `sample`
# NOTE(review): the samples/ prefix is passed as a param, not declared as an
# output, so Snakemake will not create that directory - confirm it exists or
# that obisplit creates it.
rule split_sequences:
    input:
        'assembled/{run}/{run}.ali.assigned.fastq'
    params:
        'samples/{run}_sample_'
    log:
        'log/split_sequences/{run}.log'
    shell:
        '''obisplit -p "{params}" -t sample --fasta {input} 2> {log}'''
rules/step1.smk +1 −1 Original line number Diff line number Diff line Loading @@ -31,7 +31,7 @@ rule remove_unaligned: rule assign_sequences: input: 'assembled/{run}/{run}.ali.fastq', lambda wildcards: DICBARCODES[wildcards.run] lambda wildcards: config["barcodeFiles"][wildcards.run] output: assign='assembled/{run}/{run}.ali.assigned.fastq', unid='assembled/{run}/{run}.unidentified.fastq' Loading