Commit b67c7e4d authored by peguerin's avatar peguerin

format snakemake

parent c28064aa
......@@ -6,8 +6,7 @@ configfile: "config.yaml"
rule all:
input:
expand("{folder}/{fastqf}_R1.fastq.gz", fastqf=config["fastqFiles"],folder=config["fastqFolderPath"]),
expand("{folder}/{barcode}.dat", barcode=config["barcodeFiles"],folder=config["fastqFolderPath"]),
expand("{folder}/{fastqf}_R1.fastq.gz", fastqf=config["fastqFiles"],folder=config["fastqFolderPath"]),
expand('assembled/{run}/{run}.fastq', run=RUNS),
expand('assembled/{run}/{run}.ali.fastq', run=RUNS),
expand('assembled/{run}/{run}.ali.assigned.fastq', run=RUNS),
......
......@@ -15,17 +15,17 @@ fastqFiles:
- 161124_SND393_A_L005_GWM-858
- 161124_SND393_A_L005_GWM-859
barcodeFiles:
- MB1016K_Teleo
- MB1016L_Teleo
- MB1016M_Teleo
- MB1016N_Teleo
- MB1016O_Teleo
- MB1016P_Teleo
- MB1016Q_Teleo
- MB1016R_Teleo
- MB1016S_Teleo
- MB1016T_Teleo
- MB1016U_Teleo
- 161124_SND393_A_L005_GWM-849 : MB1016K_Teleo
- 161124_SND393_A_L005_GWM-850 : MB1016L_Teleo
- 161124_SND393_A_L005_GWM-851 : MB1016M_Teleo
- 161124_SND393_A_L005_GWM-852 : MB1016N_Teleo
- 161124_SND393_A_L005_GWM-853 : MB1016O_Teleo
- 161124_SND393_A_L005_GWM-854 : MB1016P_Teleo
- 161124_SND393_A_L005_GWM-855 : MB1016Q_Teleo
- 161124_SND393_A_L005_GWM-856 : MB1016R_Teleo
- 161124_SND393_A_L005_GWM-857 : MB1016S_Teleo
- 161124_SND393_A_L005_GWM-858 : MB1016T_Teleo
- 161124_SND393_A_L005_GWM-859 : MB1016U_Teleo
illuminapairedend:
s_min : 40
good_length_samples:
......
configfile: "config.yaml"

# Run names are taken from the forward-read files under raw/, barcode names
# from the .dat files under barcodes/.
RUNS, = glob_wildcards('raw/{run}_R1.fastq.gz')
BARCODES, = glob_wildcards('barcodes/{barcode}.dat')

# DICBARCODES maps each run name to the path of its barcode file.
# Runs and barcodes are paired by position. The lists come back in
# directory-listing order, which is not guaranteed to be stable, so both are
# sorted first to make the run -> barcode pairing deterministic.
if len(BARCODES) > len(RUNS):
    # Positional pairing would run past the end of RUNS; fail with a clear message.
    raise ValueError(
        "found {} barcode files but only {} runs; cannot pair them".format(
            len(BARCODES), len(RUNS)))

DICBARCODES = {
    run: "barcodes/" + bc + ".dat"
    for run, bc in zip(sorted(RUNS), sorted(BARCODES))
}
# Default target: for every run in RUNS, request the sequence files written
# under assembled/ and the log file of each processing rule.
rule all:
    input:
        # sequence files
        expand("assembled/{run}/{run}.fastq", run=RUNS),
        expand("assembled/{run}/{run}.ali.fastq", run=RUNS),
        expand("assembled/{run}/{run}.ali.assigned.fastq", run=RUNS),
        expand("assembled/{run}/{run}.unidentified.fastq", run=RUNS),
        # per-rule logs
        expand("log/remove_unaligned/{run}.log", run=RUNS),
        expand("log/illuminapairedend/{run}.log", run=RUNS),
        expand("log/assign_sequences/{run}.log", run=RUNS),
        expand("log/split_sequences/{run}.log", run=RUNS)
### Paired-end alignment of the R1/R2 reads of a run.
### The --score-min threshold is read from config (illuminapairedend -> s_min).
rule illuminapairedend:
    input:
        R1="raw/{run}_R1.fastq.gz",
        R2="raw/{run}_R2.fastq.gz"
    output:
        fq="assembled/{run}/{run}.fastq"
    params:
        s_min=config["illuminapairedend"]["s_min"]
    log:
        "log/illuminapairedend/{run}.log"
    shell:
        # stdout is the assembled fastq, stderr goes to the rule log
        "illuminapairedend -r {input.R2} {input.R1} "
        "--score-min={params.s_min} > {output.fq} 2> {log}"
### Remove unaligned sequence records: keep only the records whose `mode`
### attribute is not "joined".
rule remove_unaligned:
    input:
        fq="assembled/{run}/{run}.fastq"
    output:
        ali="assembled/{run}/{run}.ali.fastq"
    log:
        "log/remove_unaligned/{run}.log"
    shell:
        # obigrep -p takes the predicate; the inner double quotes are escaped
        # so the shell receives 'mode!="joined"'
        "obigrep -p 'mode!=\"joined\"' {input.fq} > {output.ali} 2> {log}"
### Assign each sequence record to its sample/marker combination
rule assign_sequences:
    input:
        # input[0]: aligned reads of the run
        "assembled/{run}/{run}.ali.fastq",
        # input[1]: barcode file registered for this run in DICBARCODES
        lambda wildcards: DICBARCODES[wildcards.run]
    output:
        assign="assembled/{run}/{run}.ali.assigned.fastq",
        unid="assembled/{run}/{run}.unidentified.fastq"
    log:
        "log/assign_sequences/{run}.log"
    shell:
        # assigned records are read from stdout; -u receives the unidentified ones
        "ngsfilter -t {input[1]} -u {output.unid} {input[0]} "
        "--fasta-output > {output.assign} 2> {log}"
### Split the assigned sequences of a run into a set of sub-files, one per
### value of the `sample` attribute.
### The rule declares no `output`; only its log file is tracked.
rule split_sequences:
    input:
        "assembled/{run}/{run}.ali.assigned.fastq"
    params:
        # file-name prefix handed to obisplit -p
        "samples/{run}_sample_"
    log:
        "log/split_sequences/{run}.log"
    shell:
        'obisplit -p "{params}" -t sample --fasta {input} 2> {log}'
......@@ -31,7 +31,7 @@ rule remove_unaligned:
rule assign_sequences:
input:
'assembled/{run}/{run}.ali.fastq',
lambda wildcards: DICBARCODES[wildcards.run]
lambda wildcards: config["barcodeFiles"][wildcards.run]
output:
assign='assembled/{run}/{run}.ali.assigned.fastq',
unid='assembled/{run}/{run}.unidentified.fastq'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment