Skip to content
GitLab
Explore
Sign in
This is an archived project. Repository and other project resources are read-only.
Commits on Source (3)
rename rules
· 044b79d6
peguerin
authored
Sep 26, 2019
044b79d6
add assembly
· eda0b522
peguerin
authored
Sep 26, 2019
eda0b522
add demultiplex
· c92db63c
peguerin
authored
Sep 26, 2019
c92db63c
Show whitespace changes
Inline
Side-by-side
rules/
step1
.smk
→
00-
rules/
assembly
.smk
View file @
c92db63c
...
...
@@ -8,11 +8,11 @@ rule illuminapairedend:
R1=config['fastqFolderPath']+'{run}_R1.fastq.gz',
R2=config['fastqFolderPath']+'{run}_R2.fastq.gz'
output:
fq='assembl
ed
/{run}/{run}.fastq'
fq='
01-
assembl
y
/{run}/{run}.fastq'
singularity:
config["container"]
log:
'log/illuminapairedend/{run}.log'
'
99-
log/illuminapairedend/{run}.log'
params:
s_min=config["illuminapairedend"]["s_min"]
shell:
...
...
@@ -21,42 +21,13 @@ rule illuminapairedend:
### Remove unaligned sequence records
rule remove_unaligned:
    # Filters the assembled reads of one run with obigrep, keeping the records
    # that satisfy the predicate mode!="joined", and writes them to the
    # `.ali.fastq` file next to the input.
    input:
        # Assembled reads for the run.
        fq='01-assembly/{run}/{run}.fastq'
    output:
        # Records kept by the obigrep predicate.
        ali='01-assembly/{run}/{run}.ali.fastq'
    singularity:
        config["container"]
    log:
        # Receives obigrep's stderr (see `2> {log}` in the shell command).
        '99-log/remove_unaligned/{run}.log'
    shell:
        '''obigrep -p 'mode!=\"joined\"' {input.fq} > {output.ali} 2> {log}'''
### Assign each sequence record to the corresponding sample/marker combination
rule assign_sequences:
    # Runs ngsfilter on the `.ali.fastq` file of one run. Assigned records go to
    # stdout (redirected to `output.assign`); records passed to `-u` go to
    # `output.unid`.
    input:
        'assembled/{run}/{run}.ali.fastq',
    output:
        assign='assembled/{run}/{run}.ali.assigned.fastq',
        unid='assembled/{run}/{run}.unidentified.fastq'
    singularity:
        config["container"]
    params:
        # Barcode file name is looked up per run in config["barcodeFiles"].
        # The trailing comma is required: without it the next keyword argument
        # is parsed as part of the lambda body and the Snakefile fails to load.
        barcodeFile=lambda wcs: config["barcodeFiles"][wcs.run],
        # Folder prefix concatenated directly with the barcode file name in the
        # shell command, so it is expected to end with a path separator.
        barcodeFolder=config['fastqFolderPath']
    log:
        'log/assign_sequences/{run}.log'
    shell:
        '''ngsfilter -t {params.barcodeFolder}{params.barcodeFile} -u {output.unid} {input[0]} --fasta-output > {output.assign} 2> {log}'''
### Split the input sequence file in a set of subfiles according to the values of attribute `sample`
rule split_sequences:
    # Calls obisplit on the assigned reads of one run, splitting on the
    # `sample` attribute; the unnamed param is the file-name prefix handed to
    # `-p`. The rule declares no `output:`.
    singularity:
        config["container"]
    input:
        "assembled/{run}/{run}.ali.assigned.fastq"
    params:
        "samples/{run}_sample_"
    log:
        "log/split_sequences/{run}.log"
    shell:
        'obisplit -p "{params}" -t sample --fasta {input} 2> {log}'
00-rules/demultiplex.smk
0 → 100644
View file @
c92db63c
### Assign each sequence record to the corresponding sample/marker combination
rule assign_sequences:
    # Runs ngsfilter on the `.ali.fastq` file of one run: stdout is captured in
    # `output.assign`, the file given to `-u` is `output.unid`, and stderr goes
    # to the log.
    singularity:
        config["container"]
    input:
        "01-assembly/{run}/{run}.ali.fastq"
    output:
        unid="01-assembly/{run}/{run}.unidentified.fastq",
        assign="01-assembly/{run}/{run}.ali.assigned.fastq"
    log:
        "99-log/assign_sequences/{run}.log"
    params:
        # Folder prefix and per-run barcode file name; the shell command
        # concatenates them with no separator in between.
        barcodeFolder=config["fastqFolderPath"],
        barcodeFile=lambda wildcards: config["barcodeFiles"][wildcards.run]
    shell:
        "ngsfilter -t {params.barcodeFolder}{params.barcodeFile} -u {output.unid} {input} --fasta-output > {output.assign} 2> {log}"
### Split the input sequence file in a set of subfiles according to the values of attribute `sample`
rule split_sequences:
    # Calls obisplit on the assigned reads of one run, splitting on the
    # `sample` attribute. The unnamed param is the file-name prefix handed to
    # `-p`. The rule declares no `output:`.
    input:
        '01-assembly/{run}/{run}.ali.assigned.fastq'
    params:
        '02-demultiplex/{run}_sample_'
    singularity:
        config["container"]
    log:
        # Uses the 99-log/ prefix like the other rules in this workflow
        # (illuminapairedend, remove_unaligned, assign_sequences).
        '99-log/split_sequences/{run}.log'
    shell:
        '''obisplit -p "{params}" -t sample --fasta {input} 2> {log}'''
rules/step1.sf
→
00-
rules/step1.sf
View file @
c92db63c
File moved
rules/step2.sf
→
00-
rules/step2.sf
View file @
c92db63c
File moved
00-rules/step2.smk
0 → 100644
View file @
c92db63c
### Dereplicate reads into unique sequences
rule dereplicate_samples:
    # Runs `obiuniq -m sample` on one sample's FASTA file and stores stdout as
    # the `.uniq.fasta` file; stderr goes to the log.
    # NOTE(review): this reads from samples/ while split_sequences in
    # demultiplex.smk uses the prefix 02-demultiplex/ — confirm which
    # directory is intended.
    log:
        "log/dereplicate_samples/{sample}.log"
    input:
        "samples/{sample}.fasta"
    output:
        "samples/{sample}.uniq.fasta"
    shell:
        "obiuniq -m sample {input} > {output} 2> {log}"
### Keep only sequences with no ambiguous IUPAC bases whose length and total read count exceed the configured thresholds (e.g. longer than 20 bp, coverage greater than 10 reads)
rule goodlength_samples:
    # Filters the dereplicated sequences of one sample with obigrep using
    # three conditions: count above `params.count`, sequence matching
    # ^[ACGT]+$, and seq_length above `params.seq_length`.
    params:
        # Both thresholds are read from config["good_length_samples"].
        seq_length=config["good_length_samples"]["seq_length"],
        count=config["good_length_samples"]["count"]
    log:
        "log/goodlength_samples/{sample}.log"
    input:
        "samples/{sample}.uniq.fasta"
    output:
        "samples/{sample}.l.u.fasta"
    shell:
        """obigrep -p 'count>{params.count}' -s '^[ACGT]+$' -p 'seq_length>{params.seq_length}' {input} > {output} 2> {log}"""
### Clean the sequences for PCR/sequencing errors (sequence variants)
rule clean_pcrerr_samples:
    # Runs obiclean on the length/count-filtered sequences of one sample,
    # passing the configured value to `-r`; stdout becomes the `.r.l.u.fasta`
    # file and stderr goes to the log.
    params:
        # Value for obiclean's -r option, read from config["clean_pcrerr_samples"].
        r=config["clean_pcrerr_samples"]["r"]
    log:
        "log/clean_pcrerr/{sample}.log"
    input:
        "samples/{sample}.l.u.fasta"
    output:
        "samples/{sample}.r.l.u.fasta"
    shell:
        "obiclean -r {params.r} {input} > {output} 2> {log}"
### Remove sequences that are classified as 'internal' by obiclean
rule rm_internal_samples:
    # Filters the obiclean output of one sample with obigrep, keeping records
    # for which `obiclean_internalcount == 0`; stdout becomes the
    # `.c.r.l.u.fasta` file and stderr goes to the log.
    log:
        "log/rm_internal_samples/{sample}.log"
    input:
        "samples/{sample}.r.l.u.fasta"
    output:
        "samples/{sample}.c.r.l.u.fasta"
    shell:
        """obigrep -p 'obiclean_internalcount == 0' {input} > {output} 2> {log}"""
rules/step3.sf
→
00-
rules/step3.sf
View file @
c92db63c
File moved
rules/step4.sf
→
00-
rules/step4.sf
View file @
c92db63c
File moved
01-assembly/.gitkeep
0 → 100644
View file @
c92db63c
02-demultiplex/.gitkeep
0 → 100644
View file @
c92db63c