Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
edna
snakemake_rapidrun_swarm
Commits
aedac20f
Commit
aedac20f
authored
Feb 23, 2021
by
peguerin
Browse files
add fastqc rule
parent
6d0e4250
Changes
3
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
aedac20f
...
@@ -83,6 +83,8 @@ def makeDirectory(path_to_directory):
...
@@ -83,6 +83,8 @@ def makeDirectory(path_to_directory):
def mkdir_results(results_subfolders):
def mkdir_results(results_subfolders):
if not config['fastqc']:
del results_subfolders['fastq_quality_control']
for k in results_subfolders:
for k in results_subfolders:
subfolder=results_subfolders[k]
subfolder=results_subfolders[k]
if not os.path.exists(os.path.join("results",subfolder)):
if not os.path.exists(os.path.join("results",subfolder)):
...
@@ -150,7 +152,7 @@ def str_join(df, sep, *cols):
...
@@ -150,7 +152,7 @@ def str_join(df, sep, *cols):
# MAIN
# MAIN
###############################################################################
###############################################################################
## generate results subfolders
mkdir_results(results_subfolders)
mkdir_results(results_subfolders)
## check format (CLASSIC or RAPIDRUN)
## check format (CLASSIC or RAPIDRUN)
...
@@ -170,8 +172,8 @@ if config['format'] == "CLASSIC":
...
@@ -170,8 +172,8 @@ if config['format'] == "CLASSIC":
}
}
dfrClassic = dfrClassic.append(thisRow, ignore_index=True)
dfrClassic = dfrClassic.append(thisRow, ignore_index=True)
print(dfrClassic)
print(dfrClassic)
rapidrunfile=results_subfolders['settings']+'/all_samples_classic.csv'
rapidrunfile=
'results/'+
results_subfolders['settings']+'/all_samples_classic.csv'
export_allsample = dfrClassic.to_csv (r'
./'+
rapidrunfile, index = None, header = False, sep = ';')
export_allsample = dfrClassic.to_csv (r'
{}'.format(
rapidrunfile
)
, index = None, header = False, sep = ';')
else:
else:
print("RAPIDRUN data: many markers for many runs")
print("RAPIDRUN data: many markers for many runs")
#configfile: "01_infos/config.yaml"
#configfile: "01_infos/config.yaml"
...
@@ -249,7 +251,7 @@ for run in uniqRuns:
...
@@ -249,7 +251,7 @@ for run in uniqRuns:
dfMulti = dfMulti.append( thisRow, ignore_index=True)
dfMulti = dfMulti.append( thisRow, ignore_index=True)
demultiplexFile='results/'+results_subfolders['settings']+'/all_demultiplex.csv'
demultiplexFile='results/'+results_subfolders['settings']+'/all_demultiplex.csv'
export_csv = dfMulti.to_csv (r'
./'+
demultiplexFile, index = None, header=True,sep=",")
export_csv = dfMulti.to_csv (r'
{}'.format(
demultiplexFile
)
, index = None, header=True,sep=",")
print (dfMulti)
print (dfMulti)
...
@@ -285,10 +287,10 @@ uniqRuns=dfrm.run.unique()
...
@@ -285,10 +287,10 @@ uniqRuns=dfrm.run.unique()
## projmarkrun wildcards
## projmarkrun wildcards
#dfMultiProjMarkRunSample=dfMulti[['projet','marker','run','demultiplex']]
#dfMultiProjMarkRunSample=dfMulti[['projet','marker','run','demultiplex']]
dfMultiProjMarkRunSample
=
dfMulti.loc[:, ('projet','marker','run','demultiplex')]
dfMultiProjMarkRunSample
=
dfMulti.loc[:, ('projet','marker','run','demultiplex')]
dfMultiProjMarkRunSample["demultiplex"]="results/"+results_subfolders['demultiplex_tag']+"/samples/"+dfMultiProjMarkRunSample["demultiplex"]+".fastq"
dfMultiProjMarkRunSample["demultiplex"]="results/"+results_subfolders['demultiplex_tag']+"/samples/"+dfMultiProjMarkRunSample["demultiplex"]+".fastq"
#dfMultiProjMarkRun=dfMulti[['projet','marker','run']]
#dfMultiProjMarkRun=dfMulti[['projet','marker','run']]
dfMultiProjMarkRun
=
dfMulti.loc[:, ('projet','marker','run')]
dfMultiProjMarkRun
=
dfMulti.loc[:, ('projet','marker','run')]
dfMultiProjMarkRun['projMarkRun']=str_join(dfMultiProjMarkRun, '/', 'projet', 'marker', 'run')
dfMultiProjMarkRun['projMarkRun']=str_join(dfMultiProjMarkRun, '/', 'projet', 'marker', 'run')
dfMultiProjMarkRunSample['projMarkRun']=str_join(dfMultiProjMarkRunSample, '/', 'projet', 'marker', 'run')
dfMultiProjMarkRunSample['projMarkRun']=str_join(dfMultiProjMarkRunSample, '/', 'projet', 'marker', 'run')
dfMultiProjMarkRun = dfMultiProjMarkRun.drop_duplicates()
dfMultiProjMarkRun = dfMultiProjMarkRun.drop_duplicates()
...
@@ -296,18 +298,29 @@ print(dfMultiProjMarkRun)
...
@@ -296,18 +298,29 @@ print(dfMultiProjMarkRun)
# projmark wildcards
# projmark wildcards
#dfMultiProjMark=dfMulti[['projet','marker']]
#dfMultiProjMark=dfMulti[['projet','marker']]
dfMultiProjMark
=
dfMulti.loc[:, ('projet','marker')]
dfMultiProjMark
=
dfMulti.loc[:, ('projet','marker')]
dfMultiProjMark['projMark']
=
str_join(dfMultiProjMark, '/', 'projet', 'marker')
dfMultiProjMark['projMark']
=
str_join(dfMultiProjMark, '/', 'projet', 'marker')
dfMultiProjMark = dfMultiProjMark.drop_duplicates()
dfMultiProjMark = dfMultiProjMark.drop_duplicates()
ruleAllInputList = [
'results/'+results_subfolders['flags']+'/fastq_quality_control.flag',
'results/'+results_subfolders['flags']+'/table_assigned_sequences.flag'
]
if not config['fastqc']:
ruleAllInputList.remove('results/'+results_subfolders['flags']+'/fastq_quality_control.flag')
rule all:
rule all:
input:
input:
'results/'+results_subfolders['flags']+'/table_assigned_sequences.flag'
ruleAllInputList
include: "rules/merge_fastq.smk"
if config['fastqc']:
include: "rules/fastq_quality_control.smk"
include: "rules/merge_fastq_after_fastqc.smk"
else:
include: "rules/merge_fastq.smk"
include: "rules/demultiplex_tag.smk"
include: "rules/demultiplex_tag.smk"
...
...
rules/fastq_quality_control.smk
0 → 100644
View file @
aedac20f
__author__ = "Pierre-Edouard Guerin"
__license__ = "MIT"
## Run FastQC on one gzipped fastq file and keep its report archive.
## The `runR` wildcard names a single read file; the aggregating rule in this
## file requests values of the form "<run>_R1" and "<run>_R2".
rule fastq_quality_control:
input:
# The folder is concatenated as-is with the file name, so
# config["fichiers"]["folder_fastq"] presumably has to end with a path
# separator -- TODO confirm against the config file.
config["fichiers"]["folder_fastq"]+'{runR}.fastq.gz'
output:
# Only the zip archive is declared as an output of this rule.
'results/'+results_subfolders['fastq_quality_control']+'/{runR}_fastqc.zip'
params:
# Directory handed to `fastqc --outdir`; same folder as the declared output.
resFolder='results/'+results_subfolders['fastq_quality_control'],
conda:
'../envs/env_fastqc.yaml'
resources:
job=1
log:
'logs/'+results_subfolders['fastq_quality_control']+'/{runR}.log'
threads:
1
shell:
# Only stderr is redirected to the log file; stdout is left untouched.
'''
fastqc --outdir {params.resFolder} {input} 2> {log}
'''
## Aggregate every FastQC archive into a single flag file.
## NOTE: despite its name this is declared with `rule`, not with Snakemake's
## `checkpoint` keyword.
rule checkpoint_fastqc:
input:
# One archive per run and per read direction (R2 entries first, then R1).
# `uniqRuns` is not defined in this file; it has to be provided by the
# Snakefile that includes it.
expand('results/'+results_subfolders['fastq_quality_control']+'/{runR}_fastqc.zip', runR=[str(r)+"_R2" for r in uniqRuns]+[str(r)+"_R1" for r in uniqRuns])
output:
# Empty marker file created once all inputs above exist.
touch('results/'+results_subfolders['flags']+'/fastq_quality_control.flag')
rules/merge_fastq_after_fastqc.smk
0 → 100644
View file @
aedac20f
__author__ = "Pierre-Edouard Guerin"
__license__ = "MIT"
## Merge the R1/R2 read pairs of one run with vsearch.
## The only declared input is the FastQC flag file, so Snakemake has to produce
## that flag (i.e. finish quality control) before any merge job can start.
rule merge_fastq_after_fastqc:
input:
'results/'+results_subfolders['flags']+'/fastq_quality_control.flag'
output:
fq='results/'+results_subfolders['merge_fastq']+'/{run}.fastq'
singularity:
config["singularity"]["ednatools"]
conda:
'../envs/env_vsearch.yaml'
threads:
config["merging"]["vsearch"]["cores"]
resources:
job=1
log:
'logs/'+results_subfolders['merge_fastq']+'/{run}.log'
params:
# The fastq pair is passed through params rather than input, so Snakemake
# does not track these two files as dependencies of the rule.
R1=config["fichiers"]["folder_fastq"]+'{run}_R1.fastq.gz',
R2=config["fichiers"]["folder_fastq"]+'{run}_R2.fastq.gz',
encoding=config["merge_fastq"]["encoding"]
shell:
# `ls {input}` only lists the flag file; the merge itself reads
# params.R1 / params.R2. vsearch's stderr goes to the log file.
'''ls {input};
vsearch \
--threads {threads} \
--fastq_mergepairs {params.R1} \
--reverse {params.R2} \
--fastq_ascii {params.encoding} \
--fastqout {output} \
--fastq_allowmergestagger \
--quiet 2> {log}'''
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment