Commit 32702f43 authored by peguerin's avatar peguerin
Browse files

Add multicore option for demultiplexing

Closes #33
parent 7bf205ae
......@@ -106,6 +106,13 @@ def read_dat(filedat):
assert False not in [is_dna_sequence(sequence, "ATGCRYSWKMBDHVN") for sequence in dfdat[col]]
return dfdat
def calculate_cutadapt_available_cores(dfMulti):
    """Return the value assigned to each cutadapt demultiplexing job.

    The number of distinct entries in ``dfMulti['run']`` is multiplied by
    ``workflow.cores`` and compared with ``config['resources']['job']``.

    Parameters
    ----------
    dfMulti
        Table with a ``run`` column; only its distinct values are counted.

    Returns
    -------
    int
        ``workflow.cores`` when ``workflow.cores * <number of runs>`` does not
        exceed ``config['resources']['job']``; otherwise
        ``config['resources']['job'] / workflow.cores`` truncated to an int.
    """
    run_count = int(len(dfMulti['run'].unique()))
    # Over budget: fall back to the configured budget divided by the cores.
    # NOTE(review): the truncation yields 0 whenever workflow.cores is larger
    # than config['resources']['job'] -- confirm that 0 is acceptable here.
    if workflow.cores * run_count > config['resources']['job']:
        return int(config['resources']['job'] / workflow.cores)
    return workflow.cores
def write_barcodes_fasta(dfMulti, results_subfolders):
barcodes_subfolder="results/"+results_subfolders['settings']+"/barcodes/"
......@@ -145,6 +152,7 @@ def str_join(df, sep, *cols):
# MAIN
###############################################################################
## generate results subfolders
mkdir_results(results_subfolders)
......@@ -249,6 +257,9 @@ export_csv = dfMulti.to_csv (r'{}'.format(demultiplexFile), index = None, header
print (dfMulti)
## value computed from the distinct runs of dfMulti, workflow.cores and
## config['resources']['job']; it is used as the `job` resource of the
## cutadapt demultiplexing rule (demultiplex_tag)
nJobsDemultiplexCutadapt = calculate_cutadapt_available_cores(dfMulti)
## write barcodes fasta files for demultiplexing with cutadapt
## (dfMulti is first reloaded from the comma-separated file demultiplexFile)
dfMulti =pandas.read_csv(demultiplexFile, sep=",")
write_barcodes_fasta(dfMulti, results_subfolders)
......
format: RAPIDRUN
resources:
job: 4
job: 8
singularity:
ednatools: /media/superdisk/utils/conteneurs/ednatools.simg
obitools: /media/superdisk/utils/conteneurs/obitools.simg
......@@ -48,6 +48,7 @@ merging:
demultiplexing:
maxLen: 120
minLen: 20
job: 4
clustering:
swarm:
cores: 8
......
......@@ -13,7 +13,7 @@ rule demultiplex_tag:
conda:
'../envs/env_cutadapt.yaml'
resources:
job=1
job=nJobsDemultiplexCutadapt
params:
barcodes='results/'+results_subfolders['settings']+'/barcodes/{run}.fasta',
dereplicated_csv='results/'+results_subfolders['settings']+'/barcodes/{run}_duplicated.csv',
......@@ -22,11 +22,12 @@ rule demultiplex_tag:
'logs/'+results_subfolders['demultiplex_tag']+'/{run}.log'
shell:
'''
cutadapt -j 0 -m {params.minLen} --revcomp -O 8 --discard-untrimmed -g file:{params.barcodes} {input} -o {{name}}.fastq > {log}
cutadapt -j {workflow.cores} -m {params.minLen} --revcomp -O 8 --discard-untrimmed -g file:{params.barcodes} {input} -o {{name}}.fastq > {log}
wait
bash scripts/cp_duplicated_barcode.sh {params.dereplicated_csv}
wait
bash scripts/check_missing_files.sh {params.barcodes} >> {log}
echo "res" {resources.job} >> {log}
wait
touch {output}
'''
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment