Commit de2524ae authored by peguerin's avatar peguerin
Browse files

custom reference database

parent 747a0c8c
......@@ -272,6 +272,26 @@ if config['format'] == "CLASSIC":
else:
dfpmr['bdr'] = dfpmr['marker'].map(config["assign_taxon"]["bdr"])
dfpmr['fasta'] = dfpmr['marker'].map(config["assign_taxon"]["fasta"])
## also perform taxonomic assignment with custom reference database
## For every marker listed in config['custom_bdr']['bdr'], the matching rows of
## `dfpmr` are duplicated with the custom `bdr`/`fasta` values and a
## `projmarkrun` suffixed by '_custom'. A two-column TSV (custom name, real
## name, no header) linking each custom run to its original is then written.
if config['custom_bdr']['status']:
    import os  # local import: only needed to create the TSV output folder

    print("also perform taxonomic assignment with custom reference database")
    custom_suffix = '_custom'
    custom_frames = []
    for crmarker in config['custom_bdr']['bdr'].keys():
        print(crmarker)
        df_crmarker = dfpmr[(dfpmr.marker == crmarker)].copy()
        df_crmarker['bdr'] = config['custom_bdr']['bdr'][crmarker]
        df_crmarker['fasta'] = config['custom_bdr']['fasta'][crmarker]
        df_crmarker['projmarkrun'] = df_crmarker['projmarkrun'].astype(str) + custom_suffix
        custom_frames.append(df_crmarker)
    ## append all custom rows in one go instead of re-concatenating per marker
    if custom_frames:
        dfpmr = pandas.concat([dfpmr] + custom_frames, ignore_index=True)
    ## select custom runs by their *suffix* only: a substring/regex match would
    ## also catch (and then mangle) names that merely contain '_custom'
    projmarkrun_names = dfpmr['projmarkrun'].astype(str)
    customprojmarkrun = projmarkrun_names[projmarkrun_names.str.endswith(custom_suffix)]
    ## drop exactly one trailing suffix to recover the original run name
    realprojmarkrun = customprojmarkrun.str[:-len(custom_suffix)]
    d_ln_custom = {'customprojmarkrun': customprojmarkrun, 'realprojmarkrun': realprojmarkrun}
    df_ln_custom = pandas.DataFrame(d_ln_custom)
    print(df_ln_custom)
    customLnTsvFile = 'results/01_settings/customlinkprojtmarkrun.tsv'
    ## make sure the destination folder exists before writing the link table
    os.makedirs(os.path.dirname(customLnTsvFile), exist_ok=True)
    df_ln_custom.to_csv(r'./' + customLnTsvFile, index=None, header=False, sep='\t')
## display selected `projet`/`marker`/`run` with related information
## (at this point `dfpmr` also holds the extra `_custom` rows when
## config['custom_bdr']['status'] is set)
print(dfpmr)
......@@ -350,25 +370,41 @@ rule flag_filtering_done:
output:
touch('results/00_flags/filtering.flag')
## Rule `cat_samples_into_runs`: takes 'results/00_flags/filtering.flag' as
## input and runs scripts/cat_samples_into_runs.sh; the declared outputs are
## one 'results/13_cat_samples_into_runs/{projmarkrun}.fasta' per value of
## dfpmr['projmarkrun'].
## NOTE(review): a rule with this same name and body is defined again in the
## `else:` branch further down. This copy looks like the removed (pre-change)
## side of the diff hunk -- confirm only one definition exists in the Snakefile.
rule cat_samples_into_runs:
input:
'results/00_flags/filtering.flag'
output:
expand('results/13_cat_samples_into_runs/{projmarkrun}.fasta',projmarkrun=dfpmr['projmarkrun'])
shell:
'''
bash scripts/cat_samples_into_runs.sh
'''
## Pick the concatenation rule according to config['custom_bdr']['status'].
## Both variants share the same input flag and the same expanded fasta outputs.
if config['custom_bdr']['status']:
## Custom reference database enabled: after scripts/cat_samples_into_runs.sh,
## also run scripts/link_custom_runs.sh on the TSV path stored in
## `customLnTsvFile` (handed to the shell as {params.cltf}).
## NOTE(review): presumably link_custom_runs.sh produces the `*_custom` fasta
## outputs from their real counterparts -- the script is not visible here, confirm.
rule custom_bdr_cat_samples_into_runs:
input:
'results/00_flags/filtering.flag'
output:
expand('results/13_cat_samples_into_runs/{projmarkrun}.fasta',projmarkrun=dfpmr['projmarkrun'])
params:
cltf= customLnTsvFile
shell:
'''
bash scripts/cat_samples_into_runs.sh
bash scripts/link_custom_runs.sh {params.cltf}
'''
else:
## No custom reference database: only run scripts/cat_samples_into_runs.sh.
rule cat_samples_into_runs:
input:
'results/00_flags/filtering.flag'
output:
expand('results/13_cat_samples_into_runs/{projmarkrun}.fasta',projmarkrun=dfpmr['projmarkrun'])
shell:
'''
bash scripts/cat_samples_into_runs.sh
'''
## Pull in the downstream rule files.
include: "rules/dereplicate_runs.smk"
## NOTE(review): this unconditional include is immediately followed by a
## conditional whose `else:` branch includes the same file. It looks like the
## removed (pre-change) side of the diff hunk -- confirm it is absent from the
## actual Snakefile, otherwise the default rules load even in custom mode.
include: "rules/taxonomic_assignment.smk"
## Use the custom taxonomic-assignment rules when config['custom_bdr']['status']
## is set, the default ones otherwise.
if config['custom_bdr']['status']:
include: "rules/custom_taxonomic_assignment.smk"
else:
include: "rules/taxonomic_assignment.smk"
include: "rules/remove_annotations.smk"
include: "rules/sort_abundance_assigned_sequences.smk"
include: "rules/table_assigned_sequences.smk"
## copy and rename final results
rule flag_table_assigned_sequences:
input:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment