Commit 36147d6f authored by peguerin

reorganise files

parent 02887f7c
#===============================================================================
#HEADER
#===============================================================================
__author__ = "Pierre-Edouard Guerin"
__credits__ = ["Pierre-Edouard Guerin", "Virginie Marques"]
__license__ = "MIT"
__version__ = "1.1.4"
__maintainer__ = "Pierre-Edouard Guerin"
__email__ = "pierre-edouard.guerin@cefe.cnrs.fr"
__status__ = "Production"
"""
Codes for scientific papers related to metabarcoding studies
AUTHORS
=======
* Pierre-Edouard Guerin | pierre-edouard.guerin@cefe.cnrs.fr
* Virginie Marques | virginie.marques@cefe.cnrs.fr
* CNRS/CEFE, CNRS/MARBEC | Montpellier, France
* 2018-2020
DESCRIPTION
===========
This is a Snakefile for the SNAKEMAKE workflow management system.
From the sample description .dat files, config.yaml, and rapidrun.tsv,
it produces a demultiplexing .csv file. The output contains all the
wildcards and related information needed to run the next workflow steps.
"""
###############################################################################
# MODULES
###############################################################################
import pandas
from Bio.Seq import Seq
###############################################################################
# FUNCTIONS
###############################################################################
## read a sample description .dat file and return a dataframe object
def read_dat(filedat):
    dfdat = pandas.read_csv(filedat, sep="\t", header=None)
    dfdat.columns = ['experiment', 'plaque', 'barcode', 'primer5', 'primer3', 'F']
    return dfdat
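## A .dat file has no header row; its tab-separated columns are, in order:
## experiment, plaque, barcode, primer5, primer3, F. An illustrative line
## (hypothetical values):
##   projet1    PLAQ01    aattagat    ACACCGCCCGTCACTCT    CTTCCGGTACACTTACCATG    F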
###############################################################################
# GLOBAL VARIABLES
###############################################################################
## check format (CLASSIC or RAPIDRUN)
if config['format'] == "CLASSIC":
print("CLASSIC data: one single marker for each run")
dfrClassic = pandas.DataFrame(columns=['plaque','run','sample','projet','marker'])
for run in config['fichiers']['dat']:
thisRunDatfile=config['fichiers']['dat'][run]
thisDat=read_dat(thisRunDatfile)
for index, datRow in thisDat.iterrows():
thisRow = {
"plaque": datRow['plaque'],
"run": run,
"sample": datRow['plaque'],
"projet": datRow['experiment'],
"marker": run
}
dfrClassic = dfrClassic.append(thisRow, ignore_index=True)
print(dfrClassic)
export_allsample = dfrClassic.to_csv (r'../results/01_settings/all_samples_classic.csv', index = None, header = False, sep = ';')
rapidrunfile="../results/01_settings/all_samples_classic.csv"
else:
print("RAPIDRUN data: many markers for many runs")
#configfile: "01_infos/config.yaml"
rapidrunfile = config['fichiers']['rapidrun']
#rapidrunfile="01_infos/all_samples.tsv"
## read 'rapidrun' .tsv file
dfr = pandas.read_csv(rapidrunfile, sep=";")
dfr.columns = ['plaque', 'run', 'sample', 'projet', 'marker']
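## illustrative semicolon-separated rapidrun row (hypothetical values):
##   PLAQ01;run1;sample01;projet1;teleo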
## remove blacklisted projets
blacklistedProjets = config['blacklist']['projet']
dfrp = dfr[~dfr.projet.isin(blacklistedProjets)]
## remove blacklisted runs
blacklistedRuns = config['blacklist']['run']
dfrm = dfrp[~dfrp.run.isin(blacklistedRuns)]
## get list of `run`, `projet` and `marker` wildcards
uniqRuns = dfrm.run.unique()
uniqProjets = dfrm.projet.unique()
uniqMarkers = dfrm.marker.unique()
###############################################################################
# MAIN
###############################################################################
## list of dataframe of `marker` sample description .dat file
all_dat = {}
for marker in uniqMarkers:
    all_dat[marker] = read_dat(config['fichiers']['dat'][marker])
## init dataframe with all columns we want to output
dfMulti = pandas.DataFrame(columns=["demultiplex", "projmarkrun", "projet", "marker", "run", "plaque", "sample", "barcode5", "barcode3", "primer5", "primer3", "min_f", "min_r", "lenBarcode5", "lenBarcode3"])
## fill the dataframe; each row belongs to a `projet`/`marker`/`run`/`sample` wildcard combination
for run in uniqRuns:
    for marker in uniqMarkers:
        runMarkerDfrm = dfrm[(dfrm.run == run) & (dfrm.marker == marker)]
        for plaque in runMarkerDfrm.plaque:
            selectedRow = runMarkerDfrm[(runMarkerDfrm.plaque == plaque)]
            projet = selectedRow['projet'].values[0]
            sample = selectedRow['sample'].values[0]
            plaque_dat = all_dat[marker][(all_dat[marker].plaque == plaque)]
            barcode5 = plaque_dat["barcode"].values[0]
            ## e.g. str(Seq("aattagat").reverse_complement()) gives "atctaatt"
            barcode3 = str(Seq(barcode5).reverse_complement())
            primer5 = plaque_dat["primer5"].values[0]
            #primer3=str(Seq(plaque_dat["primer3"].values[0]).reverse_complement())
            ## careful: here we keep primer3 exactly as written in the ngsfilter .dat file
            primer3 = plaque_dat["primer3"].values[0]
            lenBarcode5 = len(barcode5)
            lenBarcode3 = len(barcode3)
            ## minimum alignment scores: at least 2/3 of each primer length
            min_f = len(primer5) * 2 // 3
            min_r = len(primer3) * 2 // 3
            sa_fastq = projet + "_" + marker + "/" + sample + ".fastq"
            ru_fastq = run + ".fastq"
            demultiplex = projet + "/" + marker + "/" + run + "/" + sample
            projmarkrun = projet + "/" + marker + "/" + run
            thisRow = {
                "demultiplex": demultiplex,
                "projmarkrun": projmarkrun,
                "projet": projet,
                "marker": marker,
                "run": run,
                "plaque": plaque,
                "sample": sample,
                "barcode5": barcode5,
                "barcode3": barcode3,
                "primer5": primer5,
                "primer3": primer3,
                "min_f": min_f,
                "min_r": min_r,
                "lenBarcode5": lenBarcode5,
                "lenBarcode3": lenBarcode3,
            }
            dfMulti = dfMulti.append(thisRow, ignore_index=True)
## write the demultiplexing .csv file
dfMulti.to_csv(r'../results/01_settings/all_demultiplex.csv', index=False, header=True)
## print an overview of the dataframe we wrote
print(dfMulti)
#===============================================================================
#HEADER
#===============================================================================
__author__ = "Pierre-Edouard Guerin"
__credits__ = ["Pierre-Edouard Guerin", "Virginie Marques"]
__license__ = "MIT"
__version__ = "1.1.4"
__maintainer__ = "Pierre-Edouard Guerin"
__email__ = "pierre-edouard.guerin@cefe.cnrs.fr"
__status__ = "Production"
"""
Codes for scientific papers related to metabarcoding studies
AUTHORS
=======
* Pierre-Edouard Guerin | pierre-edouard.guerin@cefe.cnrs.fr
* Virginie Marques | virginie.marques@cefe.cnrs.fr
* CNRS/CEFE, CNRS/MARBEC | Montpellier, France
* 2018-2020
DESCRIPTION
===========
This is a Snakefile for the SNAKEMAKE workflow management system.
From config.yaml and rapidrun.tsv it runs two rules:
1. Align and merge paired-end reads
2. Remove unaligned sequence records
Results are stored in results/02_assembly/
"""
###############################################################################
# MODULES
###############################################################################
import pandas
import os.path
###############################################################################
# GLOBAL VARIABLES
###############################################################################
#configfile: "01_infos/config.yaml"
#rapidrunfile = "../"+config['fichiers']['rapidrun']
if config['format'] != "CLASSIC":
rapidrunfile = config['fichiers']['rapidrun']
else:
rapidrunfile="../results/01_settings/all_samples_classic.csv"
if os.path.isfile(rapidrunfile) is not True:
raise Exception("ERROR: "+rapidrunfile+" is not a file. You must run step 01_settings first in order to generate this file for the CLASSIC format.")
## read the rapidrun file as a dataframe
dfr = pandas.read_csv(rapidrunfile, sep=";")
dfr.columns = ['plaque', 'run', 'sample', 'projet', 'marker']
## remove blacklisted projets
blacklistedProjets = config['blacklist']['projet']
dfrp = dfr[~dfr.projet.isin(blacklistedProjets)]
## remove blacklisted runs
blacklistedRuns = config['blacklist']['run']
dfrm = dfrp[~dfrp.run.isin(blacklistedRuns)]
## names of `run` wildcards
uniqRuns = dfrm.run.unique()
## set the number of chunks; e.g. nb_chunk: 4 gives chunks [1, 2, 3, 4]
listChunks = list(range(1, config['illuminapairedend']['nb_chunk'] + 1))
###############################################################################
# RULES
###############################################################################
## scatters fastq
if config['illuminapairedend']['nb_chunk'] != 0:
    rule all:
        input:
            expand("{folder}{run}_R1.fastq.gz", run=uniqRuns, folder=config["fichiers"]["folder_fastq"]),
            expand('../results/02_assembly/01_illuminapairedend/{run}_{chunk}.fastq', run=uniqRuns, chunk=listChunks),
            expand('../results/02_assembly/01_illuminapairedend/{run}.fastq', run=uniqRuns),
            expand('../results/02_assembly/02_remove_unaligned/{run}.ali.fastq', run=uniqRuns),
            expand('../logs/02_assembly/02_remove_unaligned/{run}.log', run=uniqRuns),
            expand('../logs/02_assembly/01_illuminapairedend/{run}_{chunk}.log', run=uniqRuns, chunk=listChunks)
    ruleorder: split_fastq > illuminapairedend > merge_chunks > remove_unaligned
    include: "rules/split_fastq.smk"
    include: "rules/chunk_illuminapairedend.smk"
    include: "rules/remove_unaligned.smk"
## keep original fastq files
else:
    rule all:
        input:
            expand("{folder}{run}_R1.fastq.gz", run=uniqRuns, folder=config["fichiers"]["folder_fastq"]),
            expand('../results/02_assembly/01_illuminapairedend/{run}.fastq', run=uniqRuns),
            expand('../results/02_assembly/02_remove_unaligned/{run}.ali.fastq', run=uniqRuns),
            expand('../logs/02_assembly/02_remove_unaligned/{run}.log', run=uniqRuns),
            expand('../logs/02_assembly/01_illuminapairedend/{run}.log', run=uniqRuns)
    include: "rules/illuminapairedend.smk"
    include: "rules/remove_unaligned.smk"
name: obitools
channels:
- conda-forge
- bioconda
dependencies:
- virtualenv=16.7.5
- sphinx
- cython=0.29.13
- obitools
#===============================================================================
#HEADER
#===============================================================================
__author__ = "Pierre-Edouard Guerin"
__credits__ = ["Pierre-Edouard Guerin", "Virginie Marques"]
__license__ = "MIT"
__version__ = "1.1.4"
__maintainer__ = "Pierre-Edouard Guerin"
__email__ = "pierre-edouard.guerin@cefe.cnrs.fr"
__status__ = "Production"
"""
Codes for scientific papers related to metabarcoding studies
AUTHORS
=======
* Pierre-Edouard Guerin | pierre-edouard.guerin@cefe.cnrs.fr
* Virginie Marques | virginie.marques@cefe.cnrs.fr
* CNRS/CEFE, CNRS/MARBEC | Montpellier, France
* 2018-2020
DESCRIPTION
===========
This is a Snakefile for the SNAKEMAKE workflow management system.
From config.yaml and rapidrun.tsv, and from the per-`run` merged
sequence fastq files, it performs the following rules:
1. Assign sequences to a `sample`
2. Split each `run` file into `run`/`sample` files
"""
###############################################################################
# MODULES
###############################################################################
import pandas
import glob
import os
###############################################################################
# FUNCTIONS
###############################################################################
## join all the elements of the columns named "*cols" of the dataframe "df"
## into strings separated by "sep". It returns the joined columns as a
## Series of strings.
def str_join(df, sep, *cols):
    from functools import reduce
    return reduce(lambda x, y: x.astype(str).str.cat(y.astype(str), sep=sep), [df[col] for col in cols])
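## e.g. str_join(dfrm, '/', 'marker', 'run') yields values such as
## "teleo/run1" (hypothetical marker and run names)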
###############################################################################
# GLOBAL VARIABLES
###############################################################################
#configfile: "01_infos/config.yaml"
## load demultiplexing dataframe
dfMulti = pandas.read_csv("../results/01_settings/all_demultiplex.csv", sep=",")
## load rapidrun.tsv file
if config['format'] != "CLASSIC":
rapidrunfile = config['fichiers']['rapidrun']
else:
rapidrunfile="../results/01_settings/all_samples_classic.csv"
if os.path.isfile(rapidrunfile) is not True:
raise Exception("ERROR: "+rapidrunfile+" is not a file. You must run step 01_settings first in order to generate this file for the CLASSIC format.")
dfr = pandas.read_csv(rapidrunfile, sep=";")
dfr.columns = ['plaque', 'run', 'sample', 'projet', 'marker']
## remove blacklisted projets
blacklistedProjets = config['blacklist']['projet']
dfrp = dfr[~dfr.projet.isin(blacklistedProjets)]
## remove blacklisted runs
blacklistedRuns = config['blacklist']['run']
dfrm = dfrp[~dfrp.run.isin(blacklistedRuns)]
#dfrm=dfr
## keep only marker and run information for demultiplexing
dfRunMarker = dfrm[['marker', 'run']].copy()
dfRunMarker['runMarker'] = str_join(dfrm, '/', 'marker', 'run')
dfRunMarker['projMarker'] = str_join(dfrm, '/', 'projet', 'marker')
dfRunMarker = dfRunMarker.drop_duplicates()
#dfRunMarker['dat']=dfRunMarker['marker'].map(dicMark)
#dfRunMarker['dat']=dfRunMarker['marker'].map(config['fichiers']['dat'])
with pandas.option_context('display.max_rows', None, 'display.max_columns', None, 'max_colwidth', 1800):
    print(dfRunMarker)
listRunMarker = list(dfRunMarker.runMarker)
#dfMulti=dfMulti[(dfMulti.projet != 'Guadeloupe')]
dfRunMarker['projMarkRun'] = dfRunMarker['projMarker']+"/"+dfRunMarker['run']
## write .dat files for ngsfilter for each projet/marker/run
for projmarkerrun in dfRunMarker['projMarkRun'].unique():
    thisProj, thisMarker, thisRun = projmarkerrun.split("/")
    fileName = "../results/03_demultiplex/00_dat/" + str(thisProj) + "_" + str(thisMarker) + "_" + str(thisRun) + ".tsv"
    dfprojmarkerrunMulti = dfMulti[(dfMulti.projet == thisProj) & (dfMulti.marker == thisMarker) & (dfMulti.run == thisRun)]
    if not dfprojmarkerrunMulti.empty:
        print("Writing sample description file for", thisProj, thisMarker, thisRun, ":", fileName)
        thisExp = list(dfprojmarkerrunMulti['projet'])
        thisSample = list(dfprojmarkerrunMulti['sample'])
        thisTags = list(dfprojmarkerrunMulti['barcode5'])
        thisForward_primer = list(dfprojmarkerrunMulti['primer5'])
        thisReverse_primer = list(dfprojmarkerrunMulti['primer3'])
        this_extra_info = ["F"] * len(thisExp)
        thisDat = {'#exp': thisExp, 'sample': thisSample, 'tags': thisTags, 'forward_primer': thisForward_primer, 'reverse_primer': thisReverse_primer, 'extra_information': this_extra_info}
        dfDat = pandas.DataFrame(thisDat, columns=['#exp', 'sample', 'tags', 'forward_primer', 'reverse_primer', 'extra_information'])
        dfDat = dfDat.drop_duplicates()
        ## remove the file first if it already exists
        if glob.glob(fileName):
            os.remove(fileName)
        dfDat.to_csv(fileName, index=False, sep="\t")
    else:
        print("WARNING: no demultiplexing information for", thisProj, thisMarker, thisRun, "; check", rapidrunfile, "or blacklist this projet")
dfRunMarker['dat'] = ["../results/03_demultiplex/00_dat/"+ele.replace('/','_')+".tsv" for ele in dfRunMarker['projMarkRun']]
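## The ngsfilter sample description files written above are tab-separated,
## e.g. (hypothetical values):
##   #exp    sample    tags    forward_primer    reverse_primer    extra_information
##   projet1 sample01  aattagat  ACACCGCCCGTCACTCT  CTTCCGGTACACTTACCATG  F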
###############################################################################
# RULES
###############################################################################
rule all:
    input:
        expand('../results/03_demultiplex/01_assign_sequences/{demultiplex}.ali.assigned.fastq', demultiplex=dfRunMarker.projMarkRun),
        expand('../results/03_demultiplex/01_assign_sequences/{demultiplex}.unidentified.fastq', demultiplex=dfRunMarker.projMarkRun),
        expand('../logs/03_demultiplex/01_assign_sequences/{demultiplex}.log', demultiplex=dfRunMarker.projMarkRun),
        expand('../logs/03_demultiplex/02_split_sequences/{demultiplex}.log', demultiplex=dfRunMarker.projMarkRun)
include: "rules/assign_sequences.smk"
include: "rules/split_sequences.smk"
#===============================================================================
#HEADER
#===============================================================================
__author__ = "Pierre-Edouard Guerin"
__credits__ = ["Pierre-Edouard Guerin", "Virginie Marques"]
__license__ = "MIT"
__version__ = "1.1.4"
__maintainer__ = "Pierre-Edouard Guerin"
__email__ = "pierre-edouard.guerin@cefe.cnrs.fr"
__status__ = "Production"
"""
Codes for scientific papers related to metabarcoding studies
AUTHORS
=======
* Pierre-Edouard Guerin | pierre-edouard.guerin@cefe.cnrs.fr
* Virginie Marques | virginie.marques@cefe.cnrs.fr
* CNRS/CEFE, CNRS/MARBEC | Montpellier, France
* 2018-2020
DESCRIPTION
===========
This is a Snakefile for the SNAKEMAKE workflow management system.
From config.yaml and demultiplexing.csv, and from
results/03_demultiplex/02_raw/`projet`/`marker`/`run`/`sample`.fasta,
it performs the following rules:
1. Dereplicate sequences at the `sample` level
2. Filter out sequences with wrong length, low coverage, or IUPAC ambiguities
3. Detect PCR clones
4. Remove PCR clones
"""
###############################################################################
# MODULES
###############################################################################
import pandas
import os.path
###############################################################################
# GLOBAL VARIABLES
###############################################################################
#configfile: "../config.yaml"
## load demultiplexing dataframe
dfMulti = pandas.read_csv("../results/01_settings/all_demultiplex.csv", sep=",")
## check demultiplexing results and drop samples whose fasta file is missing
dfMultiChecked = dfMulti
for thisDemultiplex in dfMulti.demultiplex:
    file_sample = "../results/03_demultiplex/02_raw/" + thisDemultiplex + ".fasta"
    if not os.path.exists(file_sample):
        print("WARNING:", file_sample, "not found. It was removed from this analysis.")
        dfMultiChecked = dfMultiChecked[dfMultiChecked.demultiplex != thisDemultiplex]
print(dfMultiChecked)
###############################################################################
# RULES
###############################################################################
rule all:
    input:
        expand('../results/04_filter_samples/01_dereplicated/{demultiplexs}.uniq.fasta', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../results/04_filter_samples/02_goodlength/{demultiplexs}.l.u.fasta', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../results/04_filter_samples/03_clean_pcrerr/{demultiplexs}.r.l.u.fasta', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../results/04_filter_samples/04_filtered/{demultiplexs}.c.r.l.u.fasta', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../logs/04_filter_samples/01_dereplicated/{demultiplexs}.log', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../logs/04_filter_samples/02_goodlength/{demultiplexs}.log', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../logs/04_filter_samples/03_clean_pcrerr/{demultiplexs}.log', demultiplexs=dfMultiChecked['demultiplex']),
        expand('../logs/04_filter_samples/04_filtered/{demultiplexs}.log', demultiplexs=dfMultiChecked['demultiplex'])
include: "rules/dereplicate_samples.smk"
include: "rules/goodlength_samples.smk"
include: "rules/clean_pcrerr_samples.smk"
include: "rules/rm_internal_samples.smk"
name: obitools
channels:
- conda-forge
- bioconda
dependencies:
- virtualenv=16.7.5
- sphinx
- cython=0.29.13
- obitools
#===============================================================================
#HEADER
#===============================================================================
__author__ = "Pierre-Edouard Guerin"
__credits__ = ["Pierre-Edouard Guerin", "Virginie Marques"]
__license__ = "MIT"
__version__ = "1.1.4"
__maintainer__ = "Pierre-Edouard Guerin"
__email__ = "pierre-edouard.guerin@cefe.cnrs.fr"
__status__ = "Production"
"""
Codes for scientific papers related to metabarcoding studies
AUTHORS
=======
* Pierre-Edouard Guerin | pierre-edouard.guerin@cefe.cnrs.fr
* Virginie Marques | virginie.marques@cefe.cnrs.fr
* CNRS/CEFE, CNRS/MARBEC | Montpellier, France
* 2018-2020
DESCRIPTION
===========
This is a Snakefile for the SNAKEMAKE workflow management system.
From config.yaml and demultiplexing.csv, and from
results/05_assignment/01_runs/`projet`/`marker`/`run`.fasta,
it performs the following rules:
1. Assign each sequence to a taxon
2. Dereplicate and merge samples together
3. Remove attributes that are no longer useful at this stage
4. Sort the sequences by decreasing count
5. Generate the final results/06_final_tables/`projet`/`marker`/`run`.csv tables
"""
#===============================================================================
#MODULES
#===============================================================================
import pandas
import os.path
#===============================================================================
#GLOBAL VARIABLES
#===============================================================================
## load demultiplexing dataframe
dfMulti = pandas.read_csv("../results/01_settings/all_demultiplex.csv", sep=",")
projetMarkerRuns = dfMulti[['projmarkrun', 'marker', 'projet']].drop_duplicates()
dfpmr = projetMarkerRuns
## remove `projet`/`marker`/`run` wildcards with missing files from this analysis
for pmr in projetMarkerRuns.projmarkrun:
    file_sample = "../results/05_assignment/01_runs/" + pmr + ".fasta"
    if not os.path.exists(file_sample):
        print("WARNING:", file_sample, "not found. It was removed from this analysis.")
        dfpmr = dfpmr[dfpmr.projmarkrun != pmr]
## attach the reference database (`bdr`) and fasta file matching each row's `marker`
if config['format'] == "CLASSIC":
    ## CLASSIC runs carry a single marker: map every marker to the first
    ## (and only) entry of the reference database dictionary
    thisMarker = str(list(config["assign_taxon"]["bdr"])[0])
    markerDic = {dmarker: thisMarker for dmarker in dfpmr['marker']}
    dfpmr['bdr'] = dfpmr['marker'].map(markerDic).map(config["assign_taxon"]["bdr"])
    dfpmr['fasta'] = dfpmr['marker'].map(markerDic).map(config["assign_taxon"]["fasta"])
else:
    dfpmr['bdr'] = dfpmr['marker'].map(config["assign_taxon"]["bdr"])
    dfpmr['fasta'] = dfpmr['marker'].map(config["assign_taxon"]["fasta"])
## display selected `projet`/`marker`/`run` with related information
print(dfpmr)
###############################################################################
# RULES
###############################################################################
rule all:
    input:
        expand('../results/05_assignment/01_runs/{run}.fasta', run=dfpmr['projmarkrun']),
        expand('../results/05_assignment/02_dereplicated/{run}.uniq.fasta', run=dfpmr['projmarkrun']),
        expand('../results/05_assignment/03_assigned/{run}.tag.u.fasta', run=dfpmr['projmarkrun']),
        expand('../results/05_assignment/04_formated/{run}.a.t.u.fasta', run=dfpmr['projmarkrun']),
        expand('../results/05_assignment/04_formated/{run}.s.a.t.u.fasta', run=dfpmr['projmarkrun']),
        expand('../results/06_final_tables/{run}.csv', run=dfpmr['projmarkrun']),
        expand('../logs/05_assignment/02_dereplicated/{run}.log', run=dfpmr['projmarkrun']),
        expand('../logs/05_assignment/03_assign_taxon/{run}.log', run=dfpmr['projmarkrun']),
        expand('../logs/05_assignment/04_rm_attributes/{run}.log', run=dfpmr['projmarkrun']),
        expand('../logs/05_assignment/05_sort_runs/{run}.log', run=dfpmr['projmarkrun']),
        expand('../logs/05_assignment/06_table_runs/{run}.log', run=dfpmr['projmarkrun'])
include: "rules/dereplicate_runs.smk"
include: "rules/assign_taxon.smk"
include: "rules/rm_attributes.smk"
include: "rules/sort_runs.smk"
include: "rules/table_runs.smk"
name: obitools
channels:
- conda-forge
- bioconda
dependencies:
- virtualenv=16.7.5
- sphinx
- cython=0.29.13
- obitools
@@ -3,10 +3,12 @@ __license__ = "MIT"
 ### Assign each sequence record to the corresponding sample/marker combination
-rule assign_sequences:
+rule assign_marker_sample_to_sequence:
     input:
+        'results/05_demultiplex_flags/{demultiplex}.flag'
     output:
-        assign='../results/03_demultiplex/01_assign_sequences/{demultiplex}.ali.assigned.fastq',
-        unid='../results/03_demultiplex/01_assign_sequences/{demultiplex}.unidentified.fastq'
+        assign='results/06_assign_marker_sample_to_sequence/{demultiplex}.ali.assigned.fastq',
+        unid='results/06_assign_marker_sample_to_sequence/{demultiplex}.unidentified.fastq'
     conda:
         '../envs/obitools_envs.yaml'
     singularity:
@@ -14,7 +16,7 @@ rule assign_sequences:
     params:
        dmulti= lambda wildcards: dfRunMarker[dfRunMarker.projMarkRun == wildcards.demultiplex].to_dict('records')[0],
     log:
-        '../logs/03_demultiplex/01_assign_sequences/{demultiplex}.log'
+        'logs/06_assign_marker_sample_to_sequence/{demultiplex}.log'
     shell: