Commit 8c6bc474 authored by khalid's avatar khalid
Browse files

dada2 taxonomy assignation now use frogs formated dbs

parent 53905977
......@@ -81,6 +81,7 @@
# label: "Marker type",
# prompt: "Choose unite as taxonomy source if your marker type is ITS"
# },
{
name: dada2_taxonomypath,
type: select,
......
......@@ -7,7 +7,13 @@ rule <step_name>__dada2_assign_taxonomy:
params:
output_dir = config["results_dir"] + "/" + config["<step_name>__dada2_assign_taxonomy_output_dir"],
step_dir = config["<step_name>__dada2_assign_taxonomy_output_dir"],
taxonomypath = config["<step_name>__dada2_assign_taxonomy_taxonomypath"],
script = config["<step_name>__dada2_assign_taxonomy_script"]
#taxonomypath = config["<step_name>__dada2_assign_taxonomy_taxonomypath"],
taxonomypath = config["<step_name>__dada2_assign_taxonomy_reference_fasta"],
taxfile = config["<step_name>__dada2_assign_taxonomy_reference_fasta"]+"/train_set.fasta",
yamlfile = config["<step_name>__FROGS_assign_taxonomy_reference_fasta"]+"/info.yaml",
#OTUabundance = config["<step_name>__dada2_assign_taxonomy_abundance"],
richness = config["results_dir"] + "/" + config["<step_name>__dada2_assign_taxonomy_output_dir"] + "/richness_mqc.png",
family = config["results_dir"] + "/" + config["<step_name>__dada2_assign_taxonomy_output_dir"] + "/top20_family_mqc.png",
......@@ -19,4 +25,5 @@ rule <step_name>__dada2_assign_taxonomy:
config["results_dir"]+'/logs/' + config["<step_name>__dada2_assign_taxonomy_output_dir"] + '/dada2_assign_taxonomy_log.txt'
script:
config["<step_name>__dada2_assign_taxonomy_script"]
\ No newline at end of file
suppressMessages(library(dada2,quietly=T));print(paste0("dada2: ",packageVersion("dada2")))
suppressMessages(library(RcppParallel,quietly=T))
#this is adapted from dada2
#use comment="" to avoid problem in reading trainedset_db_taxid which contains # in some columns
MBBmakeTaxonomyFasta_RDP <- function(fin, fdb, fout, compress=TRUE) {
# Read in the fasta and pull out the taxonomy entries
sr <- ShortRead::readFasta(fin)
id <- as.character(gsub("\"", "", id(sr)))
tax <- sapply(strsplit(id, "\\t"), `[`, 2)
tax <- gsub("^Root;", "", tax)
tax <- strsplit(tax, ";")
# Get the names of the standard 6 taxonomic levels
db <- read.table(file=fdb, sep="*", comment.char="", stringsAsFactors=FALSE)
colnames(db) <- c("Index", "Name", "L", "R", "Level")
keep <- db$Name[db$Level %in% c("domain", "phylum", "class", "order", "family", "genus")]
# Cut down to just the 6 level taxonomy
tax <- sapply(tax, function(x) x[x %in% keep])
tax <- lapply(tax, paste, collapse = ";")
tax <- unlist(tax)
# Final formatting
tax <- paste0(tax, ";") # Ending semicolon
tax <- gsub("[^;]*_incertae_sedis;$", "", tax) # Uncertain lowest-level assignment is better to leave blank
tax <- gsub(" ", "_", tax)
# Write to disk
ShortRead::writeFasta(ShortRead(ShortRead::sread(sr), BStringSet::BStringSet(tax)), fout,
width=20000L, compress=compress)
}
######################
# Assign taxonomy
......@@ -20,17 +46,26 @@ top20family = snakemake@params[["family"]]
ordination = snakemake@params[["ordination"]]
tryRC = snakemake@params[["tryRC"]]
library(yaml)
yamlfile = paste0(Taxonomypath, "/info.yaml")
taxURL = read_yaml(yamlfile)$train_set
#http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/COI/BOLD_COI-5P/BOLD_COI-5P_022019.tar.gz to BOLD_COI-5P_022019.tar.gz
frogsfile = paste0(Taxonomypath, basename(taxURL))
marker = gsub(".tar.gz", "", basename(taxURL))
taxfile = paste0(Taxonomypath, "/train_set_",marker,".fa.gz")
speciesfile = paste0(Taxonomypath, "/assignment_",marker,".fa.gz")
taxfile = paste0(Taxonomypath, "/train_set.fa.gz")
speciesfile = paste0(Taxonomypath, "/assignment.fa.gz")
if ( ! file.exists(taxfile))
{
library(yaml)
yamlfile = paste0(Taxonomypath, "/info.yaml")
taxURL = read_yaml(yamlfile)$train_set
{
download.file(taxURL, frogsfile, "wget")
cmd = paste0("cd ",Taxonomypath," && tar -xzf ",frogsfile," && rm -f ", frogsfile);
system(cmd)
unzipedDir = gsub(".tar.gz", "", frogsfile)
MBBmakeTaxonomyFasta_RDP(paste0(unzipedDir, "/",marker,".fasta"), paste0(unzipedDir, "/",marker,".tax"), taxfile, compress=TRUE)
unlink(unzipedDir, recursive = TRUE, force = TRUE)
speciesURL = read_yaml(yamlfile)$species_assignment
download.file(taxURL, taxfile, "wget")
if(! is.null(speciesURL))
{ download.file(speciesURL, speciesfile, "wget") }
}
......
......@@ -40,6 +40,32 @@
step: 1,
label: "Number of threads to use",
},
{
name: dada2_assign_taxonomy_reference_fasta,
type: select,
choices: [
BOLD_COI-5P: "/FROGS_taxonomy/COI/BOLD_COI-5P",
MIDORI: "/FROGS_taxonomy/COI/MIDORI",
DAIRYdb: "/FROGS_taxonomy/DAIRYdb",
Diat.barcode: "/FROGS_taxonomy/Diat.barcode",
EZBioCloud: "/FROGS_taxonomy/EZBioCloud",
MaarjAM: "/FROGS_taxonomy/MaarjAM",
MiDas: "/FROGS_taxonomy/MiDas",
PHYMYCO_DB: "/FROGS_taxonomy/PHYMYCO_DB",
PR2: "/FROGS_taxonomy/PR2",
rbcL: "/FROGS_taxonomy/rbcL",
rpoB: "/FROGS_taxonomy/rpoB",
silva_138.1_16S: "/FROGS_taxonomy/SILVA/16S",
silva_138.1_18S: "/FROGS_taxonomy/SILVA/18S",
silva_138.1_23S: "/FROGS_taxonomy/SILVA/23S",
silva_138.1_28S: "/FROGS_taxonomy/SILVA/28S ",
silva_138.1_LSU: "/FROGS_taxonomy/SILVA/LSU",
silva_138.1_SSU: "/FROGS_taxonomy/SILVA/SSU",
Unite: "/FROGS_taxonomy/Unite",
],
value: "/FROGS_taxonomy/SILVA/16S",
label: "Taxonomy source"
},
{
name: dada2_assign_taxonomy_taxonomypath,
type: select,
......@@ -110,7 +136,8 @@
},
data: [
{
name: taxonomy,
#name: taxonomy,
name: FROGS_taxonomy,
type: directory
}
],
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment