Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mbb
waw
Commits
8c6bc474
Commit
8c6bc474
authored
May 13, 2022
by
khalid
Browse files
dada2 taxonomy assignation now use frogs formated dbs
parent
53905977
Changes
4
Hide whitespace changes
Inline
Side-by-side
tools/dada2/dada2.yaml
View file @
8c6bc474
...
...
@@ -81,6 +81,7 @@
# label: "Marker type",
# prompt: "Choose unite as taxonomy source if your marker type is ITS"
# },
{
name
:
dada2_taxonomypath
,
type
:
select
,
...
...
tools/dada2_assign_taxonomy/dada2_assign_taxonomy.rule.snakefile
View file @
8c6bc474
...
...
@@ -7,7 +7,13 @@ rule <step_name>__dada2_assign_taxonomy:
params:
output_dir = config["results_dir"] + "/" + config["<step_name>__dada2_assign_taxonomy_output_dir"],
step_dir = config["<step_name>__dada2_assign_taxonomy_output_dir"],
taxonomypath = config["<step_name>__dada2_assign_taxonomy_taxonomypath"],
script = config["<step_name>__dada2_assign_taxonomy_script"]
#taxonomypath = config["<step_name>__dada2_assign_taxonomy_taxonomypath"],
taxonomypath = config["<step_name>__dada2_assign_taxonomy_reference_fasta"],
taxfile = config["<step_name>__dada2_assign_taxonomy_reference_fasta"]+"/train_set.fasta",
yamlfile = config["<step_name>__FROGS_assign_taxonomy_reference_fasta"]+"/info.yaml",
#OTUabundance = config["<step_name>__dada2_assign_taxonomy_abundance"],
richness = config["results_dir"] + "/" + config["<step_name>__dada2_assign_taxonomy_output_dir"] + "/richness_mqc.png",
family = config["results_dir"] + "/" + config["<step_name>__dada2_assign_taxonomy_output_dir"] + "/top20_family_mqc.png",
...
...
@@ -19,4 +25,5 @@ rule <step_name>__dada2_assign_taxonomy:
config["results_dir"]+'/logs/' + config["<step_name>__dada2_assign_taxonomy_output_dir"] + '/dada2_assign_taxonomy_log.txt'
script:
config["<step_name>__dada2_assign_taxonomy_script"]
\ No newline at end of file
tools/dada2_assign_taxonomy/dada2_assign_taxonomy.script.R
View file @
8c6bc474
suppressMessages
(
library
(
dada2
,
quietly
=
T
));
print
(
paste0
(
"dada2: "
,
packageVersion
(
"dada2"
)))
suppressMessages
(
library
(
RcppParallel
,
quietly
=
T
))
#this is adapted from dada2
#use comment="" to avoid problem in reading trainedset_db_taxid which contains # in some columns
MBBmakeTaxonomyFasta_RDP
<-
function
(
fin
,
fdb
,
fout
,
compress
=
TRUE
)
{
# Read in the fasta and pull out the taxonomy entries
sr
<-
ShortRead
::
readFasta
(
fin
)
id
<-
as.character
(
gsub
(
"\""
,
""
,
id
(
sr
)))
tax
<-
sapply
(
strsplit
(
id
,
"\\t"
),
`[`
,
2
)
tax
<-
gsub
(
"^Root;"
,
""
,
tax
)
tax
<-
strsplit
(
tax
,
";"
)
# Get the names of the standard 6 taxonomic levels
db
<-
read.table
(
file
=
fdb
,
sep
=
"*"
,
comment.char
=
""
,
stringsAsFactors
=
FALSE
)
colnames
(
db
)
<-
c
(
"Index"
,
"Name"
,
"L"
,
"R"
,
"Level"
)
keep
<-
db
$
Name
[
db
$
Level
%in%
c
(
"domain"
,
"phylum"
,
"class"
,
"order"
,
"family"
,
"genus"
)]
# Cut down to just the 6 level taxonomy
tax
<-
sapply
(
tax
,
function
(
x
)
x
[
x
%in%
keep
])
tax
<-
lapply
(
tax
,
paste
,
collapse
=
";"
)
tax
<-
unlist
(
tax
)
# Final formatting
tax
<-
paste0
(
tax
,
";"
)
# Ending semicolon
tax
<-
gsub
(
"[^;]*_incertae_sedis;$"
,
""
,
tax
)
# Uncertain lowest-level assignment is better to leave blank
tax
<-
gsub
(
" "
,
"_"
,
tax
)
# Write to disk
ShortRead
::
writeFasta
(
ShortRead
(
ShortRead
::
sread
(
sr
),
BStringSet
::
BStringSet
(
tax
)),
fout
,
width
=
20000L
,
compress
=
compress
)
}
######################
# Assign taxonomy
...
...
@@ -20,17 +46,26 @@ top20family = snakemake@params[["family"]]
ordination
=
snakemake
@
params
[[
"ordination"
]]
tryRC
=
snakemake
@
params
[[
"tryRC"
]]
library
(
yaml
)
yamlfile
=
paste0
(
Taxonomypath
,
"/info.yaml"
)
taxURL
=
read_yaml
(
yamlfile
)
$
train_set
#http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/COI/BOLD_COI-5P/BOLD_COI-5P_022019.tar.gz to BOLD_COI-5P_022019.tar.gz
frogsfile
=
paste0
(
Taxonomypath
,
basename
(
taxURL
))
marker
=
gsub
(
".tar.gz"
,
""
,
basename
(
taxURL
))
taxfile
=
paste0
(
Taxonomypath
,
"/train_set_"
,
marker
,
".fa.gz"
)
speciesfile
=
paste0
(
Taxonomypath
,
"/assignment_"
,
marker
,
".fa.gz"
)
taxfile
=
paste0
(
Taxonomypath
,
"/train_set.fa.gz"
)
speciesfile
=
paste0
(
Taxonomypath
,
"/assignment.fa.gz"
)
if
(
!
file.exists
(
taxfile
))
{
library
(
yaml
)
yamlfile
=
paste0
(
Taxonomypath
,
"/info.yaml"
)
taxURL
=
read_yaml
(
yamlfile
)
$
train_set
{
download.file
(
taxURL
,
frogsfile
,
"wget"
)
cmd
=
paste0
(
"cd "
,
Taxonomypath
,
" && tar -xzf "
,
frogsfile
,
" && rm -f "
,
frogsfile
);
system
(
cmd
)
unzipedDir
=
gsub
(
".tar.gz"
,
""
,
frogsfile
)
MBBmakeTaxonomyFasta_RDP
(
paste0
(
unzipedDir
,
"/"
,
marker
,
".fasta"
),
paste0
(
unzipedDir
,
"/"
,
marker
,
".tax"
),
taxfile
,
compress
=
TRUE
)
unlink
(
unzipedDir
,
recursive
=
TRUE
,
force
=
TRUE
)
speciesURL
=
read_yaml
(
yamlfile
)
$
species_assignment
download.file
(
taxURL
,
taxfile
,
"wget"
)
if
(
!
is.null
(
speciesURL
))
{
download.file
(
speciesURL
,
speciesfile
,
"wget"
)
}
}
...
...
tools/dada2_assign_taxonomy/dada2_assign_taxonomy.yaml
View file @
8c6bc474
...
...
@@ -40,6 +40,32 @@
step
:
1
,
label
:
"
Number
of
threads
to
use"
,
},
{
name
:
dada2_assign_taxonomy_reference_fasta
,
type
:
select
,
choices
:
[
BOLD_COI-5P
:
"
/FROGS_taxonomy/COI/BOLD_COI-5P"
,
MIDORI
:
"
/FROGS_taxonomy/COI/MIDORI"
,
DAIRYdb
:
"
/FROGS_taxonomy/DAIRYdb"
,
Diat.barcode
:
"
/FROGS_taxonomy/Diat.barcode"
,
EZBioCloud
:
"
/FROGS_taxonomy/EZBioCloud"
,
MaarjAM
:
"
/FROGS_taxonomy/MaarjAM"
,
MiDas
:
"
/FROGS_taxonomy/MiDas"
,
PHYMYCO_DB
:
"
/FROGS_taxonomy/PHYMYCO_DB"
,
PR2
:
"
/FROGS_taxonomy/PR2"
,
rbcL
:
"
/FROGS_taxonomy/rbcL"
,
rpoB
:
"
/FROGS_taxonomy/rpoB"
,
silva_138.1_16S
:
"
/FROGS_taxonomy/SILVA/16S"
,
silva_138.1_18S
:
"
/FROGS_taxonomy/SILVA/18S"
,
silva_138.1_23S
:
"
/FROGS_taxonomy/SILVA/23S"
,
silva_138.1_28S
:
"
/FROGS_taxonomy/SILVA/28S
"
,
silva_138.1_LSU
:
"
/FROGS_taxonomy/SILVA/LSU"
,
silva_138.1_SSU
:
"
/FROGS_taxonomy/SILVA/SSU"
,
Unite
:
"
/FROGS_taxonomy/Unite"
,
],
value
:
"
/FROGS_taxonomy/SILVA/16S"
,
label
:
"
Taxonomy
source"
},
{
name
:
dada2_assign_taxonomy_taxonomypath
,
type
:
select
,
...
...
@@ -110,7 +136,8 @@
},
data
:
[
{
name
:
taxonomy
,
#name: taxonomy,
name
:
FROGS_taxonomy
,
type
:
directory
}
],
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment