Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mbb
waw
Commits
12d034b9
Commit
12d034b9
authored
May 11, 2022
by
khalid
Browse files
Add lulu for post clustering curation
parent
277d26f4
Changes
2
Hide whitespace changes
Inline
Side-by-side
tools/lulu/lulu.rule.snakefile
0 → 100755
View file @
12d034b9
# Post-clustering curation of an OTU table with the lulu R package.
#
# Steps performed by the shell block:
#   1. Build a pairwise match list of the OTU sequences against themselves,
#      with blastn when params.methode is "blast", otherwise with vsearch.
#      The identity/coverage thresholds are the fixed values used in the
#      original rule (84 % identity; 80 % / 0.9 query coverage).
#   2. Run lulu() on the abundance table and the match list, then write the
#      curated table and the OTU map as tab-separated files.
#
# Implementation notes:
#   * Every path and numeric option is handed to R through commandArgs()
#     instead of being pasted into the R source, so unusual characters in a
#     path cannot break or alter the script.
#   * The heredoc delimiter is quoted ('RSCRIPT'), so the shell performs no
#     expansion inside the R code. Its closing line MUST stay at column 0:
#     a plain heredoc only terminates on an unindented delimiter.
#   * The BLAST database is written into this rule's own output directory
#     rather than next to the input FASTA of the upstream step.
#   * Only the output of the R step is captured in the log file, as before.
rule <step_name>__lulu:
    input:
        **<step_name>__lulu_inputs(),
    output:
        curated_table = config["results_dir"] + "/" + config["<step_name>__lulu_output_dir"] + "/curated_table.tsv",
        otu_map = config["results_dir"] + "/" + config["<step_name>__lulu_output_dir"] + "/otu_map.tsv",
    params:
        output_dir = config["results_dir"] + "/" + config["<step_name>__lulu_output_dir"]+ "/",
        methode = config["<step_name>__lulu_matcher_method"],
        minimum_ratio = config["<step_name>__lulu_minimum_ratio"],
        minimum_match = config["<step_name>__lulu_minimum_match"],
    log:
        config["results_dir"] + "/logs/" + config["<step_name>__lulu_output_dir"] + "/lulu_log.txt"
    shell:
        """
        match_list="{params.output_dir}/match_list.txt"
        if [ "{params.methode}" = "blast" ]
        then
            blast_db="{params.output_dir}/blastdb"
            makeblastdb -in "{input.sequence_file}" -parse_seqids -dbtype nucl -out "$blast_db"
            blastn -db "$blast_db" -outfmt '6 qseqid sseqid pident' -out "$match_list" -qcov_hsp_perc 80 -perc_identity 84 -query "{input.sequence_file}"
        else
            vsearch --usearch_global "{input.sequence_file}" --db "{input.sequence_file}" --self --id .84 --iddef 1 --userout "$match_list" --userfields query+target+id --maxaccepts 0 --query_cov .9 --maxhits 10
        fi
        Rscript --no-save - "{input.abundance_file}" "$match_list" "{params.minimum_ratio}" "{params.minimum_match}" "{output.curated_table}" "{output.otu_map}" <<'RSCRIPT' 2>&1 | tee "{log}"
        library(lulu)
        cat("R loaded\\n")
        # args: 1 = OTU table, 2 = match list, 3 = minimum_ratio,
        #       4 = minimum_match, 5 = curated table path, 6 = OTU map path
        args <- commandArgs(trailingOnly = TRUE)
        cat("Script args are:\\n")
        print(args)
        otutab <- read.csv(args[1], sep = "\\t", header = TRUE, as.is = TRUE, row.names = 1)
        matchlist <- read.table(args[2], header = FALSE, as.is = TRUE, stringsAsFactors = FALSE)
        curated_result <- lulu(
          otutab,
          matchlist,
          minimum_ratio_type = "min",
          minimum_ratio = as.numeric(args[3]),
          minimum_match = as.numeric(args[4]),
          minimum_relative_cooccurence = 0.95
        )
        write.table(curated_result$curated_table, file = args[5], append = FALSE, quote = FALSE, sep = "\\t", dec = ".", row.names = TRUE, col.names = NA)
        write.table(curated_result$otu_map, file = args[6], append = FALSE, quote = FALSE, sep = "\\t", dec = ".", row.names = TRUE, col.names = NA)
RSCRIPT
        """
tools/lulu/lulu.yaml
0 → 100644
View file @
12d034b9
{
id
:
lulu
,
name
:
lulu post clustering
,
article
:
"
https://www.nature.com/articles/s41467-017-01312-x"
,
website
:
"
https://github.com/tobiasgf/lulu"
,
git
:
"
https://github.com/tobiasgf/lulu"
,
description
:
"
A
r-package
for
distribution
based
post
clustering
curation
of
amplicon
data."
,
version
:
"
latest"
,
documentation
:
"
https://github.com/tobiasgf/lulu"
,
multiqc
:
"
custom"
,
commands
:
[
{
name
:
lulu
,
cname
:
"
lulu
post
clustering"
,
command
:
"
"
,
category
:
"
metabarcoding"
,
output_dir
:
lulu
,
inputs
:
[
{
name
:
"
abundance_file"
,
type
:
"
tsv"
,
file
:
"
abundance.tsv"
,
description
:
"
OTU
table
with
samples
as
columns
and
OTUs
as
rows,
with
unique
OTU
id's
as
row
names"
},
{
name
:
sequence_file
,
type
:
"
fasta_file"
,
file
:
"
OTU_seqs.fasta"
,
description
:
"
OTU
sequences"
},
],
outputs
:
[
{
name
:
curated_table
,
type
:
"
tsv"
,
file
:
"
curated_table.tsv"
,
description
:
"
Table
of
retained
OTUs"
},
{
name
:
otu_map
,
type
:
"
tsv"
,
file
:
"
otu_map.tsv"
,
description
:
"
Table
of
information
of
which
daughters
were
mapped
to
which
parents
OTUs"
},
],
options
:
[
{
name
:
lulu_matcher_method
,
type
:
radio
,
value
:
"
blast"
,
choices
:
[
blast
:
blast
,
vsearch
:
vsearch
],
label
:
"
tool
for
pair
wise
matching
of
OTU
sequences"
,
},
{
name
:
lulu_minimum_ratio
,
type
:
numeric
,
value
:
1
,
min
:
0
,
max
:
1
,
step
:
0.01
,
label
:
"
minimum
abundance
ratio
between
a
potential
error
and
a
potential
parent"
,
},
{
name
:
lulu_minimum_match
,
type
:
numeric
,
value
:
84
,
min
:
0
,
max
:
100
,
step
:
1
,
label
:
"
minimum
threshold
(%)
of
sequence
similarity
for
considering
any
OTU
as
an
error
of
another"
,
}
],
},
],
install
:
{
lulu
:
[
"
Rscript
-e
'devtools::install_github(
\"
tobiasgf/lulu
\"
)'
"
],
VSEARCH
:
[
"
cd
/opt/biotools/"
,
"
wget
https://github.com/torognes/vsearch/releases/download/v2.21.1/vsearch-2.21.1-linux-x86_64.tar.gz"
,
"
tar
-zxvf
vsearch-2.21.1-linux-x86_64.tar.gz"
,
"
mv
vsearch-2.21.1-linux-x86_64/bin/vsearch
/opt/biotools/bin"
,
"
rm
-rf
vsearch-2.21.1-linux-x86_64*"
],
blast
:
[
"
cd
/opt/biotools/"
,
"
wget
-O
ncbi-blast-2.12.0+.tar.gz
https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.12.0/ncbi-blast-2.12.0+-x64-linux.tar.gz"
,
"
tar
-xvzf
ncbi-blast-2.12.0+.tar.gz"
,
"
ENV
PATH
$PATH:/opt/biotools/ncbi-blast-2.12.0+/bin"
],
},
citations
:
{
lulu
:
[
"
Frøslev,
T.
G.,
Kjøller,
R.,
Bruun,
H.
H.,
Ejrnæs,
R.,
Brunbjerg,
A.
K.,
Pietroni,
C.,
&
Hansen,
A.
J.
(2017).
Algorithm
for
post-clustering
curation
of
DNA
amplicon
data
yields
reliable
biodiversity
estimates.
Nature
Communications,
8(1),
1188."
],
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment