Commit 230efda6 authored by khalid's avatar khalid
Browse files

process tags of randomly sheared genomic or transcriptomic data

parent 72133f37
```{r echo=FALSE, message=FALSE, warning=FALSE, fig.width=10,fig.height=8, eval=process_radtags}
library(DT)
library(reshape2)
library(ggplot2)

# Report chunk: sums the per-barcode read counts found in the samples log of
# every run directory and plots them per sample.
#
# NOTE(review): this chunk still uses the process_radtags parameter names and
# the "process_radtags.samples.log" file name -- confirm they match what the
# tool behind this report actually writes.

# Read-count columns that are accumulated across run directories.
count_cols <- c("Total", "NoRadTag", "LowQuality", "Retained")

# Extract the per-barcode table from the samples log stored in `run_dir`.
#
# The table starts at its header line and stops just before the
# "Sequences not recorded" line. Both markers are searched in each log
# separately, so logs with different preamble lengths are parsed correctly.
read_samples_table <- function(run_dir) {
  log_lines <- readLines(paste0(run_dir, "/process_radtags.samples.log"))
  first <- grep(
    "Barcode\tFilename\tTotal\tNoRadTag\tLowQuality\tRetained",
    log_lines
  )
  last <- grep("Sequences not recorded", log_lines)
  if (length(first) == 0 || length(last) == 0) {
    stop(
      "No per-barcode table found in the samples log of ", run_dir,
      call. = FALSE
    )
  }
  read.delim(sep = "\t", text = log_lines[first[1]:(last[1] - 1)])
}

output_dir <- if (parameters$SeOrPe == "SE") {
  parameters$process_radtags_SE_output_dir
} else {
  parameters$process_radtags_PE_output_dir
}

# list.dirs() returns the root directory first; drop it to keep only the
# sub-directories.
dirs <- list.dirs(path = paste0(parameters$results_dir, "/", output_dir))[-1]
if (length(dirs) == 0) {
  stop(
    "No run directory found under ",
    paste0(parameters$results_dir, "/", output_dir),
    call. = FALSE
  )
}

a <- read_samples_table(dirs[1])
datatable(data = a[, c("Barcode", "Filename")], options = list(scrollX = '300px'))

# Keep only the identifying columns and the counts that are summed.
a <- a[, c("Barcode", "Filename", count_cols)]

# Add the counts of the remaining run directories. Iterating over dirs[-1]
# does nothing when there is a single directory, whereas seq(2, length(dirs))
# would count down to c(2, 1). Rows are assumed to be in the same barcode
# order in every log -- TODO confirm.
for (run_dir in dirs[-1]) {
  run_counts <- read_samples_table(run_dir)
  a[, count_cols] <- a[, count_cols] + run_counts[, count_cols]
}

to_plot <- melt(
  a,
  measure.vars = c("NoRadTag", "LowQuality", "Retained"),
  id.vars = c("Barcode", "Filename")
)
ggplot(to_plot, aes(x = Filename, y = value, fill = variable)) +
  geom_col() +
  coord_flip() +
  ylab("Reads") +
  xlab("Samples") +
  ggtitle("Reads per sample")
```
\ No newline at end of file
def get_individus():
    """Return the value of the last column of every row of the barcode file.

    The barcode ("tags") file is read as tab-separated text. Blank lines are
    skipped, since the csv reader yields an empty row for them and indexing
    its last column would raise IndexError.
    """
    # Local import so the helper does not depend on a file-level import.
    import csv

    # Use the inputs helper of the active mode. The PE rule only relies on the
    # PE helper, so the SE helper may not be available in PE mode
    # (NOTE(review): confirm against the template generator).
    if config["SeOrPe"] == "PE":
        tags_file = <step_name>__process_shortreadstags_PE_inputs()["tags"]
    else:
        tags_file = <step_name>__process_shortreadstags_SE_inputs()["tags"]

    with open(tags_file, mode="r") as infile:
        reader = csv.reader(infile, delimiter='\t')
        return [row[-1] for row in reader if row]

individus = get_individus()
# WARNING: this overrides the SAMPLES list obtained from raw_reads.
SAMPLES = individus
if config["SeOrPe"] == "SE":
    # Single-end mode: one {id}.fq.gz file is declared per entry of `individus`.
    rule <step_name>__process_shortreadstags_SE:
        input:
            **<step_name>__process_shortreadstags_SE_inputs(),
            #barcode_file = config["<step_name>__process_shortreadstags_barcode_file"]
        output:
            # One fastq file per individual.
            reads_demultiplexed = expand(
                "/".join((
                    config["results_dir"],
                    config["<step_name>__process_shortreadstags_SE_output_dir"],
                    "{id}.fq.gz",
                )),
                id=individus
            ),
            # Needs snakemake > 5.9.1 to avoid ChildIOException.
            read_dir = directory(
                "/".join((
                    config["results_dir"],
                    config["<step_name>__process_shortreadstags_SE_output_dir"],
                ))
            )
        params:
            command = config["<step_name>__process_shortreadstags_SE_command"],
            # The SE output directory is passed with a trailing slash.
            output_dir = "/".join((
                config["results_dir"],
                config["<step_name>__process_shortreadstags_SE_output_dir"],
            )) + "/",
            barcode_type = config["<step_name>__process_shortreadstags_barcode_type"],
        log:
            "/".join((
                config["results_dir"],
                "logs",
                config["<step_name>__process_shortreadstags_SE_output_dir"],
                "process_shortreadstags_log.txt",
            ))
        shell:
            "{params.command} -p {input.reads_dir} -b {input.tags} "
            "-o {params.output_dir} {params.barcode_type} "
            "--clean "
            # "--quality " is left disabled
            "--rescue "
            "|& tee {log}"
elif config["SeOrPe"] == "PE":
    # Paired-end mode: {id}.1.fq.gz and {id}.2.fq.gz are declared per entry of
    # `individus`.
    rule <step_name>__process_shortreadstags_PE:
        input:
            **<step_name>__process_shortreadstags_PE_inputs(),
            #barcode_file = config["<step_name>__process_shortreadstags_barcode_file"]
        output:
            # One forward fastq file per individual.
            reads_forward_demultiplexed = expand(
                "/".join((
                    config["results_dir"],
                    config["<step_name>__process_shortreadstags_PE_output_dir"],
                    "{id}.1.fq.gz",
                )),
                id=individus
            ),
            # One reverse fastq file per individual.
            reads_reverse_demultiplexed = expand(
                "/".join((
                    config["results_dir"],
                    config["<step_name>__process_shortreadstags_PE_output_dir"],
                    "{id}.2.fq.gz",
                )),
                id=individus
            ),
            read_dir = directory(
                "/".join((
                    config["results_dir"],
                    config["<step_name>__process_shortreadstags_PE_output_dir"],
                ))
            )
        params:
            command = config["<step_name>__process_shortreadstags_PE_command"],
            output_dir = "/".join((
                config["results_dir"],
                config["<step_name>__process_shortreadstags_PE_output_dir"],
            )),
            barcode_type = config["<step_name>__process_shortreadstags_barcode_type"],
        log:
            "/".join((
                config["results_dir"],
                "logs",
                config["<step_name>__process_shortreadstags_PE_output_dir"],
                "process_shortreadstags_log.txt",
            ))
        shell:
            "{params.command} -p {input.reads_dir} -P -b {input.tags} "
            "-o {params.output_dir} {params.barcode_type} "
            "--clean "
            # "--quality " is left disabled
            "--rescue "
            "|& tee {log}"
# Tool descriptor for Stacks process_shortreads (single-end and paired-end commands).
{
  id: process_shortreadstags,
  name: process_shortreadstags,
  article: 10.1111/mec.15253,
  website: "http://catchenlab.life.illinois.edu/stacks/comp/process_shortreads.php",
  git: "",
  description: "Performs the same task as process_radtags for fast cleaning of randomly sheared genomic or transcriptomic data, not for RAD data.",
  version: "2.60",
  documentation: "http://catchenlab.life.illinois.edu/stacks/comp/process_shortreads.php",
  multiqc: "custom",
  commands:
    [
      {
        name: process_shortreadstags_SE,
        cname: "Process shortreads SE",
        command: process_shortreads,
        category: "stacks",
        output_dir: process_shortreadstags/SE,
        inputs:
          [
            { name: "reads_dir", type: "read_dir", description: "Directory containing one or more multiplexed read files to process" },
            { name: tags, type: "tsv", description: "Barcode file" }
          ],
        # NOTE(review): the Snakemake rule declares "{id}.fq.gz" outputs; confirm
        # whether the "file" patterns below must match those names.
        outputs:
          [
            { name: reads_demultiplexed, type: "fq.gz", file: "{individu}.fastq.gz", description: "Files of reads for each individual" },
            { name: read_dir, type: "read_dir", file: "/", description: "dir containing demultiplexed files"}
          ],
        options:
          [
            {
              name: process_shortreadstags_barcode_type,
              type: select,
              choices: [
                --inline_null: "--inline_null",
                --index_null: "--index_null",
                --inline_inline: "--inline_inline",
                --index_index: "--index_index",
                --inline_index: "--inline_index",
                --index_inline: "--index_inline",
              ],
              value: "--inline_null",
              label: Barcode position
            },
          ],
      },
      {
        name: process_shortreadstags_PE,
        cname: "Process shortreads PE",
        command: process_shortreads,
        category: "stacks",
        output_dir: process_shortreadstags/PE,
        inputs:
          [
            { name: "reads_dir", type: "read_dir", description: "Directory containing one or more multiplexed read files to process" },
            { name: tags, type: "tsv", description: "Barcode file" }
          ],
        # NOTE(review): the Snakemake rule declares "{id}.1.fq.gz" / "{id}.2.fq.gz"
        # outputs; confirm whether the "file" patterns below must match those names.
        outputs:
          [
            { name: reads_forward_demultiplexed, type: "fq.gz", file: "{individu}_R1.fastq.gz", description: "Files of forward reads for each individual" },
            { name: reads_reverse_demultiplexed, type: "fq.gz", file: "{individu}_R2.fastq.gz", description: "Files of reverse reads for each individual" },
            { name: read_dir, type: "read_dir", file: "/", description: "dir containing demultiplexed files"}
          ],
        options:
          [
            {
              name: process_shortreadstags_barcode_type,
              type: select,
              choices: [
                --inline_null: "--inline_null",
                --index_null: "--index_null",
                --inline_inline: "--inline_inline",
                --index_index: "--index_index",
                --inline_index: "--inline_index",
                --index_inline: "--index_inline",
              ],
              value: "--inline_null",
              label: Barcode position
            },
          ],
      },
    ],
  install:
    {
      stacks: [
        "cd /opt/biotools",
        "wget https://catchenlab.life.illinois.edu/stacks/source/stacks-2.60.tar.gz ",
        "tar -zxvf stacks-2.60.tar.gz",
        "cd stacks-2.60/",
        "./configure",
        "make -j 10",
        "make install",
        # Stacks builds process_radtags and process_shortreads; there is no
        # process_shortreadstags binary, and mv fails on a missing source file.
        "mv -t ../bin sstacks kmer_filter gstacks tsv2bam process_shortreads populations ustacks phasedstacks cstacks process_radtags",
        "cd .. && rm -r stacks-2.60 stacks-2.60.tar.gz"
      ]
    },
  citations: {
    stacks: [
      "Rochette, NC, Rivera-Colón, AG, Catchen, JM. Stacks 2: Analytical methods for paired-end sequencing improve RADseq-based population genomics. Mol Ecol. 2019; 28: 4737- 4754. https://doi.org/10.1111/mec.15253"
    ]
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment