Commit 1746e9d9 authored by khalid
Browse files

Add hisat2

parent 19976f85
# HISAT2 read mapping. Only one of the two rules below is defined, selected by
# config["SeOrPe"] ("PE" = paired-end, "SE" = single-end). Each rule pipes the
# aligner output through `samtools view -b` and `samtools sort` into
# {sample}.bam and then runs `samtools index` on that BAM (the index file that
# samtools writes is not declared as a rule output).
#
# Logging: the log file is truncated once at the start of the command and every
# stage then appends to it (2>>). Mixing a truncating `2>` on the aligner with
# appending `2>>` on the other stages of the same pipeline lets the aligner
# overwrite text the samtools stages have already appended.
#
# NOTE(review): per the HISAT2 manual, --rg-id takes the bare read-group ID, so
# "--rg-id ID:WAW" presumably produces the ID "ID:WAW" (header "@RG ID:ID:WAW").
# Left unchanged here; confirm what downstream steps expect before altering it.
if config["SeOrPe"] == "PE":

    rule <step_name>__hisat2_PE:
        input:
            **<step_name>__hisat2_PE_inputs()
        output:
            bam = config["results_dir"]+"/"+config["<step_name>__hisat2_PE_output_dir"]+"/{sample}.bam"
        log:
            config["results_dir"]+"/logs/" + config["<step_name>__hisat2_PE_output_dir"] + "/{sample}_hisat2_log.txt"
        threads:
            config["<step_name>__hisat2_threads"]
        params:
            command = config["<step_name>__hisat2_PE_command"],
            # input.index holds the index files (<prefix>.1.ht2 ... <prefix>.8.ht2);
            # stripping the last two extensions of the first one yields the
            # prefix expected by -x.
            indexPrefix = lambda w, input: os.path.splitext(os.path.splitext(input.index[0])[0])[0],
            minins = config["<step_name>__hisat2_minins_PE"],
            maxins = config["<step_name>__hisat2_maxins_PE"],
            orientation = config["<step_name>__hisat2_orientation_PE"],
        shell:
            ": > {log} && "
            "{params.command} "
            "--threads {threads} "
            "-x {params.indexPrefix} "
            "-I {params.minins} "
            "-X {params.maxins} "
            "{params.orientation} "
            "--rg-id ID:WAW --rg SM:{wildcards.sample} "
            "-1 {input.read} "
            "-2 {input.read2} 2>> {log} "
            "| samtools view -b 2>> {log} "
            "| samtools sort -@ {threads} > {output.bam} 2>> {log} "
            "&& samtools index -@ {threads} {output.bam} 2>> {log}"

elif config["SeOrPe"] == "SE":

    rule <step_name>__hisat2_SE:
        input:
            **<step_name>__hisat2_SE_inputs()
        output:
            bam = config["results_dir"]+"/"+config["<step_name>__hisat2_SE_output_dir"]+"/{sample}.bam"
        log:
            config["results_dir"]+"/logs/" + config["<step_name>__hisat2_SE_output_dir"] + "/{sample}_hisat2_log.txt"
        threads:
            config["<step_name>__hisat2_threads"]
        params:
            command = config["<step_name>__hisat2_SE_command"],
            # Same prefix derivation as the PE rule. The former
            # "if not 'rev' in x" filter was dropped: the HISAT2 index files
            # declared by the index rule have no ".rev." members, and the
            # substring test ran on the whole path, so any directory or file
            # name containing "rev" emptied the list and raised IndexError.
            indexPrefix = lambda w, input: os.path.splitext(os.path.splitext(input.index[0])[0])[0],
        shell:
            ": > {log} && "
            "{params.command} "
            "--threads {threads} "
            "-x {params.indexPrefix} "
            "--rg-id ID:WAW --rg SM:{wildcards.sample} "
            "{input.read} 2>> {log} "
            "| samtools view -b 2>> {log} "
            "| samtools sort -@ {threads} > {output.bam} 2>> {log} "
            "&& samtools index -@ {threads} {output.bam} 2>> {log}"
# Tool description for the HISAT2 mapping step. It declares two commands
# (paired-end and single-end), their inputs/outputs, the user-tunable options,
# the installation commands and the citations.
{
  id: hisat2,
  # Display name; previously read "Bowtie2".
  name: hisat2,
  # Quoted: an unquoted scalar containing commas is split into separate entries
  # inside a flow mapping.
  article: "Kim, D., Paggi, J.M., Park, C. et al. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol 37, 907-915 (2019).",
  website: "http://daehwankimlab.github.io/hisat2/",
  git: "https://github.com/DaehwanKimLab/hisat2",
  description: "a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA)",
  version: "2.2.1",
  documentation: "http://daehwankimlab.github.io/hisat2/manual/",
  multiqc: "hisat2",
  commands:
    [
      {
        name: hisat2_PE,
        cname: "hisat2 PE",
        command: hisat2,
        category: "mapping",
        output_dir: hisat2/PE,
        inputs: [{ name: read, type: "reads" }, { name: read2, type: "reads" }, { name: index, type: "hisat2_index", file: index, description: "Index files for hisat2 alignment" }],
        outputs: [{ name: bam, type: "bams", file: "{sample}.bam", description: "Alignment files" }],
        options:
          [
            {
              name: hisat2_threads,
              prefix: --threads,
              type: numeric,
              value: 4,
              min: 1,
              max: NA,
              step: 1,
              label: "Number of threads to use to align reads"
            },
            {
              name: hisat2_minins_PE,
              prefix: -I,
              type: numeric,
              value: 0,
              min: 0,
              max: NA,
              step: 1,
              label: "The minimum insert size for valid paired-end alignments"
            },
            {
              name: hisat2_maxins_PE,
              prefix: -X,
              type: numeric,
              value: 250,
              min: 0,
              max: NA,
              step: 1,
              label: "The maximum insert size for valid paired-end alignments"
            },
            {
              name: hisat2_orientation_PE,
              type: radio,
              choices:
                [
                  Not stranded: "",
                  Forward Reverse: --fr,
                  Reverse Forward: --rf,
                  Forward Forward: --ff
                ],
              value: "",
              label: "The upstream/downstream mate orientations for a valid paired-end alignment against the forward reference strand."
            }
          ]
      },
      {
        name: hisat2_SE,
        cname: "hisat2 SE",
        command: hisat2,
        category: "mapping",
        output_dir: hisat2/SE,
        inputs: [{ name: read, type: "reads" }, { name: index, type: "hisat2_index", file: index, description: "Index files for hisat2 alignment" }],
        outputs: [{ name: bam, type: "bams", file: "{sample}.bam", description: "Alignment files" }],
        options:
          [
            {
              name: hisat2_threads,
              prefix: --threads,
              type: numeric,
              value: 4,
              min: 1,
              max: NA,
              step: 1,
              label: "Number of threads to use to align reads"
            }
          ]
      }
    ],
  install: {
    # Every entry is a separately quoted, comma-terminated string.
    # NOTE(review): the "ENV PATH ..." entry uses Dockerfile syntax rather than
    # a shell command; it is kept verbatim on the assumption that the consumer
    # of this list handles it - confirm.
    hisat2: [
      "cd /tmp",
      "wget -O hisat2-2.2.1-linux-x86_64.zip https://cloud.biohpc.swmed.edu/index.php/s/oTtGWbWjaxsQ2Ho/download",
      "unzip hisat2-2.2.1-linux-x86_64.zip",
      "mv hisat2-2.2.1 /opt/biotools/",
      "rm -rf hisat2-2.2.1*",
      "ENV PATH /opt/biotools/hisat2-2.2.1:$PATH",
      'echo "export PATH=$PATH:/opt/biotools/hisat2-2.2.1" >> /etc/environment'
    ],
    samtools: [
      "cd /opt/biotools",
      "wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2",
      "tar -xvjf samtools-1.9.tar.bz2",
      "cd samtools-1.9",
      "./configure && make",
      "cd ..",
      "mv samtools-1.9/samtools bin/samtools",
      "rm -r samtools-1.9 samtools-1.9.tar.bz2"
    ]
  },
  citations: {
    hisat2: [
      "Kim, D., Paggi, J.M., Park, C. et al. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol 37, 907-915 (2019)"
    ],
    samtools: [
      "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
    ]
  }
}
\ No newline at end of file
# Index-building rule: runs the configured command on the reference FASTA and
# declares the eight files index.1.ht2 ... index.8.ht2 as its output. The
# command's combined stdout/stderr is shown on the console and copied to the
# log through `tee`.
rule <step_name>__hisat2_index:
    input:
        **<step_name>__hisat2_index_inputs()
    output:
        # NOTE(review): unlike the log path below, this path (and output_prefix)
        # is not prefixed with config["results_dir"] - confirm this is intended.
        index = expand(
            config["<step_name>__hisat2_index_output_dir"]+"/index.{num}.ht2",
            num=range(1, 9)
        )
    log:
        config["results_dir"]+"/logs/" + config["<step_name>__hisat2_index_output_dir"] + "/index.log"
    threads:
        config["<step_name>__hisat2_index_threads"]
    params:
        command = config["<step_name>__hisat2_index_command"],
        # Prefix handed to the index builder; the output files above are
        # named <output_prefix>.<n>.ht2.
        output_prefix = config["<step_name>__hisat2_index_output_dir"]+"/index"
    shell:
        "{params.command} {input.genome_fasta} {params.output_prefix} --threads {threads} |& tee {log}"
# Tool description for the HISAT2 index-building step: one command
# (hisat2-build) taking a reference FASTA given as a parameter and producing
# the index consumed by the mapping step.
{
  id: hisat2_index,
  name: hisat2 index,
  # Quoted: an unquoted scalar containing commas is split into separate entries
  # inside a flow mapping.
  article: "Kim, D., Paggi, J.M., Park, C. et al. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol 37, 907-915 (2019).",
  website: "http://daehwankimlab.github.io/hisat2/",
  git: "https://github.com/DaehwanKimLab/hisat2",
  description: "a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA)",
  version: "2.2.1",
  documentation: "http://daehwankimlab.github.io/hisat2/manual/",
  multiqc: "hisat2",
  commands:
    [
      {
        name: hisat2_index,
        cname: "hisat2 index",
        command: hisat2-build,
        category: "indexing",
        output_dir: hisat2/index,
        inputs: [{ name: genome_fasta, type: "contigs", description: "Fasta reference", from: "parameter" }],
        outputs: [{ name: index, type: "hisat2_index", file: index, description: "Index files for hisat2 alignment" }],
        options:
          [
            {
              name: hisat2_index_genome_fasta,
              type: input_file,
              value: "",
              label: "Path to reference genome fasta file"
            },
            {
              name: hisat2_index_threads,
              prefix: --threads,
              type: numeric,
              value: 4,
              min: 1,
              max: NA,
              step: 1,
              label: "Number of threads to use to index genome"
            }
          ]
      }
    ],
  install: {
    # Every entry is a separately quoted, comma-terminated string.
    # NOTE(review): the "ENV PATH ..." entry uses Dockerfile syntax rather than
    # a shell command; it is kept verbatim on the assumption that the consumer
    # of this list handles it - confirm.
    hisat2: [
      "cd /tmp",
      "wget -O hisat2-2.2.1-linux-x86_64.zip https://cloud.biohpc.swmed.edu/index.php/s/oTtGWbWjaxsQ2Ho/download",
      "unzip hisat2-2.2.1-linux-x86_64.zip",
      "mv hisat2-2.2.1 /opt/biotools/",
      "rm -rf hisat2-2.2.1*",
      "ENV PATH /opt/biotools/hisat2-2.2.1:$PATH",
      'echo "export PATH=$PATH:/opt/biotools/hisat2-2.2.1" >> /etc/environment'
    ]
  },
  citations: {
    hisat2: [
      "Kim, D., Paggi, J.M., Park, C. et al. Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol 37, 907-915 (2019)"
    ]
  }
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment