blastn.rule.snakefile 2.38 KB
Newer Older
1
def <step_name>__db():
2
3
    db_name = ""
    db_path = ""
4
5
6
7
8
9

    if "<step_name>__blastn_blastdb" in config.keys():
        files = os.listdir(config["<step_name>__blastn_blastdb"])
        db_name = os.path.basename(os.path.splitext(files[0])[0])
        db_path = os.path.dirname(files[0])
    # if from makeblastdb step
10
    else:
11
12
13
        file = <step_name>__blastn_inputs()["blastdb"][0]
        db_name = os.path.splitext(os.path.basename(file))[0]
        db_path = os.path.dirname(file)
khalid's avatar
khalid committed
14

15
    return { "db_name": db_name, "db_path": db_path }
16

mmassaviol's avatar
mmassaviol committed
17
18
19
20
rule <step_name>__blastn:
    input:
        **<step_name>__blastn_inputs(),
    output:
21
        blastout = config["results_dir"] + "/" + config["<step_name>__blastn_output_dir"] + "/blastout_" + <step_name>__db()["db_name"] + ".tsv",
mmassaviol's avatar
mmassaviol committed
22
        selected_contigs = config["results_dir"] + "/" + config["<step_name>__blastn_output_dir"] + "/selected_contigs.fasta",
mmassaviol's avatar
mmassaviol committed
23
24
25
26
    params:
        command = config["<step_name>__blastn_command"],
        evalue = config["<step_name>__blastn_evalue"],
        max_target_sequences =  config["<step_name>__blastn_max_target_sequences"],
mmassaviol's avatar
mmassaviol committed
27
        max_hsps =  config["<step_name>__blastn_max_hsps"],
mmassaviol's avatar
mmassaviol committed
28
        min_len = config["<step_name>__blastn_min_len"],
29
        #db = lambda w,input: os.path.splitext(input.blastdb)[0]
30
        db = <step_name>__db()["db_path"] + "/" + <step_name>__db()["db_name"]
mmassaviol's avatar
mmassaviol committed
31
32
33
34
35
    threads:
        config["<step_name>__blastn_threads"]
    log:
        config["results_dir"]+'/logs/' + config["<step_name>__blastn_output_dir"] + '/blastn_log.txt'
    shell:
mmassaviol's avatar
mmassaviol committed
36
        # output header
khalid's avatar
khalid committed
37
        "echo -e 'qseqid\tsseqid\tpident\tlength\tmismach\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\tqlen' > {output.blastout}; "
mmassaviol's avatar
mmassaviol committed
38
        # command
mmassaviol's avatar
mmassaviol committed
39
40
41
42
43
44
45
46
47
        "{params.command} "
        "-query {input.query} "
        "-db {params.db} "
        "-evalue {params.evalue} "
        "-max_target_seqs {params.max_target_sequences} "
        "-max_hsps {params.max_hsps} "
        "-outfmt '6 std qlen' "
        "-num_threads {threads} 2> {log} | "
        "awk \"{{if (\$4 >= {params.min_len}) print }}\""
mmassaviol's avatar
mmassaviol committed
48
49
50
51
52
53
54
55
        ">> {output.blastout} ;"

        "if [ $(wc -l < {output.blastout} ) -ge 2 ]; then "
            "head -n +2 {output.blastout} | cut -f 1 > /tmp/qids.txt "
            "&& grep --no-group-separator -f /tmp/qids.txt -A 1 {input.query} > {output.selected_contigs};"
        "else "
        "touch {output.selected_contigs};"
        "fi"