generate_workflow_snakefile.py 10.9 KB
Newer Older
jlopez's avatar
jlopez committed
1
2
3
4
5
6
#!/usr/bin/python3
# This script takes a workflow and generates its snakefile
# Accepts 1 parameter:
# Usage: ./generate_workflow_snakefile.py workflow_name


mmassaviol's avatar
mmassaviol committed
7
from tools import read_yaml
jlopez's avatar
jlopez committed
8
import sys
jlopez's avatar
Update    
jlopez committed
9
10
11
import tempfile
import shutil
import os
jlopez's avatar
jlopez committed
12

jlopez's avatar
Update    
jlopez committed
13
# Scratch directory created as a side effect of importing/running this module.
# NOTE(review): `dirpath` is not referenced anywhere in the visible code and
# nothing visible removes the directory -- confirm whether it is still needed.
dirpath = tempfile.mkdtemp()
jlopez's avatar
jlopez committed
14

jlopez's avatar
Update    
jlopez committed
15
16
17
# Default locations used by generate(). Both are used as plain string prefixes
# (joined with "+"), hence the trailing "/".
DEFAULT_PATH_OUTPUT = "./"  # prefix under which "<name>/<name>.snakefile" is written
DEFAULT_PATH_INPUT = "./workflows/"  # prefix from which the workflow YAML is read by default

mmassaviol's avatar
mmassaviol committed
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def wildcard_in_inputs(rule_name, tool_yaml):
    """Tell whether the inputs of command ``rule_name`` depend on ``{sample}``.

    An input depends on the sample when its ``file`` entry contains the
    ``{sample}`` wildcard or when its ``type`` is ``"reads"``.

    Args:
        rule_name: name of the command to look up in ``tool_yaml["commands"]``.
        tool_yaml: parsed tool description; must hold a ``"commands"`` list
            whose items have ``"name"`` and ``"inputs"`` keys.

    Returns:
        True when at least one input of a matching command depends on the
        sample, False when the command exists but none of its inputs does.

    Raises:
        SystemExit: when no command is named ``rule_name``.
    """
    found = False
    for command in tool_yaml["commands"]:
        if command["name"] != rule_name:
            continue
        found = True
        for entry in command["inputs"]:
            if "file" in entry and "{sample}" in entry["file"]:
                return True
            # .get(): an input without a "type" key is simply not "reads"
            # (direct indexing used to raise KeyError here).
            if entry.get("type") == "reads":
                return True
    if not found:
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter and may be absent (e.g. python -S).
        sys.exit("Command "+rule_name+" not found")
    # command found but no wildcard
    return False

def wildcard_in_outputs(rule_name, tool_yaml):
    """Tell whether the outputs of command ``rule_name`` depend on ``{sample}``.

    Args:
        rule_name: name of the command to look up in ``tool_yaml["commands"]``.
        tool_yaml: parsed tool description; must hold a ``"commands"`` list
            whose items have ``"name"`` and ``"outputs"`` keys.

    Returns:
        True when the ``file`` entry of at least one output of a matching
        command contains ``{sample}``, False when the command exists but no
        output file uses the wildcard.

    Raises:
        SystemExit: when no command is named ``rule_name``.
    """
    found = False
    for command in tool_yaml["commands"]:
        if command["name"] != rule_name:
            continue
        found = True
        if any("file" in entry and "{sample}" in entry["file"]
               for entry in command["outputs"]):
            return True
    if not found:
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter and may be absent (e.g. python -S).
        sys.exit("Command "+rule_name+" not found")
    # command found but no wildcard
    return False

jlopez's avatar
Update    
jlopez committed
50
def generate(name, path_yaml = "", path_input = DEFAULT_PATH_INPUT, path_output = DEFAULT_PATH_OUTPUT):
    """Generate the snakefile of workflow ``name`` from its YAML description.

    The generated text is made of, in order: an imports placeholder, the raw
    inputs section, one ``<step>__<rule>_inputs()`` function per step that has
    a ``rule_name``, the ``step_outputs`` / ``workflow_outputs`` functions and
    the rules placeholders. The ``{import ...}`` placeholders are written
    verbatim (presumably substituted by a later stage -- not visible here).

    Args:
        name: workflow name; used to build the YAML and snakefile paths.
        path_yaml: path of the workflow YAML. When empty,
            ``path_input + name + "/" + name + ".yaml"`` is read instead.
        path_input: prefix under which ``raw_inputs/<input>.yaml`` and
            ``tools/<tool>/<tool>.yaml`` are read. Paths are built by plain
            string concatenation.
        path_output: prefix of the output; the result is written to
            ``path_output + name + "/" + name + ".snakefile"``.

    Returns:
        None. The snakefile is written to disk.

    Raises:
        SystemExit: when a step references a command that its tool YAML does
            not define (raised by wildcard_in_inputs / wildcard_in_outputs).
    """
    if path_yaml == "":
        workflow = read_yaml(path_input+name+"/"+name+".yaml")
    else:
        workflow = read_yaml(path_yaml)

    # Chunks of the generated snakefile, joined once at the end.
    out = ["# File generated with generate_workflow_snakefile.py\n\n"]

    # imports
    out.append("{import global_imports}\n\n")

    # inputs
    out.append("##########\n")
    out.append("# Inputs #\n")
    out.append("##########\n")
    out.append("\n")

    if "input" in workflow:
        for raw_input in workflow["input"]:
            raw_inputs_yaml = read_yaml(path_input+"/raw_inputs/"+raw_input+".yaml")
            out.append("# raw_inputs function call\n")
            out.append(raw_input+" = "+raw_inputs_yaml["function_call"]+"\n")
            out.append("config.update("+raw_input+")\n")
            # NOTE: SAMPLES may be assigned several times when the workflow
            # has several sample-bearing raw inputs (e.g. raw_reads and
            # raw_vcf); should we check that the samples are the same?
            if not raw_input.endswith(("_index", "_popmap", "_vcfFile", "_gff3")):
                out.append("SAMPLES = "+raw_input+"['samples']\n")

    out.append("\n")
    out.append("# Tools inputs functions\n")

    if "steps_in" in workflow:
        for step in workflow["steps_in"]:
            tool_yaml = read_yaml(path_input+"/tools/"+step["tool_name"]+"/"+step["tool_name"]+".yaml")

            if "rule_name" not in step:
                continue

            step_name = step["step_name"]
            rule_name = step["rule_name"]

            # Description of the rule in the tool YAML (last match wins).
            rule_yaml = dict()
            for command in tool_yaml["commands"]:
                if command["name"] == rule_name:
                    rule_yaml = command

            # Wildcard in inputs but not in outputs => expand on inputs.
            wildcard_in = wildcard_in_inputs(rule_name, tool_yaml)
            wildcard_out = wildcard_in_outputs(rule_name, tool_yaml)
            if wildcard_in and not wildcard_out:
                expand_begin = "expand("
                expand_end = ",sample=SAMPLES)"
            else:
                expand_begin = ""
                expand_end = ""

            # Input names already handled from the workflow "params".
            seen_inputs = set()

            out.append("def " + step_name + "__" + rule_name + "_inputs():\n")
            out.append("\tinputs = dict()\n")

            if "params" in step:
                # Raw inputs already appended to a list-type input.
                raw_inputs = list()

                for param in step["params"]:
                    input_name = param["input_name"]
                    target = "\tinputs[\"" + input_name + "\"]"
                    from_raw = "raw_" in param["origin_command"]

                    # Description of this input in the rule (last match wins).
                    input_yaml = dict()
                    for rule_input in rule_yaml["inputs"]:
                        if rule_input["name"] == input_name:
                            input_yaml = rule_input

                    if input_yaml.get("expand"):
                        # Expand-type input.
                        if input_name not in seen_inputs:
                            out.append(target + " = list()\n")
                        if from_raw:
                            # Raw origins only contribute a blank line here.
                            # This line used to read `reslt += "\n"` and
                            # raised NameError.
                            out.append("\n")
                        else:
                            out.append(target + " = expand(" + expand_begin + "rules." + param["origin_step"] + "__" + param["origin_command"] + ".output." + param["origin_name"] + expand_end + ", sample=SAMPLES)\n")
                    elif input_yaml.get("list"):
                        # List-type input: the list is created once, then
                        # each param appends to it.
                        if input_name not in seen_inputs:
                            out.append(target + " = list()\n")
                        if from_raw:
                            # A given raw input is appended only once.
                            if param["origin_command"] not in raw_inputs:
                                out.append(target + ".append(" + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"]" + expand_end + ")\n")
                                raw_inputs.append(param["origin_command"])
                        else:
                            out.append(target + ".append(" + expand_begin + "rules." + param["origin_step"] + "__" + param["origin_command"] + ".output." + param["origin_name"] + expand_end + ")\n")
                    else:
                        # Regular input.
                        if from_raw:
                            out.append(target + " = " + expand_begin + param["origin_command"] + "[\"" + param["origin_name"] + "\"] " + expand_end + "\n")
                        else:
                            out.append(target + " = " + expand_begin + "rules." + param["origin_step"] + "__" + param["origin_command"] + ".output." + param["origin_name"] + expand_end + "\n")

                    # Input handled (or seen once for list-type inputs).
                    seen_inputs.add(input_name)

                # Inputs of the command that are not linked in steps_in and
                # are declared as coming from a parameter are read from config.
                for rule_input in rule_yaml["inputs"]:
                    if rule_input["name"] not in seen_inputs and rule_input.get("from") == "parameter":
                        out.append("\tinputs[\"" + rule_input["name"] + "\"] = config[\""+ step_name + "__" + rule_name + "_" + rule_input["name"] +"\"]\n")

            out.append("\treturn inputs\n")
            out.append("\n")

    out.append("\n")
    out.append("{import global_functions}\n")
    out.append("\n")

    # outputs
    out.append("###########\n")
    out.append("# Outputs #\n")
    out.append("###########\n")
    out.append("\n")

    out.append("def step_outputs(step):\n")
    out.append("\toutputs = list()\n")

    if "steps_in" in workflow:
        for step in workflow["steps_in"]:
            if ("step_name" in step) and ("rule_name" in step):
                # Check in the tool YAML whether {sample} appears in the
                # output files, in which case the outputs are expanded.
                tool_yaml = read_yaml(path_input+"/tools/"+step["tool_name"]+"/"+step["tool_name"]+".yaml")
                if wildcard_in_outputs(step["rule_name"], tool_yaml):
                    expand_start = "expand("
                    expand_end = ", sample=SAMPLES)"
                else:
                    expand_start = ""
                    expand_end = ""

                # Rules suffixed _SE / _PE are only selected for the matching
                # config['SeOrPe'] value (_PE wins if both substrings occur).
                add = ""
                if "_SE" in step["rule_name"]:
                    add = "and config['SeOrPe'] == 'SE'"
                if "_PE" in step["rule_name"]:
                    add = "and config['SeOrPe'] == 'PE'"

                out.append("\tif (step == \"" + step["step_name"] + "\" " + add + " ):\n")
                out.append("\t\toutputs = " + expand_start + "rules." + step["step_name"] + "__" + step["rule_name"] + ".output " + expand_end + "\n")
                out.append("\t\t\n")

    out.append("\tif (step == \"all\"):\n")
    out.append("\t\toutputs = list(rules.multiqc.output)\n")
    out.append("\n")
    out.append("\treturn outputs\n")
    out.append("\n")

    out.append("# get outputs for each choosen tools\n")
    out.append("def workflow_outputs(step):\n")
    out.append("\toutputs = list()\n")
    out.append("\toutputs.extend(step_outputs(step))\n")
    out.append("\treturn outputs\n")
    out.append("\n")

    # rules
    out.append("#########\n")
    out.append("# Rules #\n")
    out.append("#########\n")
    out.append("\n")
    out.append("{import rules}\n")
    out.append("{import global_rules}\n")

    path_snakefile = path_output + name + "/" + name + ".snakefile"
    # Context manager: the file is closed even if the write fails.
    with open(path_snakefile, "w") as snakefile:
        snakefile.write("".join(out))
jlopez's avatar
jlopez committed
253
254


jlopez's avatar
Update    
jlopez committed
255
256
257
258
259
260
261
262
263
264
def create_directory(output_path):
    """Create directory ``output_path`` with mode 0o755, best effort.

    Only the last path component is created (os.mkdir). Any OSError is
    reported on standard output and otherwise ignored.
    """
    try:
        os.mkdir(output_path, mode=0o755)
    except OSError:
        message = "Creation of the directory %s failed" % output_path
        print(message)


jlopez's avatar
jlopez committed
265
266

def main():
    """Command-line entry point.

    Accepted forms (arguments are read from sys.argv):

    * ``workflow_name`` -- generate with the default input/output paths.
    * ``workflow_name output_path [input_path]`` -- read the workflow YAML
      from ``output_path + name + "/" + name + ".yaml"`` and write the
      snakefile under ``output_path``. ``input_path`` falls back to
      DEFAULT_PATH_INPUT when omitted (this form used to crash with
      IndexError).

    Raises:
        SystemExit: with a usage message when no workflow name is given.
    """
    argc = len(sys.argv)

    if argc < 2:
        sys.exit("""Needs 1 argument minimum: workflow_name
Usage: ./generate_workflow_snakefile.py workflow_name [output_path [input_path]]""")

    name = sys.argv[1]

    if argc == 2:
        generate(name)
        return

    output_path = sys.argv[2]
    path_yaml = output_path + name + "/" + name + ".yaml"
    # Third argument is optional: only index sys.argv[3] when it exists.
    input_path = sys.argv[3] if argc > 3 else DEFAULT_PATH_INPUT

    generate(name, path_yaml, input_path, output_path)


if __name__ == "__main__":
    # Execute only when run as a script, not when imported as a module.
    main()