mmassaviol / WAWmassaviol · Commits · be5135ea

Commit be5135ea, authored Jul 24, 2020 by jlopez, committed by mmassaviol on Sep 04, 2020

Update

parent 7dd4c240
Changes 47

generate_sag_yaml.py
...
@@ -46,9 +46,10 @@ content: []
 """
 # Params to remove from the interface defined in the workflow yaml
-def get_params_to_remove(pipeline):
+def get_params_to_remove(pipeline, input_path="./", workFlowData=None):
     to_remove = list()
-    workflow = read_yaml("workflows/" + pipeline + "/" + pipeline + ".yaml")
+    #workflow = read_yaml(input_path+"workflows/" + pipeline + "/" + pipeline + ".yaml")
+    workflow = workFlowData
     if "params_equals" in workflow.keys():
         params_equals = workflow["params_equals"]
         for line in params_equals:
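The new signature stops re-reading the workflow YAML from disk: the caller is now expected to parse it once and inject the parsed structure through workFlowData. A minimal sketch of the call pattern this implies; the pipeline name and paths are placeholders, not values from the commit:

# Hypothetical caller: parse the workflow YAML once, then inject it.
wf = read_yaml("output/my_pipeline/my_pipeline.yaml")  # placeholder path
to_remove = get_params_to_remove("my_pipeline", "./", wf)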
...
@@ -58,10 +59,13 @@ def get_params_to_remove(pipeline):
             to_remove.append(line["param_B"])
     return to_remove
 
-def generate_tool(toolname, pipeline, step_name):
-    to_remove = get_params_to_remove(pipeline)
+def generate_tool(toolname, pipeline, step_name, input_path="./", workFlowData=None):
+    to_remove = get_params_to_remove(pipeline, input_path, workFlowData)
     template = yaml.load(tool_template, Loader=yaml.FullLoader)
-    tool = read_yaml("tools/" + toolname + "/" + toolname + ".yaml")
+    tool = read_yaml(input_path + "tools/" + toolname + "/" + toolname + ".yaml")
     template["name"] = tool["id"]
     template["title"] = tool["name"]
...
@@ -122,8 +126,9 @@ def generate_tool(toolname, pipeline, step_name):
     return template
 
-def generate_workflow_params(pipeline):
-    workflow = read_yaml("workflows/" + pipeline + "/" + pipeline + ".yaml")
+def generate_workflow_params(pipeline, input_path="./", workflowData=None):
+    #workflow = read_yaml(input_path+ "workflows/" + pipeline + "/" + pipeline + ".yaml")
+    workflow = workflowData
     template = yaml.load(tool_template, Loader=yaml.FullLoader)
     template["name"] = "global_params"
     template["title"] = "Global parameters :"
...
@@ -131,7 +136,7 @@ def generate_workflow_params(pipeline):
         template["content"].append(option)
     if "input" in workflow:
-        raw_input = read_yaml("raw_inputs/" + workflow["input"] + ".yaml")
+        raw_input = read_yaml(input_path + "raw_inputs/" + workflow["input"] + ".yaml")
         for option in raw_input["options"]:
             template["content"].append(option)
...
@@ -142,12 +147,13 @@ def generate_workflow_params(pipeline):
     return template
 
-def generate_sag_yaml(pipeline, template, outdir):
+def generate_sag_yaml(pipeline, template, outdir, input_path="./"):
     # reading template
     template = yaml.load(template, Loader=yaml.FullLoader)
-    config = read_yaml("workflows/" + pipeline + "/" + pipeline + ".yaml")
-    template["App"]["project"] = outdir
+    #config = read_yaml(input_path + 'workflows/' + pipeline + "/" + pipeline + ".yaml")
+    config = read_yaml(outdir + pipeline + "/" + pipeline + ".yaml")
+    template["App"]["project"] = outdir + pipeline + "/sagApp/"
     template["App"]["general"]["title"] = config["name"]
     # adding global parameters
...
@@ -159,7 +165,7 @@ def generate_sag_yaml(pipeline, template, outdir):
     )
     # predefined boxes
-    template["App"]["pages"][0]["boxes"].append(generate_workflow_params(pipeline))
+    template["App"]["pages"][0]["boxes"].append(generate_workflow_params(pipeline, input_path, config))
     page_number = 1
...
@@ -181,13 +187,28 @@ def generate_sag_yaml(pipeline, template, outdir):
             # for each tool in a step, create a box
             for tool in step["tools"]:
                 # predefined boxes
-                template["App"]["pages"][page_number]["boxes"].append(generate_tool(tool, pipeline, step["name"]))
+                template["App"]["pages"][page_number]["boxes"].append(generate_tool(tool, pipeline, step["name"], input_path, config))
             page_number += 1
     return template
 
 def main():
-    # generate
-    data = generate_sag_yaml(sys.argv[1], app_template, sys.argv[2])
-    write_yaml("workflows/" + sys.argv[1] + "/sag.yaml", data)
+    if len(sys.argv) == 2:
+        # generate
+        data = generate_sag_yaml(sys.argv[1], app_template, sys.argv[2])
+        write_yaml("workflows/" + sys.argv[1] + "/sag.yaml", data)
+    elif len(sys.argv) > 2:
+        # generate
+        data = generate_sag_yaml(sys.argv[1], app_template, sys.argv[2], sys.argv[3])
+        write_yaml(sys.argv[2] + sys.argv[1] + "/sag.yaml", data)
+    else:
+        exit("""Needs 2 argument minimum; """);
 
 if __name__ == "__main__":
     main()
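A hedged sketch of how the rewritten entry point is driven; the argument values are placeholders, not from the commit. Note that the len(sys.argv) == 2 branch still indexes sys.argv[2], so invoking the script with a single argument would raise an IndexError:

# Equivalent to: python generate_sag_yaml.py my_pipeline output/ ./waw_src/
data = generate_sag_yaml("my_pipeline", app_template, "output/", "./waw_src/")
write_yaml("output/my_pipeline/sag.yaml", data)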
generate_workflow.py
...
@@ -20,10 +20,10 @@ STEPS = collections.OrderedDict()
 TOOLS = list()
 
 # Import all yaml files (workflow and tools)
-def import_yaml_files(workflow_name):
+def import_yaml_files(workflow_name, output_path="./workflows/", input_path="./"):
     # Import workflow yaml
     global WORKFLOW_YAML
-    WORKFLOW_YAML = read_yaml("workflows/" + workflow_name + "/" + workflow_name + ".yaml")
+    WORKFLOW_YAML = read_yaml(output_path + workflow_name + "/" + workflow_name + ".yaml")
     # Import steps
     global STEPS
...
@@ -39,13 +39,13 @@ def import_yaml_files(workflow_name):
     global TOOLS_YAML
     for tool in step["tools"]:
         TOOLS.append(tool)
-        TOOLS_YAML[tool] = read_yaml("tools/" + tool + "/" + tool + ".yaml")
+        TOOLS_YAML[tool] = read_yaml(input_path + "tools/" + tool + "/" + tool + ".yaml")
     # Import base yaml (versions and citations for base container)
     global BASE_CITATIONS_YAML
-    BASE_CITATIONS_YAML = read_yaml("Docker_base/citations.yaml")
+    BASE_CITATIONS_YAML = read_yaml(input_path + "Docker_base/citations.yaml")
     global BASE_VERSIONS_YAML
-    BASE_VERSIONS_YAML = read_yaml("Docker_base/versions.yaml")
+    BASE_VERSIONS_YAML = read_yaml(input_path + "Docker_base/versions.yaml")
 
 def get_params_to_remove():
     # Get params to remove
...
@@ -60,7 +60,7 @@ def get_params_to_remove():
     return to_remove
 
 # Generate tool parameters with default values
-def generate_tool_params(workflow_name, to_remove):
+def generate_tool_params(workflow_name, to_remove, input_path="./"):
     TOOL_PARAMS = collections.OrderedDict()
     # Global workflow parameters
...
@@ -71,7 +71,7 @@ def generate_tool_params(workflow_name, to_remove):
     # input parameters
     if "input" in WORKFLOW_YAML:
-        raw_inputs_yaml = read_yaml("./raw_inputs/" + WORKFLOW_YAML["input"] + ".yaml")
+        raw_inputs_yaml = read_yaml(input_path + "raw_inputs/" + WORKFLOW_YAML["input"] + ".yaml")
         for option in raw_inputs_yaml["options"]:
             TOOL_PARAMS[option["name"]] = option["value"] if ("value" in option.keys()) else ""
...
@@ -96,7 +96,7 @@ def generate_tool_params(workflow_name, to_remove):
     return TOOL_PARAMS
 
 # Generate parameters info (usefull for shiny app)
-def generate_params_info(to_remove):
+def generate_params_info(to_remove, input_path="./"):
     PARAMS_INFO = collections.OrderedDict()
     for option in WORKFLOW_YAML["options"]:
...
@@ -108,7 +108,7 @@ def generate_params_info(to_remove):
     # input parameters
     if "input" in WORKFLOW_YAML:
-        raw_inputs_yaml = read_yaml("./raw_inputs/" + WORKFLOW_YAML["input"] + ".yaml")
+        raw_inputs_yaml = read_yaml(input_path + "raw_inputs/" + WORKFLOW_YAML["input"] + ".yaml")
         for option in raw_inputs_yaml["options"]:
             PARAMS_INFO[option["name"]] = collections.OrderedDict()
             PARAMS_INFO[option["name"]]["type"] = option["type"]
...
@@ -227,10 +227,10 @@ def generate_prepare_report_outputs():
     return PREPARE_REPORT_OUTPUTS
 
-def generate_snake_rule(step_name, tool_name):
+def generate_snake_rule(step_name, tool_name, input_path="./"):
     RULE = "\n"
-    with open("tools/" + tool_name + "/" + tool_name + ".rule.snakefile", "r") as rule:
+    with open(input_path + "tools/" + tool_name + "/" + tool_name + ".rule.snakefile", "r") as rule:
         RULE += rule.read()
     RULE += "\n"
...
@@ -240,7 +240,7 @@ def generate_snake_rule(step_name,tool_name):
     return RULE
 
 # Generate Snakefile
-def generate_snakefile(workflow_name):
+def generate_snakefile(workflow_name, input_path="./", output_path="./workflows/"):
     SNAKEFILE = ""
     RULES = ""
...
@@ -255,27 +255,27 @@ def generate_snakefile(workflow_name):
     SNAKEFILE += "workdir: config['params']['results_dir']\n"
     # Open template
-    with open("workflows/" + workflow_name + "/" + workflow_name + ".snakefile", "r") as template:
+    with open(output_path + workflow_name + "/" + workflow_name + ".snakefile", "r") as template:
         SNAKEFILE += template.read()
     # import wildcards and imports
-    with open("workflows/global_imports.py", "r") as imports:
+    with open(input_path + "workflows/global_imports.py", "r") as imports:
         GLOBAL_IMPORTS = imports.read()
     SNAKEFILE = SNAKEFILE.replace("{import global_imports}", GLOBAL_IMPORTS)
     # Global functions
-    with open("workflows/global_functions.py") as global_functions:
+    with open(input_path + "workflows/global_functions.py") as global_functions:
         GLOBAL_FUNCTIONS = global_functions.read()
     SNAKEFILE = SNAKEFILE.replace("{import global_functions}", GLOBAL_FUNCTIONS)
     # Global rules
-    with open("workflows/global_rules.snakefile") as global_rules:
+    with open(input_path + "workflows/global_rules.snakefile") as global_rules:
         GLOBAL_RULES = global_rules.read()
     SNAKEFILE = SNAKEFILE.replace("{import global_rules}", GLOBAL_RULES)
     for step_name, step_yaml in STEPS.items():
         for tool in step_yaml["tools"]:
-            RULES += generate_snake_rule(step_name, tool)
+            RULES += generate_snake_rule(step_name, tool, input_path)
     SNAKEFILE = SNAKEFILE.replace("{import rules}", RULES)
...
@@ -290,10 +290,10 @@ def generate_snakefile(workflow_name):
     return SNAKEFILE
 
 # Generate Dockerfile
-def generate_dockerfile(workflow_name, output_dir, local_config):
+def generate_dockerfile(workflow_name, output_dir, local_config, input_file="./"):
     DOCKERFILE = ""
     # Open template
-    with open("Dockerfile.template", "r") as template:
+    with open(input_file + "Dockerfile.template", "r") as template:
         DOCKERFILE = template.read()
     DOCKERFILE += "\n"
...
@@ -301,10 +301,10 @@ def generate_dockerfile(workflow_name, output_dir, local_config):
     files = ""
     if "data" in WORKFLOW_YAML.keys():
         for data in WORKFLOW_YAML["data"]:
-            file = "./workflows/" + workflow_name + "/data/" + data["name"]
-            files += "COPY ./files/data/" + data["name"] + " /" + data["name"] + "\n"
+            file = input_file + "workflows/" + workflow_name + "/data/" + data["name"]
+            files += "COPY " + output_dir + workflow_name + "/files/data/" + data["name"] + " /" + data["name"] + "\n"
             # Copy files
-            copy_dir(file, output_dir + '/files/data/' + data["name"])
+            copy_dir(file, output_dir + workflow_name + '/files/data/' + data["name"])
     # COPY files if data in tools yaml
     # And import install commands
...
@@ -313,10 +313,10 @@ def generate_dockerfile(workflow_name, output_dir, local_config):
             tools_installs.update(tool_yaml["install"])
         if "data" in tool_yaml.keys():
             for data in tool_yaml["data"]:
-                file = "./tools/" + tool_name + "/data/" + data["name"]
-                files += "COPY ./files/data/" + data["name"] + " /" + data["name"] + "\n"
+                file = input_file + "tools/" + tool_name + "/data/" + data["name"]
+                files += "COPY " + output_dir + workflow_name + "/files/data/" + data["name"] + " /" + data["name"] + "\n"
                 # Copy files
-                shutil.copytree(file, output_dir + '/files/data/' + data["name"])
+                shutil.copytree(file, output_dir + workflow_name + '/files/data/' + data["name"])
     files += "COPY files /workflow\n"
     files += "COPY sagApp /sagApp\n\n"
...
@@ -358,13 +358,13 @@ def generate_dockerfile(workflow_name, output_dir, local_config):
     return DOCKERFILE
 
 # Generate params yaml with all parameters and informations
-def generate_params_yaml(workflow_name, to_remove):
+def generate_params_yaml(workflow_name, to_remove, input_path="./"):
     PARAMS = collections.OrderedDict()
     PARAMS["pipeline"] = workflow_name
-    PARAMS["params"] = generate_tool_params(workflow_name, to_remove)
+    PARAMS["params"] = generate_tool_params(workflow_name, to_remove, input_path)
     PARAMS["steps"] = list(STEPS.values())
-    PARAMS["params_info"] = generate_params_info(to_remove)
+    PARAMS["params_info"] = generate_params_info(to_remove, input_path)
     PARAMS["prepare_report_scripts"] = list(generate_prepare_report_scripts()["SCRIPTS"])
     PARAMS["prepare_report_outputs"] = generate_prepare_report_outputs()
     PARAMS["outputs"] = generate_outputs()
...
@@ -373,37 +373,37 @@ def generate_params_yaml(workflow_name, to_remove):
     return PARAMS
 
-def generate_pipeline_files(workflow_name, output_dir, local_config="default"):
+def generate_pipeline_files(workflow_name, output_dir, local_config="default", input_path="./"):
     # Create output directory if needed
-    if not os.path.isdir(output_dir):
-        os.mkdir(output_dir)
-    if not os.path.isdir(output_dir + "/files"):
-        os.mkdir(output_dir + "/files")
+    if not os.path.isdir(output_dir + workflow_name):
+        os.mkdir(output_dir + workflow_name)
+    if not os.path.isdir(output_dir + workflow_name + "/files"):
+        os.mkdir(output_dir + workflow_name + "/files")
     ### Import yaml files and generate all output files
-    import_yaml_files(workflow_name)
+    import_yaml_files(workflow_name, output_dir, input_path)
-    write_yaml(output_dir + "/files/citations.yaml", generate_citations(workflow_name, output_dir))
+    write_yaml(output_dir + workflow_name + "/files/citations.yaml", generate_citations(workflow_name, output_dir))
-    write_yaml(output_dir + "/files/versions.yaml", generate_versions(workflow_name, output_dir))
+    write_yaml(output_dir + workflow_name + "/files/versions.yaml", generate_versions(workflow_name, output_dir))
-    with open(output_dir + "/files/Snakefile", "w") as out:
-        out.write(generate_snakefile(workflow_name))
+    with open(output_dir + workflow_name + "/files/Snakefile", "w") as out:
+        out.write(generate_snakefile(workflow_name, input_path, output_dir))
-    with open(output_dir + "/Dockerfile", "w") as out:
-        out.write(generate_dockerfile(workflow_name, output_dir, local_config))
+    with open(output_dir + workflow_name + "/Dockerfile", "w") as out:
+        out.write(generate_dockerfile(workflow_name, output_dir, local_config, input_path))
-    write_yaml(output_dir + "/files/params.total.yml", generate_params_yaml(workflow_name, get_params_to_remove())) ###
+    write_yaml(output_dir + workflow_name + "/files/params.total.yml", generate_params_yaml(workflow_name, get_params_to_remove(), input_path)) ### 1
     ### Copy scripts and other files
     NB_SCRIPTS = len(list(generate_prepare_report_scripts()["SCRIPTS"])) + len(list(generate_tools_scripts()))
-    if not os.path.isdir(output_dir + "/files/scripts"):
-        os.mkdir(output_dir + "/files/scripts")
+    if not os.path.isdir(output_dir + workflow_name + "/files/scripts"):
+        os.mkdir(output_dir + workflow_name + "/files/scripts")
     if (NB_SCRIPTS == 0):
-        with open(output_dir + "/files/scripts/.gitignore", 'w') as gitignore:
+        with open(output_dir + workflow_name + "/files/scripts/.gitignore", 'w') as gitignore:
            to_write = """# gitignore to force creation of scripts dir
!.gitignore
"""
...
@@ -415,18 +415,18 @@ def generate_pipeline_files(workflow_name, output_dir, local_config="default"):
         tool = script_dict["tool"]
         script = script_dict["script"]
         step = script_dict["step"]
-        returned_value = subprocess.call("sed 's/<step_name>/" + step + "/g' ./tools/" + tool + "/" + script + " > " + output_dir + "/files/scripts/" + step + "__" + script, shell=True)
+        returned_value = subprocess.call("sed 's/<step_name>/" + step + "/g' " + input_path + "tools/" + tool + "/" + script + " > " + output_dir + "/files/scripts/" + step + "__" + script, shell=True)
         # search and replace <step_name>
         #shutil.copy("./tools/" + tool + "/" + script, output_dir + "/files/scripts")
     for tool, script in generate_tools_scripts().items():
-        shutil.copy("./tools/" + tool + "/" + script, output_dir + "/files/scripts")
+        shutil.copy(input_path + "tools/" + tool + "/" + script, output_dir + workflow_name + "/files/scripts")
-    shutil.copy("./generate_multiqc_config.py", output_dir + "/files")
-    shutil.copy("./tools.py", output_dir + "/files")
+    shutil.copy(input_path + "generate_multiqc_config.py", output_dir + workflow_name + "/files")
+    shutil.copy(input_path + "tools.py", output_dir + workflow_name + "/files")
     if "input" in WORKFLOW_YAML:
-        shutil.copy("./raw_inputs/" + WORKFLOW_YAML["input"] + ".py", output_dir + "/files")
+        shutil.copy(input_path + "raw_inputs/" + WORKFLOW_YAML["input"] + ".py", output_dir + workflow_name + "/files")
     if (local_config != "default"):
         local_config_data = read_yaml(local_config)
...
@@ -435,8 +435,8 @@ def generate_pipeline_files(workflow_name, output_dir, local_config="default"):
             qsub_file.writelines([a.replace("{workflow_name}", WORKFLOW_YAML["docker_name"]) + '\n' for a in local_config_data["qsub"]])
     # deploys scripts
-    for script in os.listdir("deploys"):
-        with open("deploys/" + script, "r") as infile:
+    for script in os.listdir(input_path + "deploys"):
+        with open(input_path + "deploys/" + script, "r") as infile:
             lines = infile.readlines()
             with open(output_dir + "/" + script, "w") as outfile:
                 outfile.writelines([a.replace("{workflow_name}", WORKFLOW_YAML["docker_name"]) for a in lines])
...
@@ -445,7 +445,9 @@ def generate_pipeline_files(workflow_name, output_dir, local_config="default"):
 ###
 def main():
-    if len(sys.argv) > 3:
+    if len(sys.argv) == 5:
+        generate_pipeline_files(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+    elif len(sys.argv) > 3:
         generate_pipeline_files(sys.argv[1], sys.argv[2], sys.argv[3])
     elif len(sys.argv) == 3:
         generate_pipeline_files(sys.argv[1], sys.argv[2])
...
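Taken together, generate_pipeline_files() now threads input_path (where the WAW sources live) and output_dir (the per-workflow build tree) through every helper. A hedged usage sketch mirroring the new five-argument CLI branch; all paths are placeholders:

# Equivalent to: python generate_workflow.py my_pipeline output/ default ./waw_src/
generate_pipeline_files("my_pipeline", "output/", "default", "./waw_src/")
# Expected outputs under output/my_pipeline/: Dockerfile, files/Snakefile,
# files/params.total.yml, files/citations.yaml, files/versions.yaml, scripts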
generate_workflow_snakefile.py
...
@@ -6,15 +6,26 @@
 from tools import read_yaml
 import sys
+import tempfile
+import shutil
 import os
 
-PATH_OUTPUT = "./"
-PATH_INPUT = "./workflows/"
+dirpath = tempfile.mkdtemp()
+
+DEFAULT_PATH_OUTPUT = "./"
+DEFAULT_PATH_INPUT = "./workflows/"
 
-def generate(name):
+def generate(name, path_yaml="", path_input=DEFAULT_PATH_INPUT, path_output=DEFAULT_PATH_OUTPUT):
     result = "# File generate with generate_workflow_snakefile.py\n\n"
-    yaml = read_yaml(PATH_INPUT + name + "/" + name + ".yaml")
+    yaml = None
+    if (path_yaml == ""):
+        yaml = read_yaml(path_input + name + "/" + name + ".yaml")
+    else:
+        #print(path_yaml)
+        yaml = read_yaml(path_yaml)
     # imports
...
@@ -29,7 +40,7 @@ def generate(name):
     result += "\n"
     if "input" in yaml:
-        raw_inputs_yaml = read_yaml("./raw_inputs/" + yaml["input"] + ".yaml")
+        raw_inputs_yaml = read_yaml(path_input + "/raw_inputs/" + yaml["input"] + ".yaml")
         result += "# raw_inputs function call\n"
         result += yaml["input"] + " = " + raw_inputs_yaml["function_call"] + "\n"
         result += "config.update(" + yaml["input"] + ")\n"
...
@@ -136,20 +147,40 @@ def generate(name):
     #print(result)
-    path_snakefile = PATH_INPUT + "/" + name + "/" + name + ".snakefile"
+    path_snakefile = path_output + name + "/" + name + ".snakefile"
     #print(path_snakefile)
     f = open(path_snakefile, "w")
     f.write(result)
     f.close()
 
+def create_directory(output_path):
+    access_rights = 0o755
+    try:
+        os.mkdir(output_path, access_rights)
+    except OSError:
+        print("Creation of the directory %s failed" % output_path)
 
 def main():
     if len(sys.argv) == 2:
         generate(sys.argv[1])
+    elif len(sys.argv) > 2:
+        name = sys.argv[1]
+        output_path = sys.argv[2]
+        path_yaml = output_path + name + "/" + name + ".yaml"
+        input_path = sys.argv[3]
+        generate(name, path_yaml, input_path, output_path)
     else:
-        exit("""Needs 1 argument: workflow_name
+        exit("""Needs 1 argument minimum: workflow_name
 Usage: ./generate_workflow_snakefile.py workflow_name """)
...
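The snakefile generator follows the same convention: with three CLI arguments, the workflow YAML is read from the output tree and the raw_inputs from the input tree. A hedged sketch of the equivalent direct call, with placeholder paths:

# Equivalent to: ./generate_workflow_snakefile.py my_pipeline output/ ./waw_src/
generate("my_pipeline",
         path_yaml="output/my_pipeline/my_pipeline.yaml",
         path_input="./waw_src/",
         path_output="output/")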
output/.gitignore (new file, 0 → 100644)
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
tools.py
...
@@ -3,7 +3,7 @@ import shutil
 def read_yaml(filepath):
     try:
-        with open(filepath, 'r') as file:
+        with open(filepath, 'rb') as file:
             data = yaml.load(file, Loader=yaml.FullLoader)
             return data
     except IOError as e:
...
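The 'r' to 'rb' switch is safe here because PyYAML accepts byte streams and detects the encoding itself (UTF-8/16/32, per the YAML spec), so binary mode sidesteps the platform-dependent default text encoding; plausibly this matters for the citation YAMLs below, which carry non-ASCII dashes. A minimal sketch with a placeholder filename:

import yaml

# yaml.load decodes the bytes itself, independent of the locale's encoding.
with open("example.yaml", "rb") as fh:
    data = yaml.load(fh, Loader=yaml.FullLoader)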
tools/Picard_MarkDuplicates/Picard_MarkDuplicates.yaml
...
@@ -74,7 +74,7 @@
     "Picard Toolkit. 2019. Broad Institute, GitHub Repository. http://broadinstitute.github.io/picard/; Broad Institute"],
   samtools: [
-    "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
+    "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
     ]
   }
 }
tools/bcftools_mpileup/bcftools_mpileup.yaml
...
@@ -79,10 +79,10 @@
   },
   citations: {
     bcftools: [
-      "Heng Li, A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data, Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987–2993, https://doi.org/10.1093/bioinformatics/btr509"
+      "Heng Li, A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data, Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987-2993, https://doi.org/10.1093/bioinformatics/btr509"
     ],
     tabix: [
-      "Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718–719, https://doi.org/10.1093/bioinformatics/btq671"
+      "Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718-719, https://doi.org/10.1093/bioinformatics/btq671"
     ]
   }
 }
tools/bowtie/bowtie.yaml
...
@@ -194,7 +194,7 @@
     "Langmead, B., Trapnell, C., Pop, M. et al. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biol 10, R25 (2009). https://doi.org/10.1186/gb-2009-10-3-r25"],
   samtools: [
-    "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
+    "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
     ]
   }
 }
\ No newline at end of file
tools/bowtie2/bowtie2.yaml
...
@@ -143,10 +143,10 @@
   },
   citations: {
     bowtie2: [
-      "Langmead, B., Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357–359 (2012). https://doi.org/10.1038/nmeth.1923"
+      "Langmead, B., Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nat Methods 9, 357-359 (2012). https://doi.org/10.1038/nmeth.1923"
     ],
     samtools: [
-      "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
+      "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
     ]
   }
 }
\ No newline at end of file
tools/bwa-mem2/bwa-mem2.yaml
...
@@ -120,7 +120,7 @@
     "Vasimuddin Md, Sanchit Misra, Heng Li, Srinivas Aluru. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019"],
   samtools: [
-    "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
+    "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
     ]
   }
 }
\ No newline at end of file
tools/bwa/bwa.yaml
...
@@ -155,13 +155,13 @@
   },
   citations: {
     bwa: [
-      "Heng Li, Richard Durbin, Fast and accurate short read alignment with Burrows–Wheeler transform, Bioinformatics, Volume 25, Issue 14, 15 July 2009, Pages 1754–1760, https://doi.org/10.1093/bioinformatics/btp324"
+      "Heng Li, Richard Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform, Bioinformatics, Volume 25, Issue 14, 15 July 2009, Pages 1754-1760, https://doi.org/10.1093/bioinformatics/btp324"
     ],
     bwa-mem: [
       "Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv:1303.3997v1 [q-bio.GN]"],
     samtools: [
-      "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078–2079, https://doi.org/10.1093/bioinformatics/btp352"
+      "Heng Li, Bob Handsaker, Alec Wysoker, Tim Fennell, Jue Ruan, Nils Homer, Gabor Marth, Goncalo Abecasis, Richard Durbin, 1000 Genome Project Data Processing Subgroup, The Sequence Alignment/Map format and SAMtools, Bioinformatics, Volume 25, Issue 16, 15 August 2009, Pages 2078-2079, https://doi.org/10.1093/bioinformatics/btp352"
     ]
   }
 }
\ No newline at end of file
tools/compare_vcfs_isec/compare_vcfs_isec.yaml
...
@@ -56,13 +56,13 @@
   },
   citations: {
     bcftools: [
-      "Heng Li, A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data, Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987–2993, https://doi.org/10.1093/bioinformatics/btr509"
+      "Heng Li, A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data, Bioinformatics, Volume 27, Issue 21, 1 November 2011, Pages 2987-2993, https://doi.org/10.1093/bioinformatics/btr509"
     ],
     vcftools: [
       "The Variant Call Format and VCFtools, Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, Bioinformatics, 2011 http://dx.doi.org/10.1093/bioinformatics/btr330"],
     upsetr: [
-      "Alexander Lex, Nils Gehlenborg, Hendrik Strobelt, Romain Vuillemot, Hanspeter Pfister, UpSet: Visualization of Intersecting Sets, IEEE Transactions on Visualization and Computer Graphics (InfoVis '14), vol. 20, no. 12, pp. 1983–1992, 2014.doi: https://doi.org/10.1109/TVCG.2014.2346248"
+      "Alexander Lex, Nils Gehlenborg, Hendrik Strobelt, Romain Vuillemot, Hanspeter Pfister, UpSet: Visualization of Intersecting Sets, IEEE Transactions on Visualization and Computer Graphics (InfoVis '14), vol. 20, no. 12, pp. 1983-1992, 2014.doi: https://doi.org/10.1109/TVCG.2014.2346248"
     ]
   }
 }
tools/cstacks/cstacks.yaml
...
@@ -71,7 +71,7 @@
   },
   citations: {
     stacks: [
-      "Rochette, NC, Rivera‐Colón, AG, Catchen, JM. Stacks 2: Analytical methods for paired‐end sequencing improve RADseq‐based population genomics.