Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 57 additions & 39 deletions tools/cwlprov_to_crate/cwlprov_to_crate.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,9 @@ def __init__(self, root, workflow_name=None, license=None):
self.cwl_defs = get_workflow(self.wf_path)
self.step_maps = self._get_step_maps(self.cwl_defs)
self.param_map = {}
self.prov = Provenance(ResearchObject(BDBag(str(root))))
self.workflow_run = self.prov.activity()
self.ro = ResearchObject(BDBag(str(root)))
self.with_prov = set(str(_) for _ in self.ro.resources_with_provenance())
self.workflow_run = Provenance(self.ro).activity()
self.roc_engine_run = None
# avoid duplicates - not handled by ro-crate-py, see
# https://github.com/ResearchObject/ro-crate-py/issues/132
Expand All @@ -185,28 +186,29 @@ def _get_step_maps(cwl_defs):
rval[k][f] = {"tool": get_fragment(s.run), "pos": pos_map[f]}
return rval

def _resolve_plan(self, job_qname):
plan = self.prov.entity(job_qname)
def _resolve_plan(self, activity):
job_qname = activity.plan()
plan = activity.provenance.entity(job_qname)
if not plan:
m = SCATTER_JOB_PATTERN.match(str(job_qname))
if m:
plan = self.prov.entity(m.groups()[0])
plan = activity.provenance.entity(m.groups()[0])
return plan

def get_members(self, entity):
membership = self.prov.record_with_attr(
membership = entity.provenance.record_with_attr(
prov.model.ProvMembership, entity.id, prov.model.PROV_ATTR_COLLECTION
)
member_ids = (_.get_attribute(prov.model.PROV_ATTR_ENTITY) for _ in membership)
return (self.prov.entity(first(_)) for _ in member_ids)
return (entity.provenance.entity(first(_)) for _ in member_ids)

def get_dict(self, entity):
d = {}
for qname in entity.record.get_attribute("prov:hadDictionaryMember"):
kvp = self.prov.entity(qname)
kvp = entity.provenance.entity(qname)
key = first(kvp.record.get_attribute("prov:pairKey"))
entity_id = first(kvp.record.get_attribute("prov:pairEntity"))
d[key] = self.prov.entity(entity_id)
d[key] = entity.provenance.entity(entity_id)
return d

def build(self):
Expand All @@ -233,39 +235,48 @@ def add_workflow(self, crate):
workflow["input"] = self.add_params(crate, cwl_workflow.inputs)
workflow["output"] = self.add_params(crate, cwl_workflow.outputs)
for s in getattr(cwl_workflow, "steps", []):
step_fragment = get_fragment(s.id)
step_id = f"{workflow.id}#{step_fragment}"
pos = self.step_maps[workflow.id][step_fragment]["pos"]
step = crate.add(ContextEntity(crate, step_id, properties={
"@type": "HowToStep",
"position": str(pos),
}))
tool = self.add_tool(crate, workflow, s.run)
step["workExample"] = tool
workflow.append_to("step", step)
self.add_step(crate, workflow, s)
return workflow

def add_step(self, crate, workflow, cwl_step):
step_fragment = get_fragment(cwl_step.id)
step_id = f"{self.wf_path.name}#{step_fragment}"
pos = self.step_maps[get_fragment(workflow.id)][step_fragment]["pos"]
step = crate.add(ContextEntity(crate, step_id, properties={
"@type": "HowToStep",
"position": str(pos),
}))
tool = self.add_tool(crate, workflow, cwl_step.run)
step["workExample"] = tool
workflow.append_to("step", step)

def add_tool(self, crate, workflow, cwl_tool):
if isinstance(cwl_tool, str):
tool_fragment = get_fragment(cwl_tool)
cwl_tool = self.cwl_defs[tool_fragment]
else:
tool_fragment = get_fragment(cwl_tool.id)
tool_id = f"{workflow.id}#{tool_fragment}"
if hasattr(cwl_tool, "expression"):
raise RuntimeError("ExpressionTool not supported yet")
tool_id = f"{self.wf_path.name}#{tool_fragment}"
tool = crate.dereference(tool_id)
if tool:
return tool
if hasattr(cwl_tool, "steps"):
raise RuntimeError("subworkflows not supported yet")
if hasattr(cwl_tool, "expression"):
raise RuntimeError("ExpressionTool not supported yet")
properties = {"name": tool_fragment}
if cwl_tool.doc:
properties["description"] = cwl_tool.doc
tool = crate.add(SoftwareApplication(crate, tool_id, properties=properties))
if hasattr(cwl_tool, "steps"):
properties["@type"] = ["SoftwareSourceCode", "ComputationalWorkflow", "HowTo"]
else:
properties["@type"] = "SoftwareApplication"
tool = crate.add(ContextEntity(crate, tool_id, properties=properties))
tool["input"] = self.add_params(crate, cwl_tool.inputs)
tool["output"] = self.add_params(crate, cwl_tool.outputs)
workflow.append_to("hasPart", tool)
if hasattr(cwl_tool, "steps"):
tool["programmingLanguage"] = workflow["programmingLanguage"]
for s in getattr(cwl_tool, "steps", []):
self.add_step(crate, tool, s)
return tool

def add_params(self, crate, cwl_params):
Expand Down Expand Up @@ -296,13 +307,13 @@ def add_engine_run(self, crate):
def add_agent(self, crate, roc_engine_run, engine):
delegate = engine.start().starter_activity()
try:
delegation = next(self.prov.record_with_attr(
delegation = next(engine.provenance.record_with_attr(
prov.model.ProvDelegation, delegate.id, prov.model.PROV_ATTR_DELEGATE
))
except StopIteration:
return
responsible = delegation.get_attribute(prov.model.PROV_ATTR_RESPONSIBLE)
agent = sum((self.prov.prov_doc.get_record(_) for _ in responsible), [])
agent = sum((engine.provenance.prov_doc.get_record(_) for _ in responsible), [])
for a in agent:
if "prov:Person" not in set(str(_) for _ in a.get_asserted_types()):
continue
Expand All @@ -320,23 +331,25 @@ def add_action(self, crate, activity, parent_instrument=None):
action = crate.add(ContextEntity(crate, properties={
"@type": "CreateAction",
"name": activity.label,
"startTime": activity.start().time.isoformat(),
"endTime": activity.end().time.isoformat(),
}))
job_qname = activity.plan()
plan = self._resolve_plan(job_qname)
plan = self._resolve_plan(activity)
plan_tag = plan.id.localpart
if str(activity.type) == "wfprov:WorkflowRun":
if plan_tag != "main":
raise RuntimeError("sub-workflows not supported yet")
if plan_tag == "main":
assert str(activity.type) == "wfprov:WorkflowRun"
instrument = workflow
self.roc_engine_run["result"] = action
crate.root_dataset["mentions"] = [action]

def to_wf_p(k):
return k
else:
tool_name = self.step_maps[parent_instrument.id][plan_tag]["tool"]
parent_instrument_fragment = get_fragment(parent_instrument.id)
if parent_instrument_fragment != WORKFLOW_BASENAME:
parts = plan_tag.split("/", 1)
if parts[0] == "main":
parts[0] = parent_instrument_fragment
plan_tag = "/".join(parts)
tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
instrument = crate.dereference(f"{workflow.id}#{tool_name}")
control_action = self.control_actions.get(plan_tag)
if not control_action:
Expand All @@ -349,10 +362,15 @@ def to_wf_p(k):
self.roc_engine_run.append_to("object", control_action, compact=True)
self.control_actions[plan_tag] = control_action
control_action.append_to("object", action, compact=True)
if activity.uri in self.with_prov:
nested_prov = Provenance(self.ro, activity.uri)
activity = nested_prov.activity()

def to_wf_p(k):
return k.replace(job_qname.localpart, tool_name)
return k.replace(activity.plan().localpart, tool_name)
action["instrument"] = instrument
action["startTime"] = activity.start().time.isoformat()
action["endTime"] = activity.end().time.isoformat()
action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
for job in activity.steps():
Expand All @@ -363,8 +381,8 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
for rel in getattr(activity, ptype)():
k = get_relative_uri(rel.role.uri)
if str(activity.type) == "wfprov:WorkflowRun":
# workflow output roles have a phantom "primary" step (cwltool bug?)
if ptype == "generation" and get_step_part(k) == "primary":
# workflow output roles have a phantom step part
if ptype == "generation":
parts = k.split("/", 2)
k = parts[0] + "/" + parts[2]
# In the case of a single tool run, cwltool reports one WorkflowRun
Expand Down Expand Up @@ -423,7 +441,7 @@ def convert_param(self, prov_param, crate):
part = crate.add_file(self.root / path, dest)
action_p.append_to("hasPart", part)
return action_p
if prov_param.value:
if prov_param.value is not None:
return str(prov_param.value)
if "prov:Dictionary" in type_names:
return dict(
Expand Down
3 changes: 3 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# revsortlcase

Example of workflow that uses other workflows to implement steps (subworkflows, or nested workflows). The first step runs a simplified implementation of the revsort workflow used in the [CWLProv RO Example](https://github.com/common-workflow-language/cwlprov/tree/ce3f469745f4c8a2c029f872d522a4c57fba947b/examples/revsort-run-1); the second step runs a tool that changes text to lowercase.
14 changes: 14 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/lcasetool.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class: CommandLineTool
cwlVersion: v1.0

baseCommand: ["awk", "{print tolower($0)}"]

inputs:
lcase_in:
type: stdin
outputs:
lcase_out:
type: File
outputBinding:
glob: lcase_out.txt
stdout: lcase_out.txt
26 changes: 26 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/revsort.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
class: Workflow
cwlVersion: v1.0

inputs:
revsort_in:
type: File
reverse_sort:
type: boolean
default: false
outputs:
revsort_out:
type: File
outputSource: sorted/sort_out

steps:
rev:
in:
rev_in: revsort_in
out: [rev_out]
run: revtool.cwl
sorted:
in:
sort_in: rev/rev_out
reverse: reverse_sort
out: [sort_out]
run: sorttool.cwl
5 changes: 5 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/revsortlcase-job.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
revsortlcase_in:
class: File
location: revsortlcase_in.txt
descending_sort:
true
29 changes: 29 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/revsortlcase.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
class: Workflow
cwlVersion: v1.0

requirements:
SubworkflowFeatureRequirement: {}

inputs:
revsortlcase_in:
type: File
descending_sort:
type: boolean
default: false
outputs:
revsortlcase_out:
type: File
outputSource: lcase/lcase_out

steps:
revsort:
in:
revsort_in: revsortlcase_in
reverse_sort: descending_sort
out: [revsort_out]
run: revsort.cwl
lcase:
in:
lcase_in: revsort/revsort_out
out: [lcase_out]
run: lcasetool.cwl
4 changes: 4 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/revsortlcase_in.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Hello
World
Hello
CWL
15 changes: 15 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/revtool.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
class: CommandLineTool
cwlVersion: v1.0

baseCommand: rev

inputs:
rev_in:
type: File
inputBinding: {}
outputs:
rev_out:
type: File
outputBinding:
glob: rev_out.txt
stdout: rev_out.txt
21 changes: 21 additions & 0 deletions tools/cwlprov_to_crate/revsortlcase/sorttool.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class: CommandLineTool
cwlVersion: v1.0

baseCommand: sort

inputs:
reverse:
type: boolean
inputBinding:
position: 1
prefix: "--reverse"
sort_in:
type: File
inputBinding:
position: 2
outputs:
sort_out:
type: File
outputBinding:
glob: sort_out.txt
stdout: sort_out.txt
25 changes: 25 additions & 0 deletions tools/cwlprov_to_crate/test/data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ License: [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
* © 2018 Software Freedom Conservancy (SFC) https://sfconservancy.org/


## revsortlcase-run-1

Nested workflows example. The first step uses a simplified implementation of [revsort](#revsort-run-1) as a subworkflow, while the second calls a tool that converts all lines to lower case.


## exome-alignment-packed.cwl

Packed (`cwltool --pack`) version of the [exome alignment workflow](https://github.com/inab/ipc_workflows/blob/fefede132f217184a25767fc4f42e2ae4606ff25/exome/alignment/workflow.cwl) from [inab/ipc_workflows@fefede1](https://github.com/inab/ipc_workflows/tree/fefede132f217184a25767fc4f42e2ae4606ff25), with step order altered to make it more distant from the topological order.
Expand All @@ -25,3 +30,23 @@ WorkflowHub entry: https://workflowhub.eu/workflows/239
Copy of `prov_data_annotations/example2/ro_old` from [RenskeW/cwlprov-provenance](https://github.com/RenskeW/cwlprov-provenance/tree/f5dd87a950eeaf7f96bd39dc218164832ff3cbea/prov_data_annotations/example2/ro_old).

License: [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)


## grepucase-run-1

For each file in the input directory, search for the specified pattern, convert matching lines to upper case and write results to a file in the output directory. Used to test support for directory I/O.


## echo-scatter-run-1

Write all strings in the input array to stdout. Used to test support for scatter jobs.


## no-input-run-1

Output a predefined integer. Used to test support for tools / workflows that take no input.


## type-zoo-run-1

Build a fake command line and write it to the output file (all parameter settings are passed to the echo tool). Used to test support for various parameter types.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Bag-Software-Agent: cwltool 3.1.20220628170238
BagIt-Profile-Identifier: https://w3id.org/ro/bagit/profile
Bagging-Date: 2022-09-22
External-Description: Research Object of CWL workflow run
External-Identifier: arcp://uuid,ef2d93e1-543a-499a-b22b-ba5757b91398/
Payload-Oxum: 88.4
2 changes: 2 additions & 0 deletions tools/cwlprov_to_crate/test/data/revsortlcase-run-1/bagit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
BagIt-Version: 0.97
Tag-File-Character-Encoding: UTF-8
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
olleH
olleH
dlroW
LWC
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
olleH
dlroW
olleH
LWC
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Hello
World
Hello
CWL
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
olleh
olleh
dlrow
lwc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
7cb1a4da14ba3e91b983b30e7689e3902bcd2034 data/7c/7cb1a4da14ba3e91b983b30e7689e3902bcd2034
542758e6e55bb880c05e2de68a3897bfab37c990 data/54/542758e6e55bb880c05e2de68a3897bfab37c990
134bede4fd3827851f861713ed34168b6efb2806 data/13/134bede4fd3827851f861713ed34168b6efb2806
aaf167286572f8b5d5c592b94aff678d0997947f data/aa/aaf167286572f8b5d5c592b94aff678d0997947f
Loading