Skip to content

Commit 58cf227

Browse files
authored
Merge pull request #151 from AutomatedProcessImprovement/146-generate-schema-for-yaml-configuration
146 generate schema for yaml configuration
2 parents 45f1edb + 573f6eb commit 58cf227

24 files changed

+255
-206
lines changed

poetry.lock

Lines changed: 151 additions & 125 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "simod"
7-
version = "3.6.10"
7+
version = "3.6.11"
88
authors = [
99
"Ihar Suvorau <ihar.suvorau@gmail.com>",
1010
"David Chapela <david.chapela@ut.ee>",
@@ -18,14 +18,13 @@ packages = [{ include = "simod", from = "src" }]
1818
python = "^3.9,<3.12"
1919
click = "^8.1.3"
2020
hyperopt = "^0.2.7"
21-
jellyfish = "^0.11"
2221
lxml = "^4.9.1"
2322
matplotlib = "^3.6.0"
2423
networkx = "^3.1"
2524
numpy = "^1.23.4"
26-
pandas = "^2"
25+
pandas = "^2.1.0"
2726
pendulum = "^2.1.2"
28-
pydantic = "^1.10.3"
27+
pydantic = "^2.3.0"
2928
python-dotenv = "^1.0.0"
3029
python-multipart = "^0.0.6"
3130
pytz = "^2023.3"

resources/config/complete_configuration.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ common:
1414
start_time: "start_time"
1515
end_time: "end_time"
1616
# Use this process model and skip its discovery
17-
model_path: ../models/LoanApp_simplified.bpmn
17+
process_model_path: ../models/LoanApp_simplified.bpmn
1818
# Event log to evaluate the discovered BPS model with
1919
test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz
2020
# Flag to perform evaluation (if 'test_log_path' not provided) with a test partition of the input log

resources/config/configuration_example_with_provided_process_model.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ common:
1313
start_time: "start_time"
1414
end_time: "end_time"
1515
# Use this process model and skip its discovery
16-
model_path: ../models/LoanApp_simplified.bpmn
16+
process_model_path: ../models/LoanApp_simplified.bpmn
1717
# Whether to discover case attributes or not
1818
discover_case_attributes: false
1919
#################

resources/config/sample.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 4
22
common:
33
train_log_path: ../event_logs/LoanApp_simplified_train.csv.gz
44
test_log_path: ../event_logs/LoanApp_simplified_test.csv.gz
5-
# model_path: resources/models/LoanApp_simplified.bpmn # Uncomment to use this BPMN model as process model
5+
# process_model_path: resources/models/LoanApp_simplified.bpmn # Uncomment to use this BPMN model as process model
66
num_final_evaluations: 10 # Number of evaluations of the discovered BPS model.
77
evaluation_metrics: # Metrics to evaluate the discovered BPS model with.
88
- 3_gram_distance

src/simod/cli.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import json
12
from pathlib import Path
23
from typing import Optional
34

45
import click
6+
import yaml
57
from pix_framework.filesystem.file_manager import get_random_folder_id
68

79
from simod.event_log.event_log import EventLog
@@ -47,7 +49,35 @@
4749
help="Path to the event log file when using the --one-shot flag. "
4850
"Columns must be named 'case_id', 'activity', 'start_time', 'end_time', 'resource'.",
4951
)
50-
def main(configuration: Optional[Path], output: Optional[Path], one_shot: bool, event_log: Optional[Path]) -> Path:
52+
@click.option(
53+
"--schema-yaml",
54+
required=False,
55+
is_flag=True,
56+
help="Print the configuration YAML schema and exit.",
57+
)
58+
@click.option(
59+
"--schema-json",
60+
required=False,
61+
is_flag=True,
62+
help="Print the configuration JSON schema and exit.",
63+
)
64+
@click.version_option()
65+
def main(
66+
configuration: Optional[Path],
67+
output: Optional[Path],
68+
one_shot: bool,
69+
event_log: Optional[Path],
70+
schema_yaml: bool,
71+
schema_json: bool,
72+
) -> None:
73+
if schema_yaml:
74+
print(yaml.dump(SimodSettings().model_json_schema()))
75+
return
76+
77+
if schema_json:
78+
print(json.dumps(SimodSettings().model_json_schema()))
79+
return
80+
5181
if one_shot:
5282
settings = SimodSettings.one_shot()
5383
settings.common.train_log_path = event_log
@@ -70,8 +100,6 @@ def main(configuration: Optional[Path], output: Optional[Path], one_shot: bool,
70100
simod = Simod(settings, event_log=event_log, output_dir=output)
71101
simod.run()
72102

73-
return output
74-
75103

76104
if __name__ == "__main__":
77105
main()

src/simod/control_flow/optimizer.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,9 @@ def run(self) -> HyperoptIterationParams:
160160
# Process best results
161161
results = pd.DataFrame(self._bayes_trials.results).sort_values("loss")
162162
best_result = results[results.status == STATUS_OK].iloc[0]
163-
assert best_result["model_path"].exists(), f"Best model path {best_result['model_path']} does not exist"
163+
assert best_result[
164+
"process_model_path"
165+
].exists(), f"Best model path {best_result['process_model_path']} does not exist"
164166

165167
# Re-build parameters of the best hyperopt iteration
166168
best_hyperopt_parameters = HyperoptIterationParams.from_hyperopt_dict(
@@ -177,7 +179,7 @@ def run(self) -> HyperoptIterationParams:
177179
# Update best process model (save it in base directory)
178180
self.best_bps_model.process_model = get_process_model_path(self.base_directory, self.event_log.process_name)
179181
best_model_path = (
180-
best_result["model_path"] if self._need_to_discover_model else self.initial_bps_model.process_model
182+
best_result["process_model_path"] if self._need_to_discover_model else self.initial_bps_model.process_model
181183
)
182184
shutil.copyfile(best_model_path, self.best_bps_model.process_model)
183185
# Update simulation parameters (save them in base directory)
@@ -245,7 +247,7 @@ def cleanup(self):
245247

246248
@staticmethod
247249
def _define_response(
248-
status: str, evaluation_measurements: list, output_dir: Path, model_path: Path
250+
status: str, evaluation_measurements: list, output_dir: Path, process_model_path: Path
249251
) -> Tuple[str, dict]:
250252
# Compute mean distance if status is OK
251253
if status is STATUS_OK:
@@ -260,7 +262,7 @@ def _define_response(
260262
"loss": distance, # Loss value for the fmin function
261263
"status": status, # Status of the optimization iteration
262264
"output_dir": output_dir,
263-
"model_path": model_path,
265+
"process_model_path": process_model_path,
264266
}
265267
# Return updated status and processed response
266268
return status, response
@@ -309,7 +311,7 @@ def _simulate_bps_model(self, bps_model: BPSModel, output_dir: Path) -> List[dic
309311
json_parameters_path = bps_model.to_json(output_dir, self.event_log.process_name)
310312

311313
evaluation_measures = simulate_and_evaluate(
312-
model_path=bps_model.process_model,
314+
process_model_path=bps_model.process_model,
313315
parameters_path=json_parameters_path,
314316
output_dir=output_dir,
315317
simulation_cases=self.event_log.validation_partition[self.event_log.log_ids.case].nunique(),

src/simod/resource_model/optimizer.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from pix_framework.discovery.resource_profiles import discover_pool_resource_profiles
1616
from pix_framework.filesystem.file_manager import create_folder, get_random_folder_id, remove_asset
1717

18+
from .repair import repair_with_missing_activities
19+
from .settings import HyperoptIterationParams
1820
from ..batching.discovery import discover_batching_rules
1921
from ..cli_formatter import print_message, print_step, print_subsection
2022
from ..event_log.event_log import EventLog
@@ -23,8 +25,6 @@
2325
from ..simulation.parameters.BPS_model import BPSModel
2426
from ..simulation.prosimos import simulate_and_evaluate
2527
from ..utilities import get_process_model_path, get_simulation_parameters_path, hyperopt_step
26-
from .repair import repair_with_missing_activities
27-
from .settings import HyperoptIterationParams
2828

2929

3030
class ResourceModelOptimizer:
@@ -118,7 +118,7 @@ def _hyperopt_iteration(self, hyperopt_iteration_dict: dict):
118118
optimization_metric=self.settings.optimization_metric,
119119
discovery_type=self.settings.discovery_type,
120120
output_dir=output_dir,
121-
model_path=current_bps_model.process_model,
121+
process_model_path=current_bps_model.process_model,
122122
project_name=self.event_log.process_name,
123123
)
124124
print_message(f"Parameters: {hyperopt_iteration_params}")
@@ -194,14 +194,14 @@ def run(self) -> HyperoptIterationParams:
194194
discovery_type=self.settings.discovery_type,
195195
output_dir=best_result["output_dir"],
196196
project_name=self.event_log.process_name,
197-
model_path=self.initial_bps_model.process_model,
197+
process_model_path=self.initial_bps_model.process_model,
198198
)
199199

200200
# Instantiate best BPS model
201201
self.best_bps_model = self.initial_bps_model.deep_copy()
202202
# Update best process model (save it in base directory)
203203
self.best_bps_model.process_model = get_process_model_path(self.base_directory, self.event_log.process_name)
204-
shutil.copyfile(best_result["model_path"], self.best_bps_model.process_model)
204+
shutil.copyfile(best_result["process_model_path"], self.best_bps_model.process_model)
205205
# Update simulation parameters (save them in base directory)
206206
best_parameters_path = get_simulation_parameters_path(self.base_directory, self.event_log.process_name)
207207
shutil.copyfile(
@@ -314,7 +314,7 @@ def _process_measurements(self, params: HyperoptIterationParams, status: str, ev
314314

315315
@staticmethod
316316
def _define_response(
317-
status: str, evaluation_measurements: list, output_dir: Path, model_path: Path
317+
status: str, evaluation_measurements: list, output_dir: Path, process_model_path: Path
318318
) -> Tuple[str, dict]:
319319
# Compute mean distance if status is OK
320320
if status is STATUS_OK:
@@ -329,7 +329,7 @@ def _define_response(
329329
"loss": distance, # Loss value for the fmin function
330330
"status": status, # Status of the optimization iteration
331331
"output_dir": output_dir,
332-
"model_path": model_path,
332+
"process_model_path": process_model_path,
333333
}
334334
# Return updated status and processed response
335335
return status, response
@@ -340,7 +340,7 @@ def _simulate_bps_model(self, bps_model: BPSModel, output_dir: Path, granularity
340340
json_parameters_path = bps_model.to_json(output_dir, self.event_log.process_name, granule_size=granularity)
341341

342342
evaluation_measures = simulate_and_evaluate(
343-
model_path=bps_model.process_model,
343+
process_model_path=bps_model.process_model,
344344
parameters_path=json_parameters_path,
345345
output_dir=output_dir,
346346
simulation_cases=self.event_log.validation_partition[self.event_log.log_ids.case].nunique(),

src/simod/resource_model/settings.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class HyperoptIterationParams:
1616

1717
# General settings
1818
output_dir: Path # Directory where to output all the files of the current iteration
19-
model_path: Path # Path to BPMN model
19+
process_model_path: Path # Path to BPMN model
2020
project_name: str # Name of the project for file naming
2121

2222
optimization_metric: Metric # Metric to evaluate the candidate of this iteration
@@ -29,7 +29,7 @@ def to_dict(self) -> dict:
2929
# Save common params
3030
optimization_parameters = {
3131
"output_dir": str(self.output_dir),
32-
"model_path": str(self.model_path),
32+
"process_model_path": str(self.process_model_path),
3333
"project_name": str(self.project_name),
3434
"optimization_metric": str(self.optimization_metric),
3535
"discover_prioritization_rules": str(self.discover_prioritization_rules),
@@ -44,7 +44,7 @@ def from_hyperopt_dict(
4444
optimization_metric: Metric,
4545
discovery_type: CalendarType,
4646
output_dir: Path,
47-
model_path: Path,
47+
process_model_path: Path,
4848
project_name: str,
4949
) -> "HyperoptIterationParams":
5050
"""Create the params for this run from the hyperopt dictionary returned by the fmin function."""
@@ -78,7 +78,7 @@ def safe_granularity(granularity: int) -> int:
7878

7979
return HyperoptIterationParams(
8080
output_dir=output_dir,
81-
model_path=model_path,
81+
process_model_path=process_model_path,
8282
project_name=project_name,
8383
optimization_metric=optimization_metric,
8484
calendar_discovery_params=CalendarDiscoveryParameters(

src/simod/settings/common_settings.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
from dataclasses import dataclass, field
1+
from dataclasses import field
22
from enum import Enum
33
from pathlib import Path
44
from typing import Union, List, Optional
55

66
from pix_framework.io.event_log import EventLogIDs, PROSIMOS_LOG_IDS
7+
from pydantic import BaseModel
78

89
from ..utilities import get_project_dir
910

@@ -76,13 +77,12 @@ def __str__(self):
7677
return f"Unknown Metric {str(self)}"
7778

7879

79-
@dataclass
80-
class CommonSettings:
80+
class CommonSettings(BaseModel):
8181
# Log & Model parameters
8282
train_log_path: Path = Path("default_path.csv")
8383
log_ids: EventLogIDs = PROSIMOS_LOG_IDS
8484
test_log_path: Optional[Path] = None
85-
model_path: Optional[Path] = None
85+
process_model_path: Optional[Path] = None
8686
# Final evaluation parameters
8787
perform_final_evaluation: bool = False
8888
num_final_evaluations: int = 10
@@ -116,12 +116,12 @@ def from_dict(config: dict, config_dir: Optional[Path] = None) -> "CommonSetting
116116
test_log_path = None
117117

118118
# Process model path
119-
if "model_path" in config:
120-
model_path = Path(config["model_path"])
121-
if not model_path.is_absolute():
122-
model_path = base_files_dir / model_path
119+
if "process_model_path" in config:
120+
process_model_path = Path(config["process_model_path"])
121+
if not process_model_path.is_absolute():
122+
process_model_path = base_files_dir / process_model_path
123123
else:
124-
model_path = None
124+
process_model_path = None
125125

126126
# Flag to perform final evaluation (set to true if there is a test log)
127127
if test_log_path is not None:
@@ -165,7 +165,7 @@ def from_dict(config: dict, config_dir: Optional[Path] = None) -> "CommonSetting
165165
train_log_path=train_log_path,
166166
log_ids=log_ids,
167167
test_log_path=test_log_path,
168-
model_path=model_path,
168+
process_model_path=process_model_path,
169169
perform_final_evaluation=perform_final_evaluation,
170170
num_final_evaluations=num_final_evaluations,
171171
evaluation_metrics=metrics,
@@ -179,7 +179,7 @@ def to_dict(self) -> dict:
179179
"train_log_path": str(self.train_log_path),
180180
"test_log_path": str(self.test_log_path) if self.test_log_path is not None else None,
181181
"log_ids": self.log_ids.to_dict(),
182-
"model_path": str(self.model_path) if self.model_path is not None else None,
182+
"process_model_path": str(self.process_model_path) if self.process_model_path is not None else None,
183183
"num_final_evaluations": self.num_final_evaluations,
184184
"evaluation_metrics": [str(metric) for metric in self.evaluation_metrics],
185185
"use_observed_arrival_distribution": self.use_observed_arrival_distribution,

0 commit comments

Comments
 (0)