Skip to content

Commit 02afc5f

Browse files
Merge pull request #165 from AutomatedProcessImprovement/fix-discovered-bpmn-model
Translate self-loops in discovered bpmn model
2 parents d35a40a + e4c042d commit 02afc5f

File tree

9 files changed

+10872
-53
lines changed

9 files changed

+10872
-53
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "simod"
7-
version = "5.1.4"
7+
version = "5.1.5"
88
authors = [
99
"Ihar Suvorau <ihar.suvorau@gmail.com>",
1010
"David Chapela <david.chapela@ut.ee>",

src/simod/control_flow/discovery.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import uuid
12
from dataclasses import dataclass
23
from pathlib import Path
34
from typing import List, Tuple
45

6+
from lxml import etree
7+
58
from simod.cli_formatter import print_step
69
from simod.control_flow.settings import HyperoptIterationParams
710
from simod.settings.control_flow_settings import (
@@ -49,8 +52,82 @@ def discover_process_model(log_path: Path, output_model_path: Path, params: Hype
4952
discover_process_model_with_split_miner_v2(SplitMinerV2Settings(log_path, output_model_path, params.epsilon))
5053
else:
5154
raise ValueError(f"Unknown process model discovery algorithm: {params.mining_algorithm}")
52-
55+
# Assert that model file was created
5356
assert output_model_path.exists(), f"Error trying to discover the process model in '{output_model_path}'."
57+
# Post-process to transform implicit activity self-loops into explicit (modeled through gateways)
58+
print(f"Post-processing discovered process model to explicitly model self-loops through gateways.")
59+
post_process_bpmn_self_loops(output_model_path)
60+
61+
62+
def _generate_node_id():
63+
return f"node_{uuid.uuid4()}"
64+
65+
66+
def post_process_bpmn_self_loops(bpmn_model_path: Path):
    """
    Rewrite implicit activity self-loops of a BPMN model as explicit gateway loops.

    Every ``task`` holding a ``standardLoopCharacteristics`` child is transformed:
    the loop marker is removed and the task is enclosed between a converging
    exclusive gateway (entry) and a diverging exclusive gateway (exit), with an
    extra sequence flow going back from the exit gateway to the entry gateway.
    The flows that previously reached/left the task are redirected to the entry/exit
    gateway respectively. The model file is overwritten in place.

    Parameters
    ----------
    bpmn_model_path : :class:`pathlib.Path`
        Path to the BPMN model file, which is read and then rewritten.
    """
    # Paths are passed as strings so this works regardless of lxml's pathlib support
    tree = etree.parse(str(bpmn_model_path))
    root = tree.getroot()
    bpmn_namespace = root.nsmap.get(None, "http://www.omg.org/spec/BPMN/20100524/MODEL")
    ns = {"bpmn": bpmn_namespace}

    def _new_element(tag, text=None, **attributes):
        # Build a namespaced BPMN element, optionally with text content
        element = etree.Element("{%s}%s" % (bpmn_namespace, tag), **attributes)
        if text is not None:
            element.text = text
        return element

    def _retarget_task_refs(task, tag, flow_id):
        # Make the task's own <incoming>/<outgoing> children (if present) reference
        # only `flow_id`; nothing is added when the model does not declare them
        refs = task.findall(f"bpmn:{tag}", namespaces=ns)
        for extra in refs[1:]:
            task.remove(extra)
        if refs:
            refs[0].text = flow_id

    # Snapshot of the flows of the original model; flows created below only connect
    # an already processed task with its own gateways, so they never need rewiring
    sequence_flows = root.findall(".//bpmn:sequenceFlow", namespaces=ns)

    for task in root.findall(".//bpmn:task", namespaces=ns):
        loop_characteristics = task.find("bpmn:standardLoopCharacteristics", namespaces=ns)
        if loop_characteristics is None:
            continue
        # Task with self-loop
        task_id = task.get("id")
        # New nodes and flows must live in the same container as the task
        container = task.getparent()
        # Remove loop characteristics
        task.remove(loop_characteristics)
        # Generate unique IDs
        gt1_id = _generate_node_id()
        gt2_id = _generate_node_id()
        sf1_id = _generate_node_id()
        sf2_id = _generate_node_id()
        sf3_id = _generate_node_id()
        # Redirect ALL existing flows: incoming ones to the entry gateway and
        # outgoing ones to the exit gateway (a task may have several, or none)
        redirected_incoming, redirected_outgoing = [], []
        for sf in sequence_flows:
            if sf.get("targetRef") == task_id:
                sf.set("targetRef", gt1_id)
                redirected_incoming.append(sf.get("id"))
            if sf.get("sourceRef") == task_id:
                sf.set("sourceRef", gt2_id)
                redirected_outgoing.append(sf.get("id"))
        # Create exclusive gateways with their incoming and outgoing references
        gt1 = _new_element("exclusiveGateway", id=gt1_id, gatewayDirection="Converging")
        for flow_id in redirected_incoming:
            gt1.append(_new_element("incoming", flow_id))
        gt1.append(_new_element("incoming", sf3_id))
        gt1.append(_new_element("outgoing", sf1_id))
        gt2 = _new_element("exclusiveGateway", id=gt2_id, gatewayDirection="Diverging")
        gt2.append(_new_element("incoming", sf2_id))
        for flow_id in redirected_outgoing:
            gt2.append(_new_element("outgoing", flow_id))
        gt2.append(_new_element("outgoing", sf3_id))
        container.append(gt1)
        container.append(gt2)
        # Create new sequence flows: entry -> task, task -> exit, exit -> entry (loop)
        container.append(_new_element("sequenceFlow", id=sf1_id, sourceRef=gt1_id, targetRef=task_id))
        container.append(_new_element("sequenceFlow", id=sf2_id, sourceRef=task_id, targetRef=gt2_id))
        container.append(_new_element("sequenceFlow", id=sf3_id, sourceRef=gt2_id, targetRef=gt1_id))
        # The task is now reached only through sf1 and left only through sf2
        _retarget_task_refs(task, "incoming", sf1_id)
        _retarget_task_refs(task, "outgoing", sf2_id)
    # Write to file
    tree.write(str(bpmn_model_path), xml_declaration=True, encoding="UTF-8", pretty_print=True)
54131

55132

56133
def add_bpmn_diagram_to_model(bpmn_model_path: Path):

src/simod/control_flow/optimizer.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,10 @@ def __init__(self, event_log: EventLog, bps_model: BPSModel, settings: ControlFl
9191
# Not provided, create path to best discovered model
9292
self._need_to_discover_model = True
9393
# Export training log (XES format) for SplitMiner
94-
self._xes_train_log_path = self.base_directory / (self.event_log.process_name + ".xes")
95-
self.event_log.train_to_xes(self._xes_train_log_path)
94+
self._xes_train_both_timestamps_log_path = self.base_directory / (self.event_log.process_name + ".xes")
95+
self.event_log.train_to_xes(self._xes_train_both_timestamps_log_path)
96+
self._xes_train_only_end_log_path = self.base_directory / (self.event_log.process_name + "_only_end.xes")
97+
self.event_log.train_to_xes(self._xes_train_only_end_log_path, only_complete_events=True)
9698
else:
9799
# Process model provided
98100
self._need_to_discover_model = False
@@ -360,7 +362,10 @@ def _process_measurements(self, params: HyperoptIterationParams, status, evaluat
360362
def _discover_process_model(self, params: HyperoptIterationParams) -> Path:
    """
    Discover a process model for the given iteration parameters and return its path.

    The training log containing only end events is used when the mining algorithm
    is Split Miner v1; any other algorithm receives the training log with both
    start and end timestamps.
    """
    print_step(f"Discovering Process Model with {params.mining_algorithm.value}")
    output_model_path = get_process_model_path(params.output_dir, self.event_log.process_name)
    # Select the XES training log matching the mining algorithm
    xes_log_path = (
        self._xes_train_only_end_log_path
        if params.mining_algorithm is ProcessModelDiscoveryAlgorithm.SPLIT_MINER_V1
        else self._xes_train_both_timestamps_log_path
    )
    discover_process_model(xes_log_path, output_model_path, params)
    return output_model_path
365370

366371
def _discover_branch_rules(self, process_model: Path, params: HyperoptIterationParams) -> List[BranchRules]:

src/simod/event_log/event_log.py

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
from typing import Optional
33

44
import pandas as pd
5+
import pendulum
6+
from openxes_cli.lib import csv_to_xes
57
from pix_framework.io.event_log import DEFAULT_XES_IDS, EventLogIDs, read_csv_log
68
from pix_framework.io.event_log import split_log_training_validation_trace_wise as split_log
79

810
from .preprocessor import Preprocessor
9-
from .utilities import convert_df_to_xes
1011
from ..settings.preprocessing_settings import PreprocessingSettings
1112
from ..utilities import get_process_name_from_log_path
1213

@@ -150,79 +151,99 @@ def from_path(
150151
process_name=get_process_name_from_log_path(train_log_path) if process_name is None else process_name,
151152
)
152153

153-
def train_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the training partition of the event log as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the XES file contains only the events corresponding to
        the end of each activity instance.
    """
    write_xes(
        self.train_partition,
        self.log_ids,
        path,
        only_complete_events=only_complete_events,
    )
163167

164-
def validation_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the validation partition of the event log as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the XES file contains only the events corresponding to
        the end of each activity instance.
    """
    write_xes(
        self.validation_partition,
        self.log_ids,
        path,
        only_complete_events=only_complete_events,
    )
174181

175-
def train_validation_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the combined training and validation partition as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the XES file contains only the events corresponding to
        the end of each activity instance.
    """
    write_xes(
        self.train_validation_partition,
        self.log_ids,
        path,
        only_complete_events=only_complete_events,
    )
185195

186-
def test_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the test partition of the event log as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the XES file contains only the events corresponding to
        the end of each activity instance.
    """
    write_xes(
        self.test_partition,
        self.log_ids,
        path,
        only_complete_events=only_complete_events,
    )
196209

197210

198211
def write_xes(
    event_log: pd.DataFrame,
    log_ids: EventLogIDs,
    output_path: Path,
    only_complete_events: bool = False,
):
    """
    Writes the log to a file in XES format.

    The input dataframe is not modified: a copy is formatted, reduced to the
    case, activity, resource, start and end columns (renamed to their XES
    names), written as CSV to ``output_path`` and then converted with
    ``csv_to_xes`` using ``output_path`` as both input and output.

    Parameters
    ----------
    event_log : :class:`pandas.DataFrame`
        Event log to export.
    log_ids : EventLogIDs
        Column names of ``event_log``.
    output_path : :class:`pathlib.Path`
        Destination path for the XES file.
    only_complete_events : bool
        When true, the start timestamps are exported as empty strings, so only
        the end timestamp of each activity instance is provided.
    """
    xes_datetime_format = "YYYY-MM-DDTHH:mm:ss.SSSZ"

    def _as_xes_timestamp(value):
        # Re-parse the ISO representation with pendulum to render the expected format
        return pendulum.parse(value.isoformat()).format(xes_datetime_format)

    # Work on a copy so the caller's dataframe stays untouched
    df = event_log.copy()
    # Start time: blank it when only the end events are requested
    if only_complete_events:
        df[log_ids.start_time] = ""
    else:
        df[log_ids.start_time] = df[log_ids.start_time].apply(_as_xes_timestamp)
    # End time
    df[log_ids.end_time] = df[log_ids.end_time].apply(_as_xes_timestamp)
    # Rename columns to the names expected in XES and keep only those
    xes_column_names = {
        log_ids.activity: "concept:name",
        log_ids.case: "case:concept:name",
        log_ids.resource: "org:resource",
        log_ids.start_time: "start_timestamp",
        log_ids.end_time: "time:timestamp",
    }
    df = df.rename(columns=xes_column_names)
    df = df[["case:concept:name", "concept:name", "org:resource", "start_timestamp", "time:timestamp"]]
    # Fill null values
    df.fillna("UNDEFINED", inplace=True)
    # Write the intermediate CSV and convert it to XES on the same path
    df.to_csv(output_path, index=False)
    csv_to_xes(output_path, output_path)

src/simod/event_log/utilities.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

src/simod/settings/common_settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def _from_str(cls, value: str) -> "Metric":
8686
return cls.THREE_GRAM_DISTANCE
8787
elif value.lower() in ["circadian_event_distribution", "circadian_emd"]:
8888
return cls.CIRCADIAN_EMD
89-
elif value.lower() in ["circadian_workforce_distribution", "workforce_emd", "workforce_distribution"]:
89+
elif value.lower() in ["circadian_workforce_distribution", "workforce_emd", "circadian_workforce"]:
9090
return cls.CIRCADIAN_WORKFORCE_EMD
9191
elif value.lower() in ["arrival_event_distribution", "arrival_emd"]:
9292
return cls.ARRIVAL_EMD

0 commit comments

Comments
 (0)