|
2 | 2 | from typing import Optional |
3 | 3 |
|
4 | 4 | import pandas as pd |
| 5 | +import pendulum |
| 6 | +from openxes_cli.lib import csv_to_xes |
5 | 7 | from pix_framework.io.event_log import DEFAULT_XES_IDS, EventLogIDs, read_csv_log |
6 | 8 | from pix_framework.io.event_log import split_log_training_validation_trace_wise as split_log |
7 | 9 |
|
8 | 10 | from .preprocessor import Preprocessor |
9 | | -from .utilities import convert_df_to_xes |
10 | 11 | from ..settings.preprocessing_settings import PreprocessingSettings |
11 | 12 | from ..utilities import get_process_name_from_log_path |
12 | 13 |
|
@@ -150,79 +151,99 @@ def from_path( |
150 | 151 | process_name=get_process_name_from_log_path(train_log_path) if process_name is None else process_name, |
151 | 152 | ) |
152 | 153 |
|
def train_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the training partition of the event log as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the exported XES file contains only the events that
        mark the end of each activity instance.
    """
    write_xes(
        self.train_partition,
        log_ids=self.log_ids,
        output_path=path,
        only_complete_events=only_complete_events,
    )
163 | 167 |
|
def validation_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the validation partition of the event log as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the exported XES file contains only the events that
        mark the end of each activity instance.
    """
    write_xes(
        self.validation_partition,
        log_ids=self.log_ids,
        output_path=path,
        only_complete_events=only_complete_events,
    )
174 | 181 |
|
def train_validation_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the combined training and validation partition as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the exported XES file contains only the events that
        mark the end of each activity instance.
    """
    write_xes(
        self.train_validation_partition,
        log_ids=self.log_ids,
        output_path=path,
        only_complete_events=only_complete_events,
    )
185 | 195 |
|
def test_to_xes(self, path: Path, only_complete_events: bool = False):
    """
    Export the test partition of the event log as an XES file.

    Parameters
    ----------
    path : :class:`pathlib.Path`
        Location where the XES file is written.
    only_complete_events : bool
        When true, the exported XES file contains only the events that
        mark the end of each activity instance.
    """
    write_xes(
        self.test_partition,
        log_ids=self.log_ids,
        output_path=path,
        only_complete_events=only_complete_events,
    )
196 | 209 |
|
197 | 210 |
|
def write_xes(
    event_log: pd.DataFrame,
    log_ids: EventLogIDs,
    output_path: Path,
    only_complete_events: bool = False,
):
    """
    Writes the log to a file in XES format.

    The input data frame is not modified: a copy is reformatted, dumped as
    CSV to ``output_path`` and then converted by ``csv_to_xes`` using that
    same path as both input and output.

    Parameters
    ----------
    event_log : :class:`pandas.DataFrame`
        Event log to export. Start and end time values must provide an
        ``isoformat()`` method.
    log_ids : :class:`EventLogIDs`
        Column names of ``event_log`` (case, activity, resource, start time
        and end time).
    output_path : :class:`pathlib.Path`
        Destination path for the XES file.
    only_complete_events : bool
        If true, the start time column is blanked (set to an empty string)
        instead of being formatted, so only end timestamps are exported.
    """
    timestamp_pattern = "YYYY-MM-DDTHH:mm:ss.SSSZ"

    def _to_xes_timestamp(value):
        # Round-trip through ISO 8601 so pendulum can render the value
        # with the pattern the converter expects.
        return pendulum.parse(value.isoformat()).format(timestamp_pattern)

    # Work on a copy so the caller's data frame stays untouched.
    frame = event_log.copy()

    if only_complete_events:
        frame[log_ids.start_time] = ""
    else:
        frame[log_ids.start_time] = frame[log_ids.start_time].apply(_to_xes_timestamp)
    frame[log_ids.end_time] = frame[log_ids.end_time].apply(_to_xes_timestamp)

    # Map the log's column names onto the standard XES attribute names.
    xes_names = {
        log_ids.activity: "concept:name",
        log_ids.case: "case:concept:name",
        log_ids.resource: "org:resource",
        log_ids.start_time: "start_timestamp",
        log_ids.end_time: "time:timestamp",
    }
    exported_columns = [
        "case:concept:name",
        "concept:name",
        "org:resource",
        "start_timestamp",
        "time:timestamp",
    ]
    frame = frame.rename(columns=xes_names)
    frame = frame[exported_columns]

    # Missing values are exported with an explicit placeholder.
    frame = frame.fillna("UNDEFINED")

    # Dump as CSV, then convert that file to XES at the same location.
    frame.to_csv(output_path, index=False)
    csv_to_xes(output_path, output_path)
0 commit comments