Skip to content

Commit 686ad64

Browse files
Fix timeseries dataset integration into workflows (#2)
1 parent b1c54a3 commit 686ad64

File tree

4 files changed

+28
-10
lines changed

4 files changed

+28
-10
lines changed

.vscode/settings.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
{
22
"editor.formatOnSave": true,
3-
"editor.rulers": [120],
3+
"editor.rulers": [
4+
120
5+
],
46
"[python]": {
57
"editor.defaultFormatter": "charliermarsh.ruff",
68
"editor.formatOnSave": true,

CHANGELOG.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.37.1] - 2025-06-10
11+
1012
### Fixed
1113

14+
- `tilebox-datasets`: Fixed a bug in `TimeseriesDatasetChunk.from_message` relying on incorrect bool assumptions about
15+
missing protobuf fields.
1216
- `tilebox-grpc`: More robust parsing of GRPC channel URLs.
17+
- `tilebox-workflows`: Fixed a bug in the timeseries interceptor that resulted in an error when accessing a collection.
1318

1419
## [0.37.0] - 2025-06-06
1520

@@ -185,7 +190,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
185190
- Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc`
186191

187192

188-
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.37.0...HEAD
193+
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.37.1...HEAD
194+
[0.37.1]: https://github.com/tilebox/tilebox-python/compare/v0.37.0...v0.37.1
189195
[0.37.0]: https://github.com/tilebox/tilebox-python/compare/v0.36.1...v0.37.0
190196
[0.36.1]: https://github.com/tilebox/tilebox-python/compare/v0.36.0...v0.36.1
191197
[0.36.0]: https://github.com/tilebox/tilebox-python/compare/v0.35.0...v0.36.0

tilebox-datasets/tilebox/datasets/data/timeseries.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,13 @@ class TimeseriesDatasetChunk:
2121
@classmethod
2222
def from_message(cls, chunk: timeseries_pb2.TimeseriesDatasetChunk) -> "TimeseriesDatasetChunk":
2323
datapoint_interval = None
24-
if chunk.datapoint_interval and chunk.datapoint_interval.start_id and chunk.datapoint_interval.end_id:
24+
if (
25+
chunk.datapoint_interval
26+
and chunk.datapoint_interval.start_id
27+
and chunk.datapoint_interval.end_id
28+
and chunk.datapoint_interval.start_id.uuid
29+
and chunk.datapoint_interval.end_id.uuid
30+
):
2531
datapoint_interval = DatapointInterval.from_message(chunk.datapoint_interval)
2632

2733
time_interval = None

tilebox-workflows/tilebox/workflows/timeseries.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,20 @@
2222

2323

2424
@execution_interceptor
25-
def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: ExecutionContext) -> None:
25+
def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context: ExecutionContext) -> None: # noqa: C901
2626
if not isinstance(task, TimeseriesTask):
2727
raise TypeError("Task is not a timeseries task. Inherit from TimeseriesTask to mark it as such.")
2828

2929
chunk: TimeseriesDatasetChunk = task.timeseries_data # type: ignore[attr-defined]
3030

31-
# let's get the collection object
32-
dataset = context.runner_context.datasets_client._dataset_by_id(str(chunk.dataset_id)) # type: ignore[attr-defined] # noqa: SLF001
33-
collection = dataset.collection("unknown") # dummy collection, we will inject the right id below:
31+
# let's get a collection client
32+
datasets_client = context.runner_context.datasets_client
33+
dataset = datasets_client._dataset_by_id(str(chunk.dataset_id)) # type: ignore[attr-defined] # noqa: SLF001
3434
# we already know the collection id, so we can skip the lookup (we don't know the name, but don't need it)
35-
collection._info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None) # noqa: SLF001
35+
collection_info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None)
36+
collection = CollectionClient(dataset, collection_info)
3637

37-
# leaf case: we are already executing a specific batch of datapoints fitting in the chunk size, so let's load them and process them
38+
# leaf case: we are already executing a specific batch of datapoints fitting in the chunk size, so let's load them
3839
if chunk.datapoint_interval:
3940
datapoint_interval = (chunk.datapoint_interval.start_id, chunk.datapoint_interval.end_id)
4041
# we already are a leaf task executing for a specific datapoint interval:
@@ -44,6 +45,9 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
4445
skip_data=False,
4546
show_progress=False,
4647
)
48+
if not datapoints:
49+
return # no datapoints in the interval -> we are done
50+
4751
for i in range(datapoints.sizes["time"]):
4852
datapoint = datapoints.isel(time=i)
4953
call_next(context, datapoint) # type: ignore[call-arg]
@@ -88,7 +92,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
8892

8993
subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks] # type: ignore[misc]
9094
if len(subtasks) > 0:
91-
context.submit_batch(subtasks)
95+
context.submit_subtasks(subtasks)
9296

9397
return
9498

0 commit comments

Comments (0)