From b3a16dabd0fd490b73ee5695e4427ce132a5c183 Mon Sep 17 00:00:00 2001 From: Corentin Musard Date: Wed, 3 Dec 2025 15:18:50 +0100 Subject: [PATCH 1/2] Add create_dataset method --- CHANGELOG.md | 4 + tilebox-datasets/tests/data/datasets.py | 49 ++++++- tilebox-datasets/tests/data/test_datasets.py | 30 +++- .../tilebox/datasets/aio/client.py | 17 +++ tilebox-datasets/tilebox/datasets/client.py | 28 +++- .../tilebox/datasets/data/datasets.py | 131 +++++++++++++++++- tilebox-datasets/tilebox/datasets/service.py | 102 +++++++++++++- .../tilebox/datasets/sync/client.py | 17 +++ 8 files changed, 366 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5358214..d77a9b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `tilebox-datasets`: Added `create_dataset` method to `Client` to create a new dataset. + ## [0.45.0] - 2025-11-17 ### Added diff --git a/tilebox-datasets/tests/data/datasets.py b/tilebox-datasets/tests/data/datasets.py index dd271fb..2b9588b 100644 --- a/tilebox-datasets/tests/data/datasets.py +++ b/tilebox-datasets/tests/data/datasets.py @@ -2,21 +2,32 @@ from dataclasses import replace from functools import lru_cache -from google.protobuf.descriptor_pb2 import FileDescriptorProto, FileDescriptorSet +from google.protobuf.descriptor_pb2 import FieldDescriptorProto, FileDescriptorProto, FileDescriptorSet from hypothesis.strategies import ( DrawFn, + booleans, composite, integers, just, lists, none, one_of, + sampled_from, text, uuids, ) from tests.example_dataset.example_dataset_pb2 import DESCRIPTOR_PROTO -from tilebox.datasets.data.datasets import AnnotatedType, Dataset, DatasetGroup, FieldAnnotation, ListDatasetsResponse +from tilebox.datasets.data.datasets import ( + AnnotatedType, + Dataset, + DatasetGroup, + DatasetKind, + DatasetType, + Field, + FieldAnnotation, + ListDatasetsResponse, +) from tilebox.datasets.message_pool import register_once @@ -28,6 +39,40 @@ def field_annotations(draw: DrawFn) -> FieldAnnotation: return FieldAnnotation(description, example_value) +@composite +def fields(draw: DrawFn) -> Field: + """A hypothesis strategy for generating random fields""" + name = draw(text(alphabet=string.ascii_lowercase + "_", min_size=3, max_size=25)) + field_type = draw( + one_of( + just(FieldDescriptorProto.Type.TYPE_STRING), + just(FieldDescriptorProto.Type.TYPE_BYTES), + just(FieldDescriptorProto.Type.TYPE_BOOL), + just(FieldDescriptorProto.Type.TYPE_INT64), + just(FieldDescriptorProto.Type.TYPE_UINT64), + just(FieldDescriptorProto.Type.TYPE_DOUBLE), + just(FieldDescriptorProto.Type.TYPE_MESSAGE), + ) + ) + type_name = f".datasets.v1.{name}" if field_type == FieldDescriptorProto.Type.TYPE_MESSAGE else None + label = draw( + one_of(just(FieldDescriptorProto.Label.LABEL_OPTIONAL), just(FieldDescriptorProto.Label.LABEL_REPEATED)) + ) + descriptor = FieldDescriptorProto(name=name, type=field_type, type_name=type_name, label=label) + + annotation = draw(field_annotations()) + queryable = draw(booleans()) + return Field(descriptor, annotation, queryable) + + +@composite +def dataset_types(draw: DrawFn) -> DatasetType: + """A hypothesis strategy for generating random dataset types""" + kind = draw(sampled_from(DatasetKind) | none()) + dataset_fields = draw(lists(fields(), min_size=1, max_size=5)) + return DatasetType(kind, dataset_fields) + + @lru_cache def example_dataset_type() -> AnnotatedType: descriptor = 
FileDescriptorProto.FromString(DESCRIPTOR_PROTO) diff --git a/tilebox-datasets/tests/data/test_datasets.py b/tilebox-datasets/tests/data/test_datasets.py index ac94f7b..05cdb9a 100644 --- a/tilebox-datasets/tests/data/test_datasets.py +++ b/tilebox-datasets/tests/data/test_datasets.py @@ -1,7 +1,23 @@ from hypothesis import given -from tests.data.datasets import annotated_types, dataset_groups, datasets, field_annotations, list_datasets_responses -from tilebox.datasets.data.datasets import AnnotatedType, Dataset, DatasetGroup, FieldAnnotation, ListDatasetsResponse +from tests.data.datasets import ( + annotated_types, + dataset_groups, + dataset_types, + datasets, + field_annotations, + fields, + list_datasets_responses, +) +from tilebox.datasets.data.datasets import ( + AnnotatedType, + Dataset, + DatasetGroup, + DatasetType, + Field, + FieldAnnotation, + ListDatasetsResponse, +) @given(field_annotations()) @@ -9,6 +25,16 @@ def test_field_annotations_to_message_and_back(annotation: FieldAnnotation) -> N assert FieldAnnotation.from_message(annotation.to_message()) == annotation +@given(fields()) +def test_fields_to_message_and_back(field: Field) -> None: + assert Field.from_message(field.to_message()) == field + + +@given(dataset_types()) +def test_dataset_types_to_message_and_back(dataset_type: DatasetType) -> None: + assert DatasetType.from_message(dataset_type.to_message()) == dataset_type + + @given(annotated_types()) def test_annotated_types_to_message_and_back(annotated_type: AnnotatedType) -> None: assert AnnotatedType.from_message(annotated_type.to_message()) == annotated_type diff --git a/tilebox-datasets/tilebox/datasets/aio/client.py b/tilebox-datasets/tilebox/datasets/aio/client.py index 1809293..1750aa0 100644 --- a/tilebox-datasets/tilebox/datasets/aio/client.py +++ b/tilebox-datasets/tilebox/datasets/aio/client.py @@ -5,6 +5,7 @@ from tilebox.datasets.aio.dataset import DatasetClient from tilebox.datasets.client import Client as BaseClient from tilebox.datasets.client import token_from_env +from tilebox.datasets.data.datasets import DatasetKind, FieldDict from tilebox.datasets.datasets.v1.collections_pb2_grpc import CollectionServiceStub from tilebox.datasets.datasets.v1.data_access_pb2_grpc import DataAccessServiceStub from tilebox.datasets.datasets.v1.data_ingestion_pb2_grpc import DataIngestionServiceStub @@ -32,6 +33,22 @@ def __init__(self, *, url: str = "https://api.tilebox.com", token: str | None = ) self._client = BaseClient(service) + async def create_dataset( + self, + kind: DatasetKind, + code_name: str, + fields: list[FieldDict], + *, + name: str | None = None, + summary: str | None = None, + ) -> DatasetClient: + if name is None: + name = code_name + if summary is None: + summary = "" + + return await self._client.create_dataset(kind, code_name, fields, name, summary, DatasetClient) + async def datasets(self) -> Group: return await self._client.datasets(DatasetClient) diff --git a/tilebox-datasets/tilebox/datasets/client.py b/tilebox-datasets/tilebox/datasets/client.py index a781ac1..176b2da 100644 --- a/tilebox-datasets/tilebox/datasets/client.py +++ b/tilebox-datasets/tilebox/datasets/client.py @@ -7,7 +7,7 @@ from promise import Promise from _tilebox.grpc.channel import parse_channel_info -from tilebox.datasets.data.datasets import Dataset, DatasetGroup, ListDatasetsResponse +from tilebox.datasets.data.datasets import Dataset, DatasetGroup, DatasetKind, FieldDict, ListDatasetsResponse from tilebox.datasets.group import Group from tilebox.datasets.message_pool 
import register_once from tilebox.datasets.service import TileboxDatasetService @@ -26,6 +26,27 @@ class Client: def __init__(self, service: TileboxDatasetService) -> None: self._service = service + def create_dataset( # noqa: PLR0913 + self, kind: DatasetKind, code_name: str, fields: list[FieldDict], name: str, summary: str, dataset_type: type[T] + ) -> Promise[T]: + """Create a new dataset. + + Args: + kind: The kind of the dataset. + code_name: The code name of the dataset. + fields: The fields of the dataset. + name: The name of the dataset. Defaults to the code name. + summary: A short summary of the dataset. Optional. + + Returns: + The created dataset. + """ + return ( + self._service.create_dataset(kind, code_name, fields, name, summary) + .then(_ensure_registered) + .then(lambda dataset: dataset_type(self._service, dataset)) + ) + def datasets(self, dataset_type: type[T]) -> Promise[Group]: """Fetch all available datasets.""" return ( @@ -40,11 +61,10 @@ def datasets(self, dataset_type: type[T]) -> Promise[Group]: ) def dataset(self, slug: str, dataset_type: type[T]) -> Promise[T]: - """ - Get a dataset by its slug, e.g. `open_data.copernicus.sentinel1_sar`. + """Get a dataset by its slug, e.g. `open_data.copernicus.sentinel1_sar`. Args: - slug: The slug of the dataset + slug: The slug of the dataset. Returns: The dataset if it exists. diff --git a/tilebox-datasets/tilebox/datasets/data/datasets.py b/tilebox-datasets/tilebox/datasets/data/datasets.py index 580f484..fdc1d6a 100644 --- a/tilebox-datasets/tilebox/datasets/data/datasets.py +++ b/tilebox-datasets/tilebox/datasets/data/datasets.py @@ -1,12 +1,29 @@ from dataclasses import dataclass +from datetime import datetime, timedelta +from enum import Enum +from typing import TypedDict, get_args, get_origin from uuid import UUID -from google.protobuf.descriptor_pb2 import FileDescriptorSet +import numpy as np +from google.protobuf import duration_pb2, timestamp_pb2 +from google.protobuf.descriptor_pb2 import FieldDescriptorProto, FileDescriptorSet +from shapely import Geometry +from typing_extensions import NotRequired, Required -from tilebox.datasets.datasets.v1 import core_pb2, dataset_type_pb2, datasets_pb2 +from tilebox.datasets.datasets.v1 import core_pb2, dataset_type_pb2, datasets_pb2, well_known_types_pb2 from tilebox.datasets.uuid import uuid_message_to_optional_uuid, uuid_message_to_uuid, uuid_to_uuid_message +class DatasetKind(Enum): + TEMPORAL = dataset_type_pb2.DATASET_KIND_TEMPORAL + """A dataset that contains a timestamp field.""" + SPATIOTEMPORAL = dataset_type_pb2.DATASET_KIND_SPATIOTEMPORAL + """A dataset that contains a timestamp field and a geometry field.""" + + +_dataset_kind_int_to_enum = {kind.value: kind for kind in DatasetKind} + + @dataclass(frozen=True) class FieldAnnotation: description: str @@ -20,6 +37,116 @@ def to_message(self) -> dataset_type_pb2.FieldAnnotation: return dataset_type_pb2.FieldAnnotation(description=self.description, example_value=self.example_value) +class FieldDict(TypedDict): + name: Required[str] + type: Required[ + type[str] + | type[list[str]] + | type[bytes] + | type[list[bytes]] + | type[bool] + | type[list[bool]] + | type[int] + | type[list[int]] + | type[np.uint64] + | type[list[np.uint64]] + | type[float] + | type[list[float]] + | type[timedelta] + | type[list[timedelta]] + | type[datetime] + | type[list[datetime]] + | type[UUID] + | type[list[UUID]] + | type[Geometry] + | type[list[Geometry]] + ] + description: NotRequired[str] + example_value: NotRequired[str] + + 
+_TYPE_INFO: dict[type, tuple[FieldDescriptorProto.Type.ValueType, str | None]] = { + str: (FieldDescriptorProto.TYPE_STRING, None), + bytes: (FieldDescriptorProto.TYPE_BYTES, None), + bool: (FieldDescriptorProto.TYPE_BOOL, None), + int: (FieldDescriptorProto.TYPE_INT64, None), + np.uint64: (FieldDescriptorProto.TYPE_UINT64, None), + float: (FieldDescriptorProto.TYPE_DOUBLE, None), + timedelta: (FieldDescriptorProto.TYPE_MESSAGE, f".{duration_pb2.Duration.DESCRIPTOR.full_name}"), + datetime: (FieldDescriptorProto.TYPE_MESSAGE, f".{timestamp_pb2.Timestamp.DESCRIPTOR.full_name}"), + UUID: (FieldDescriptorProto.TYPE_MESSAGE, f".{well_known_types_pb2.UUID.DESCRIPTOR.full_name}"), + Geometry: (FieldDescriptorProto.TYPE_MESSAGE, f".{well_known_types_pb2.Geometry.DESCRIPTOR.full_name}"), +} + + +@dataclass(frozen=True) +class Field: + descriptor: FieldDescriptorProto + annotation: FieldAnnotation + queryable: bool + + @classmethod + def from_message(cls, field: dataset_type_pb2.Field) -> "Field": + return cls( + descriptor=field.descriptor, + annotation=FieldAnnotation.from_message(field.annotation), + queryable=field.queryable, + ) + + @classmethod + def from_dict(cls, field: FieldDict) -> "Field": + origin = get_origin(field["type"]) + if origin is list: + label = FieldDescriptorProto.Label.LABEL_REPEATED + args = get_args(field["type"]) + inner_type = args[0] if args else field["type"] + else: + label = FieldDescriptorProto.Label.LABEL_OPTIONAL + inner_type = field["type"] + + (field_type, field_type_name) = _TYPE_INFO[inner_type] + + return cls( + descriptor=FieldDescriptorProto( + name=field["name"], + type=field_type, + type_name=field_type_name, + label=label, + ), + annotation=FieldAnnotation( + description=field.get("description", ""), + example_value=field.get("example_value", ""), + ), + queryable=False, + ) + + def to_message(self) -> dataset_type_pb2.Field: + return dataset_type_pb2.Field( + descriptor=self.descriptor, + annotation=self.annotation.to_message(), + queryable=self.queryable, + ) + + +@dataclass(frozen=True) +class DatasetType: + kind: DatasetKind | None + fields: list[Field] + + @classmethod + def from_message(cls, dataset_type: dataset_type_pb2.DatasetType) -> "DatasetType": + return cls( + kind=_dataset_kind_int_to_enum.get(dataset_type.kind, None), + fields=[Field.from_message(f) for f in dataset_type.fields], + ) + + def to_message(self) -> dataset_type_pb2.DatasetType: + return dataset_type_pb2.DatasetType( + kind=self.kind.value if self.kind else dataset_type_pb2.DATASET_KIND_UNSPECIFIED, + fields=[f.to_message() for f in self.fields], + ) + + @dataclass(frozen=True) class AnnotatedType: descriptor_set: FileDescriptorSet diff --git a/tilebox-datasets/tilebox/datasets/service.py b/tilebox-datasets/tilebox/datasets/service.py index 1224a65..332dc9e 100644 --- a/tilebox-datasets/tilebox/datasets/service.py +++ b/tilebox-datasets/tilebox/datasets/service.py @@ -3,12 +3,23 @@ from importlib.metadata import distributions from uuid import UUID +from google.protobuf import timestamp_pb2 +from google.protobuf.descriptor_pb2 import FieldDescriptorProto from promise import Promise from tilebox.datasets.data.collection import CollectionInfo from tilebox.datasets.data.data_access import QueryFilters from tilebox.datasets.data.datapoint import AnyMessage, IngestResponse, QueryResultPage -from tilebox.datasets.data.datasets import Dataset, ListDatasetsResponse +from tilebox.datasets.data.datasets import ( + Dataset, + DatasetKind, + DatasetType, + Field, + FieldAnnotation, + 
FieldDict, + ListDatasetsResponse, +) +from tilebox.datasets.datasets.v1 import well_known_types_pb2 from tilebox.datasets.datasets.v1.collections_pb2 import ( CreateCollectionRequest, DeleteCollectionRequest, @@ -20,7 +31,13 @@ from tilebox.datasets.datasets.v1.data_access_pb2_grpc import DataAccessServiceStub from tilebox.datasets.datasets.v1.data_ingestion_pb2 import DeleteRequest, IngestRequest from tilebox.datasets.datasets.v1.data_ingestion_pb2_grpc import DataIngestionServiceStub -from tilebox.datasets.datasets.v1.datasets_pb2 import ClientInfo, GetDatasetRequest, ListDatasetsRequest, Package +from tilebox.datasets.datasets.v1.datasets_pb2 import ( + ClientInfo, + CreateDatasetRequest, + GetDatasetRequest, + ListDatasetsRequest, + Package, +) from tilebox.datasets.datasets.v1.datasets_pb2_grpc import DatasetServiceStub from tilebox.datasets.query.pagination import Pagination from tilebox.datasets.tilebox.v1 import id_pb2 @@ -46,6 +63,25 @@ def __init__( self._data_access_service = data_access_service_stub self._data_ingestion_service = data_ingestion_service_stub + def create_dataset( + self, kind: DatasetKind, code_name: str, fields: list[FieldDict], name: str, summary: str + ) -> Promise[Dataset]: + """Create a new dataset. + + Args: + kind: The kind of the dataset. + code_name: The code name of the dataset. + fields: The fields of the dataset. + name: The name of the dataset. + summary: A short summary of the dataset. + + Returns: + The created dataset. + """ + dataset_type = DatasetType(kind, _REQUIRED_FIELDS_PER_DATASET_KIND[kind] + [Field.from_dict(f) for f in fields]) + req = CreateDatasetRequest(name=name, type=dataset_type.to_message(), summary=summary, code_name=code_name) + return Promise.resolve(self._dataset_service.CreateDataset(req)).then(Dataset.from_message) + def list_datasets(self) -> Promise[ListDatasetsResponse]: """List all datasets and dataset groups.""" return Promise.resolve( @@ -202,3 +238,65 @@ def _environment_info() -> str: return f"Google Colab using python {python_version}" return f"Unknown IPython using python {python_version}" + + +_time_field = Field( + descriptor=FieldDescriptorProto( + name="time", + label=FieldDescriptorProto.Label.LABEL_OPTIONAL, + type=FieldDescriptorProto.TYPE_MESSAGE, + type_name=f".{timestamp_pb2.Timestamp.DESCRIPTOR.full_name}", + ), + annotation=FieldAnnotation( + description="The timestamp associated with each data point.", + example_value="2022-10-17T14:35:28Z", + ), + queryable=False, +) + +_id_field = Field( + descriptor=FieldDescriptorProto( + name="id", + label=FieldDescriptorProto.Label.LABEL_OPTIONAL, + type=FieldDescriptorProto.TYPE_MESSAGE, + type_name=f".{well_known_types_pb2.UUID.DESCRIPTOR.full_name}", + ), + annotation=FieldAnnotation( + description="A universally unique identifier (UUID) that uniquely identifies each data point, automatically generated by Tilebox.", + example_value="4e8a2836-72f8-4ac2-a9e9-cbe3492ef60c", + ), + queryable=False, +) + +_ingestion_time_field = Field( + descriptor=FieldDescriptorProto( + name="ingestion_time", + label=FieldDescriptorProto.Label.LABEL_OPTIONAL, + type=FieldDescriptorProto.TYPE_MESSAGE, + type_name=f".{timestamp_pb2.Timestamp.DESCRIPTOR.full_name}", + ), + annotation=FieldAnnotation( + description="The time the data point was ingested into the Tilebox API, automatically generated by Tilebox.", + example_value="2022-10-17T14:35:28Z", + ), + queryable=False, +) + +_geometry_field = Field( + descriptor=FieldDescriptorProto( + name="geometry", + 
label=FieldDescriptorProto.Label.LABEL_OPTIONAL, + type=FieldDescriptorProto.TYPE_MESSAGE, + type_name=f".{well_known_types_pb2.Geometry.DESCRIPTOR.full_name}", + ), + annotation=FieldAnnotation( + description="The geometry associated with each data point.", + example_value="POLYGON ((112.345 -36.789, ...))", + ), + queryable=False, +) + +_REQUIRED_FIELDS_PER_DATASET_KIND: dict[DatasetKind, list[Field]] = { + DatasetKind.TEMPORAL: [_time_field, _id_field, _ingestion_time_field], + DatasetKind.SPATIOTEMPORAL: [_time_field, _id_field, _ingestion_time_field, _geometry_field], +} diff --git a/tilebox-datasets/tilebox/datasets/sync/client.py b/tilebox-datasets/tilebox/datasets/sync/client.py index 830f714..40ea767 100644 --- a/tilebox-datasets/tilebox/datasets/sync/client.py +++ b/tilebox-datasets/tilebox/datasets/sync/client.py @@ -4,6 +4,7 @@ from _tilebox.grpc.error import with_pythonic_errors from tilebox.datasets.client import Client as BaseClient from tilebox.datasets.client import token_from_env +from tilebox.datasets.data.datasets import DatasetKind, FieldDict from tilebox.datasets.datasets.v1.collections_pb2_grpc import CollectionServiceStub from tilebox.datasets.datasets.v1.data_access_pb2_grpc import DataAccessServiceStub from tilebox.datasets.datasets.v1.data_ingestion_pb2_grpc import DataIngestionServiceStub @@ -32,6 +33,22 @@ def __init__(self, *, url: str = "https://api.tilebox.com", token: str | None = ) self._client = BaseClient(service) + def create_dataset( + self, + kind: DatasetKind, + code_name: str, + fields: list[FieldDict], + *, + name: str | None = None, + summary: str | None = None, + ) -> DatasetClient: + if name is None: + name = code_name + if summary is None: + summary = "" + + return self._client.create_dataset(kind, code_name, fields, name, summary, DatasetClient).get() + def datasets(self) -> Group: return self._client.datasets(DatasetClient).get() From 91a2aa17d0a1a2d63b8aafe181ef667eca7c9dae Mon Sep 17 00:00:00 2001 From: Corentin Musard Date: Thu, 4 Dec 2025 10:48:03 +0100 Subject: [PATCH 2/2] move docstring to the public interface --- .../tilebox/datasets/aio/client.py | 29 ++++++++++++++++--- tilebox-datasets/tilebox/datasets/client.py | 22 -------------- .../tilebox/datasets/sync/client.py | 29 ++++++++++++++++--- 3 files changed, 50 insertions(+), 30 deletions(-) diff --git a/tilebox-datasets/tilebox/datasets/aio/client.py b/tilebox-datasets/tilebox/datasets/aio/client.py index 1750aa0..5fc2ba2 100644 --- a/tilebox-datasets/tilebox/datasets/aio/client.py +++ b/tilebox-datasets/tilebox/datasets/aio/client.py @@ -40,19 +40,40 @@ async def create_dataset( fields: list[FieldDict], *, name: str | None = None, - summary: str | None = None, + description: str | None = None, ) -> DatasetClient: + """Create a new dataset. + + Args: + kind: The kind of the dataset. + code_name: The code name of the dataset. + fields: The fields of the dataset. + name: The name of the dataset. Defaults to the code name. + description: A short description of the dataset. Optional. + + Returns: + The created dataset. 
+ """ if name is None: name = code_name - if summary is None: - summary = "" + if description is None: + description = "" - return await self._client.create_dataset(kind, code_name, fields, name, summary, DatasetClient) + return await self._client.create_dataset(kind, code_name, fields, name, description, DatasetClient) async def datasets(self) -> Group: + """Fetch all available datasets.""" return await self._client.datasets(DatasetClient) async def dataset(self, slug: str) -> DatasetClient: + """Get a dataset by its slug, e.g. `open_data.copernicus.sentinel1_sar`. + + Args: + slug: The slug of the dataset. + + Returns: + The dataset if it exists. + """ return await self._client.dataset(slug, DatasetClient) async def _dataset_by_id(self, dataset_id: str | UUID) -> DatasetClient: diff --git a/tilebox-datasets/tilebox/datasets/client.py b/tilebox-datasets/tilebox/datasets/client.py index 176b2da..eb1dd6e 100644 --- a/tilebox-datasets/tilebox/datasets/client.py +++ b/tilebox-datasets/tilebox/datasets/client.py @@ -29,18 +29,6 @@ def __init__(self, service: TileboxDatasetService) -> None: def create_dataset( # noqa: PLR0913 self, kind: DatasetKind, code_name: str, fields: list[FieldDict], name: str, summary: str, dataset_type: type[T] ) -> Promise[T]: - """Create a new dataset. - - Args: - kind: The kind of the dataset. - code_name: The code name of the dataset. - fields: The fields of the dataset. - name: The name of the dataset. Defaults to the code name. - summary: A short summary of the dataset. Optional. - - Returns: - The created dataset. - """ return ( self._service.create_dataset(kind, code_name, fields, name, summary) .then(_ensure_registered) @@ -48,7 +36,6 @@ def create_dataset( # noqa: PLR0913 ) def datasets(self, dataset_type: type[T]) -> Promise[Group]: - """Fetch all available datasets.""" return ( self._service.list_datasets() .then(_log_server_message) @@ -61,15 +48,6 @@ def datasets(self, dataset_type: type[T]) -> Promise[Group]: ) def dataset(self, slug: str, dataset_type: type[T]) -> Promise[T]: - """Get a dataset by its slug, e.g. `open_data.copernicus.sentinel1_sar`. - - Args: - slug: The slug of the dataset. - - Returns: - The dataset if it exists. - """ - return ( self._service.get_dataset_by_slug(slug) .then(_ensure_registered) diff --git a/tilebox-datasets/tilebox/datasets/sync/client.py b/tilebox-datasets/tilebox/datasets/sync/client.py index 40ea767..b9b4b10 100644 --- a/tilebox-datasets/tilebox/datasets/sync/client.py +++ b/tilebox-datasets/tilebox/datasets/sync/client.py @@ -40,19 +40,40 @@ def create_dataset( fields: list[FieldDict], *, name: str | None = None, - summary: str | None = None, + description: str | None = None, ) -> DatasetClient: + """Create a new dataset. + + Args: + kind: The kind of the dataset. + code_name: The code name of the dataset. + fields: The fields of the dataset. + name: The name of the dataset. Defaults to the code name. + description: A short description of the dataset. Optional. + + Returns: + The created dataset. + """ if name is None: name = code_name - if summary is None: - summary = "" + if description is None: + description = "" - return self._client.create_dataset(kind, code_name, fields, name, summary, DatasetClient).get() + return self._client.create_dataset(kind, code_name, fields, name, description, DatasetClient).get() def datasets(self) -> Group: + """Fetch all available datasets.""" return self._client.datasets(DatasetClient).get() def dataset(self, slug: str) -> DatasetClient: + """Get a dataset by its slug, e.g. 
`open_data.copernicus.sentinel1_sar`. + + Args: + slug: The slug of the dataset. + + Returns: + The dataset if it exists. + """ return self._client.dataset(slug, DatasetClient).get() def _dataset_by_id(self, dataset_id: str | UUID) -> DatasetClient:
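
A minimal usage sketch of the new create_dataset method, based on the signatures introduced in this patch. The public import path for the synchronous Client, the token handling, and the dataset and field names below are illustrative assumptions, not part of the diff. Note that the required time, id and ingestion_time fields (plus geometry for spatiotemporal datasets) are appended automatically by the service layer, so only additional fields need to be listed.

    from datetime import datetime

    from tilebox.datasets import Client  # assumed public import path for the sync client
    from tilebox.datasets.data.datasets import DatasetKind

    # hypothetical token; the client may also pick one up from the environment (token_from_env)
    client = Client(token="...")

    dataset = client.create_dataset(
        DatasetKind.TEMPORAL,
        "sensor_readings",  # code_name
        fields=[
            {"name": "value", "type": float, "description": "Measured value", "example_value": "42.0"},
            {"name": "tags", "type": list[str]},
            {"name": "acquired_at", "type": datetime},
        ],
        name="Sensor Readings",
        description="Example dataset created with the new create_dataset method.",
    )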