From cb83647263888007f9af9bfa3cae1b782a03b3a1 Mon Sep 17 00:00:00 2001 From: peopleig Date: Tue, 20 Jan 2026 06:59:36 +0530 Subject: [PATCH] Add initial Python client --- .gitignore | 6 + client/python/README.md | 299 ++++++++++++++++++ client/python/examples/basic_usage.py | 45 +++ .../python/examples/context_manager_usage.py | 39 +++ client/python/proto/vector-db.proto | 73 +++++ client/python/pyproject.toml | 35 ++ client/python/tests/test_client.py | 137 ++++++++ client/python/tests/test_config.py | 74 +++++ client/python/tests/test_connection.py | 113 +++++++ client/python/tests/test_models.py | 131 ++++++++ client/python/vortexdb/__init__.py | 33 ++ client/python/vortexdb/client.py | 130 ++++++++ client/python/vortexdb/config.py | 52 +++ client/python/vortexdb/connection.py | 73 +++++ client/python/vortexdb/exceptions.py | 30 ++ client/python/vortexdb/grpc/__init__.py | 1 + client/python/vortexdb/grpc/vector_db_pb2.py | 57 ++++ .../vortexdb/grpc/vector_db_pb2_grpc.py | 231 ++++++++++++++ client/python/vortexdb/models.py | 134 ++++++++ client/python/vortexdb/protoutils.py | 32 ++ 20 files changed, 1725 insertions(+) create mode 100644 client/python/README.md create mode 100644 client/python/examples/basic_usage.py create mode 100644 client/python/examples/context_manager_usage.py create mode 100644 client/python/proto/vector-db.proto create mode 100644 client/python/pyproject.toml create mode 100644 client/python/tests/test_client.py create mode 100644 client/python/tests/test_config.py create mode 100644 client/python/tests/test_connection.py create mode 100644 client/python/tests/test_models.py create mode 100644 client/python/vortexdb/__init__.py create mode 100644 client/python/vortexdb/client.py create mode 100644 client/python/vortexdb/config.py create mode 100644 client/python/vortexdb/connection.py create mode 100644 client/python/vortexdb/exceptions.py create mode 100644 client/python/vortexdb/grpc/__init__.py create mode 100644 
client/python/vortexdb/grpc/vector_db_pb2.py create mode 100644 client/python/vortexdb/grpc/vector_db_pb2_grpc.py create mode 100644 client/python/vortexdb/models.py create mode 100644 client/python/vortexdb/protoutils.py diff --git a/.gitignore b/.gitignore index 7eb2e3b..b40848c 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,9 @@ .TODO /databases .env +__pycache__/ +build/ +.venv/ +venv/ +.pytest_cache/ +*.egg-info/ \ No newline at end of file diff --git a/client/python/README.md b/client/python/README.md new file mode 100644 index 0000000..ad6f5a9 --- /dev/null +++ b/client/python/README.md @@ -0,0 +1,299 @@ +# VortexDB Python Client + +This is the official Python client for **VortexDB**, a vector database exposed via a gRPC API. + +The client provides a thin, typed, Pythonic wrapper over the VortexDB gRPC interface, handling: +- connection setup +- authentication +- request/response mapping +- error translation +- resource cleanup + +The client is designed to be minimal, explicit, and easy to extend. + +--- + +## Requirements + +- Python 3.10+ +- A running VortexDB gRPC server + +--- + +## Installation (local / development) + +At the moment, the client lives inside the main VortexDB repository. + +From `client/python`: + +```bash +python -m venv .venv +source .venv/bin/activate + +pip install -e .
+``` + +--- + +## Configuration and Authentication + +The client communicates with VortexDB over gRPC and requires: + +- gRPC endpoint (host:port) +- API key (maps to `GRPC_ROOT_PASSWORD` on the server) + +These can be provided either: + +- explicitly when constructing the client, or +- via environment variables + +--- + +## Basic Usage + +All examples and methods of using the client are in the `examples` directory, in `*_usage.py` files +A full working example of the basic ways to use the client is available in: +`examples/basic_usage.py` + +### Context Manager Support + +The client supports usage as a context manager, which automatically closes the underlying gRPC channel connection +Example available in: +```examples/context_manager_usage.py``` + +--- + +## Client API + +### `VortexDB` + +Main client class for interacting with the VortexDB gRPC server. + +#### **Constructor** + +``` +VortexDB( + grpc_url: str | None = None, + api_key: str | None = None, + timeout: float | None = None, +) +``` +`grpc_url`: gRPC server address (`host:port`) +`api_key`: API key for authentication +`timeout`: per-request timeout in seconds + +--- + +#### **Insert** + +Insert a vector with an associated payload +``` +insert(*, vector: DenseVector, payload: Payload) -> str +``` + +Returns +- `point_id` (UUID string) + +Raises +- `TypeError` if `vector` is not a `DenseVector` +- gRPC-mapped errors (see Error Handling) + +--- + +#### **Get** + +Fetch a point by its ID +``` +get(*, point_id: str) -> Point | None +``` + +Returns +- `Point` if found +- `None` if the point does not exist + +--- + +#### **Search** + +Search for nearest neighbours to a query vector +``` +search( + *, + vector: DenseVector, + similarity: Similarity, + limit: int, +) -> list[str] +``` + +Returns +- List of `point_id` strings + +Raises +- `TypeError` if `vector` is not a `DenseVector` +- `InvalidArgumentError` for invalid parameters + +--- + +#### **Delete** + +Delete a point by its ID +``` +delete(*, point_id: 
str) -> None +``` + +Raises +- `NotFoundError` if the point does not exist + +--- + +#### **Close** + +Close the underlying gRPC channel +``` +close() -> None +``` + +--- + +## Models + +The client exposes typed models that represent VortexDB concepts and handle +validation and protobuf conversion internally + +### `DenseVector` + +``` +DenseVector(values: list[float] | tuple[float, ...]) +``` +- Validates numeric input +- Normalizes values to `float` +- Immutable (`frozen=True`) + +--- + +### `Payload` + +``` +Payload(content_type: ContentType, content:str) +``` +Factory Helpers: +- `Payload.text(content: str)` +- `Payload.image(content: str)` + +--- + +### `Point` + +``` +Point( + id: str, + vector: DenseVector, + payload: Payload, +) +``` +Additional `pretty()` method provided to properly format output +All fields are directly accessible: +- `point.id` +- `point.vector` +- `point.payload` + +--- + +### `Similarity` + +Enum representing distance functions: +- `EUCLIDEAN` +- `MANHATTAN` +- `HAMMING` +- `COSINE` + +--- + +### `ContentType` + +Enum representing payload type: +- `TEXT` +- `IMAGE` + +--- + +## Error Handling + +The client maps gRPC status codes to Python exceptions to provide a clean, Pythonic error-handling experience.
+All client exceptions inherit from `VortexDBError` + +### Exception Mapping + +| gRPC Status Code | Python Exception | +| :--- | :--- | +| `UNAUTHENTICATED` | `AuthenticationError` | +| `NOT_FOUND` | `NotFoundError` | +| `INVALID_ARGUMENT` | `InvalidArgumentError` | +| `DEADLINE_EXCEEDED` | `TimeoutError` | +| `UNAVAILABLE` | `ServiceUnavailableError` | +| Any other error | `InternalServerError` | + +--- + +## Testing + +Tests are written using **pytest** +Test coverage includes: +- models +- configuration loading +- gRPC connection layer +- client API + +Tests live in the `tests/` directory + +To run the tests: `pytest -v` + +--- + +## Proto and gRPC Stubs + +The gRPC interface is defined using a Protocol Buffers (`.proto`) file, from which Python gRPC stubs are generated. + +### Proto + +The `.proto` file is kept here for transparency and reproducibility reasons. + +**Location:** `proto/vector-db.proto` + + +Even though the gRPC server is already running and exposes these methods, the client still needs the proto to: +- Generate strongly-typed request / response classes +- Generate the gRPC client stub (`VectorDBStub`) + +--- + +### Generated Python stubs + +**Location:** `vortexdb/grpc/` + - `vector_db_pb2.py` + - `vector_db_pb2_grpc.py` + +These files are **auto-generated** from `vector-db.proto` and should not be edited manually. +The client internally wraps this stub. End users never interact with it directly. + +--- + +### Regenerating the stubs + +Regeneration is only required if the `.proto` file is changed.
For example, if: +- a new RPC is added +- enums are updated + +From the client's top-level directory, run: + +```bash +python -m grpc_tools.protoc \ + -I proto \ + --python_out=vortexdb/grpc \ + --grpc_python_out=vortexdb/grpc \ + proto/vector-db.proto +``` + +After running this: +- `vector_db_pb2_grpc.py` and `vector_db_pb2.py` will be updated +- No other client code should need changes diff --git a/client/python/examples/basic_usage.py b/client/python/examples/basic_usage.py new file mode 100644 index 0000000..06c3f52 --- /dev/null +++ b/client/python/examples/basic_usage.py @@ -0,0 +1,45 @@ +from vortexdb import VortexDB +from vortexdb import DenseVector, Payload, Similarity # from vortexdb.models + +# This file is written to show the user an example of how VortexDB can be used +# Further feature updates will be reflected in different *_usage.py files +# This one shows client initialization and insertion, fetching, search and deletion of vectors +# Along with that, it shows the usage of a few helper methods in the DenseVector and Point classes + +def main(): + # Initialize client + db = VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) + + # Insert a vector + point_id = db.insert( + vector=DenseVector([0.1, 0.2, 0.3]), + payload=Payload.text("hello world"), + ) + print("Inserted point:", point_id) + + # Get the point + # Point.pretty() exists to provide a readable output of a Point + point = db.get(point_id=point_id) + print("Fetched point:", point.pretty()) + print("DenseVector as a list:", point.vector.to_list()) + + # Search + results = db.search( + vector=DenseVector([0.1, 0.2, 0.3]), + similarity=Similarity.COSINE, + limit=3, + ) + print("Search results:", results) + + # Delete + db.delete(point_id=point_id) + print("Deleted point") + + # Close connection + db.close() + +if __name__ == "__main__": + main() diff --git a/client/python/examples/context_manager_usage.py b/client/python/examples/context_manager_usage.py new file mode 
100644 index 0000000..ec46151 --- /dev/null +++ b/client/python/examples/context_manager_usage.py @@ -0,0 +1,39 @@ +from vortexdb import VortexDB, DenseVector, Payload, Similarity + +# This file exists to show how to use VortexDB as a Context Manager +# The gRPC connection is automatically opened on entry and closed when the context exits + +def main(): + with VortexDB( + grpc_url="localhost:50051", + api_key="my-secret-password", + ) as db: + + # Insert a vector + point_id = db.insert( + vector=DenseVector([0.1, 0.2, 0.3]), + payload=Payload.text("hello world"), + ) + print("Inserted point:", point_id) + + # Get the point + point = db.get(point_id=point_id) + print("Fetched point:", point.pretty()) + + # Search + results = db.search( + vector=DenseVector([0.1, 0.2, 0.3]), + similarity=Similarity.COSINE, + limit=3, + ) + print("Search results:", results) + + # Delete + db.delete(point_id=point_id) + print("Deleted point") + + # At this point, the gRPC channel is closed automatically + print("Connection closed") + +if __name__ == "__main__": + main() diff --git a/client/python/proto/vector-db.proto b/client/python/proto/vector-db.proto new file mode 100644 index 0000000..b3834e9 --- /dev/null +++ b/client/python/proto/vector-db.proto @@ -0,0 +1,73 @@ +syntax = "proto3"; + +package vectordb; + +import "google/protobuf/empty.proto"; + +message UUID { + string value = 1; +} + +service VectorDB { + //Insert a vector with a payload and return the assigned PointID + rpc InsertVector(InsertVectorRequest) returns (PointID) {} + + //Delete a vector by its PointID + rpc DeletePoint(PointID) returns (google.protobuf.Empty) {} + + //Get a vector and its payload by PointID + rpc GetPoint(PointID) returns (Point) {} + + //Search for the k nearest vectors to a target vector given a distance function + rpc SearchPoints(SearchRequest) returns (SearchResponse) {} +} + + +message InsertVectorRequest { + DenseVector vector = 1; + Payload payload = 2; +} + + +message SearchRequest { +
DenseVector query_vector = 1; + Similarity similarity = 2; + uint64 limit = 3; +} + + +message SearchResponse { + repeated PointID result_point_ids = 1; +} + +message DenseVector { + repeated float values = 1; +} + +message Point { + PointID id = 1; + Payload payload = 2; + DenseVector vector = 3; +} + +message PointID { + UUID id = 1; +} + +enum Similarity{ + Euclidean = 0; + Manhattan = 1; + Hamming = 2; + Cosine = 3; +} + +enum ContentType { + Image = 0; + Text = 1; +} + +message Payload { + ContentType content_type = 1; + string content = 2; +} + diff --git a/client/python/pyproject.toml b/client/python/pyproject.toml new file mode 100644 index 0000000..758fd94 --- /dev/null +++ b/client/python/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "vortexdb" +version = "0.1.0" +description = "Python client for VortexDB" +readme = "README.md" +requires-python = ">=3.10" + +keywords = ["vector-database", "grpc", "embeddings", "search"] + +classifiers = [ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", +] + +dependencies = [ + "grpcio>=1.60", + "grpcio-tools>=1.60", + "protobuf>=4.25", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0", +] + +[tool.setuptools.packages.find] +where = ["."] +include = ["vortexdb*"] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/client/python/tests/test_client.py b/client/python/tests/test_client.py new file mode 100644 index 0000000..a752320 --- /dev/null +++ b/client/python/tests/test_client.py @@ -0,0 +1,137 @@ +import pytest +from unittest.mock import Mock + +from vortexdb.client import VortexDB +from vortexdb.connection import GRPCConnection +from vortexdb.models import DenseVector, Payload, Similarity, ContentType, Point +from vortexdb.exceptions import InvalidArgumentError + + + +# Fixtures for a mock connection and client layer + +@pytest.fixture +def 
mock_connection(monkeypatch): + """ + Replace GRPCConnection with a mock instance. + """ + conn = Mock(spec=GRPCConnection) + monkeypatch.setattr("vortexdb.client.GRPCConnection", lambda _: conn) + return conn + + +@pytest.fixture +def client(mock_connection): + return VortexDB( + grpc_url="localhost:50051", + api_key="secret", + ) + + +# Insert + +def test_insert_success(client, mock_connection): + response = Mock() + response.id = Mock() + response.id.value = "point-123" + + mock_connection.call.return_value = response + + vector = DenseVector([1, 2, 3]) + payload = Payload.text("hello") + + point_id = client.insert(vector=vector, payload=payload) + + assert point_id == "point-123" + + + +def test_insert_rejects_invalid_vector(client): + with pytest.raises(TypeError): + client.insert( + vector=[1, 2, 3], # not DenseVector + payload=Payload.text("hello"), + ) + + +# Get + +def test_get_point_success(client, mock_connection): + proto_point = Mock() + proto_point.id.id.value = "point-123" + proto_point.vector.values = [1, 2, 3] + proto_point.payload.content_type = ContentType.TEXT.to_proto() + proto_point.payload.content = "hello" + + mock_connection.call.return_value = proto_point + + point = client.get(point_id="point-123") + + assert isinstance(point, Point) + assert point.id == "point-123" + assert point.payload.content == "hello" + + +def test_get_point_not_found(client, mock_connection): + mock_connection.call.return_value = None + + result = client.get(point_id="missing") + + assert result is None + + +# Delete + +def test_delete_success(client, mock_connection): + mock_connection.call.return_value = None + + client.delete(point_id="point-123") + + mock_connection.call.assert_called_once() + + +# Search + +def test_search_success(client, mock_connection): + mock_connection.call.return_value = Mock( + result_point_ids=[ + Mock(id=Mock(value="p1")), + Mock(id=Mock(value="p2")), + ] + ) + + results = client.search( + vector=DenseVector([1, 2, 3]), + 
similarity=Similarity.COSINE, + limit=2, + ) + + assert results == ["p1", "p2"] + + +def test_search_invalid_vector(client): + with pytest.raises(TypeError): + client.search( + vector=[1, 2, 3], + similarity=Similarity.COSINE, + limit=2, + ) + + +# Close + +def test_close_closes_connection(client, mock_connection): + client.close() + mock_connection.close.assert_called_once() + +def test_context_manager_closes_connection(monkeypatch): + conn = Mock(spec=GRPCConnection) + monkeypatch.setattr("vortexdb.client.GRPCConnection", lambda _: conn) + + with VortexDB( + grpc_url="localhost:50051", + api_key="secret", + ) as db: + assert db is not None + + conn.close.assert_called_once() diff --git a/client/python/tests/test_config.py b/client/python/tests/test_config.py new file mode 100644 index 0000000..1017250 --- /dev/null +++ b/client/python/tests/test_config.py @@ -0,0 +1,74 @@ +import pytest + +from vortexdb.config import VortexDBConfig, ConfigurationError + + +# Clean slate env vars for every test +@pytest.fixture +def clean_env(monkeypatch): + for var in [ + "VORTEXDB_GRPC_URL", + "VORTEXDB_API_KEY", + "VORTEXDB_TIMEOUT", + ]: + monkeypatch.delenv(var, raising=False) + + +# Checking from_env + +def test_config_requires_api_key(clean_env): + with pytest.raises(ConfigurationError): + VortexDBConfig.from_env() + + +def test_config_from_explicit_args(clean_env): + cfg = VortexDBConfig.from_env( + grpc_url="localhost:50051", + api_key="secret", + timeout=10.0, + ) + + assert cfg.grpc_url == "localhost:50051" + assert cfg.api_key == "secret" + assert cfg.timeout == 10.0 + +# Env vars fallback + +def test_config_from_env_vars(clean_env, monkeypatch): + monkeypatch.setenv("VORTEXDB_GRPC_URL", "127.0.0.1:1234") + monkeypatch.setenv("VORTEXDB_API_KEY", "env-secret") + monkeypatch.setenv("VORTEXDB_TIMEOUT", "7.5") + + cfg = VortexDBConfig.from_env() + + assert cfg.grpc_url == "127.0.0.1:1234" + assert cfg.api_key == "env-secret" + assert cfg.timeout == 7.5 + + +# Defaults + 
+def test_config_default_grpc_url(clean_env, monkeypatch): + monkeypatch.setenv("VORTEXDB_API_KEY", "secret") + + cfg = VortexDBConfig.from_env() + + assert cfg.grpc_url == "localhost:50051" + + +def test_config_default_timeout(clean_env, monkeypatch): + monkeypatch.setenv("VORTEXDB_API_KEY", "secret") + + cfg = VortexDBConfig.from_env() + + assert cfg.timeout == 5.0 + + +# Invalid Timeout + +def test_config_invalid_timeout(clean_env, monkeypatch): + monkeypatch.setenv("VORTEXDB_API_KEY", "secret") + monkeypatch.setenv("VORTEXDB_TIMEOUT", "not-a-number") + + with pytest.raises(ValueError): + VortexDBConfig.from_env() diff --git a/client/python/tests/test_connection.py b/client/python/tests/test_connection.py new file mode 100644 index 0000000..ee82152 --- /dev/null +++ b/client/python/tests/test_connection.py @@ -0,0 +1,113 @@ +import grpc +import pytest +from unittest.mock import Mock, patch + +from vortexdb.connection import GRPCConnection +from vortexdb.config import VortexDBConfig +from vortexdb.exceptions import ( + AuthenticationError, + NotFoundError, + InvalidArgumentError, + TimeoutError, + ServiceUnavailableError, + InternalServerError, +) + +# Fake gRPC error, required for testing + +class FakeRpcError(grpc.RpcError): + """ + RpcError implementation for unit testing. + grpc.RpcError cannot be instantiated directly. 
+ """ + + def __init__(self, status_code: grpc.StatusCode, details: str): + self._status_code = status_code + self._details = details + + def code(self): + return self._status_code + + def details(self): + return self._details + + +# Pytest fixtures for config and channel + +@pytest.fixture +def config(): + return VortexDBConfig( + grpc_url="localhost:50051", + api_key="secret", + timeout=3.0, + ) + + +@pytest.fixture +def connection(config): + with patch("grpc.insecure_channel") as mock_channel: + mock_channel.return_value = Mock() + yield GRPCConnection(config) + + +# Basic connection testing + +def test_channel_created_with_correct_url(config): + with patch("grpc.insecure_channel") as mock_channel: + GRPCConnection(config) + mock_channel.assert_called_once_with("localhost:50051") + + +def test_metadata_is_attached(connection): + assert ("authorization", "Bearer secret") in connection._metadata + + +def test_successful_rpc_call(connection): + fake_rpc = Mock(return_value="ok") + + result = connection.call(fake_rpc, request="req") + + fake_rpc.assert_called_once_with( + "req", + timeout=3.0, + metadata=connection._metadata, + ) + assert result == "ok" + + +# Error mapping test + +@pytest.mark.parametrize( + "status_code,expected_exception", + [ + (grpc.StatusCode.UNAUTHENTICATED, AuthenticationError), + (grpc.StatusCode.NOT_FOUND, NotFoundError), + (grpc.StatusCode.INVALID_ARGUMENT, InvalidArgumentError), + (grpc.StatusCode.DEADLINE_EXCEEDED, TimeoutError), + (grpc.StatusCode.UNAVAILABLE, ServiceUnavailableError), + ], +) +def test_grpc_error_mapping(status_code, expected_exception, connection): + error = FakeRpcError(status_code, "boom") + fake_rpc = Mock(side_effect=error) + + with pytest.raises(expected_exception): + connection.call(fake_rpc, request="req") + + +def test_unknown_grpc_error_maps_to_internal_error(connection): + error = FakeRpcError(grpc.StatusCode.UNKNOWN, "unknown") + fake_rpc = Mock(side_effect=error) + + with 
pytest.raises(InternalServerError): + connection.call(fake_rpc, request="req") + + +# Clean connection closure test + +def test_close_closes_channel(config): + with patch("grpc.insecure_channel") as mock_channel: + mock_channel.return_value = Mock() + conn = GRPCConnection(config) + conn.close() + conn._channel.close.assert_called_once() diff --git a/client/python/tests/test_models.py b/client/python/tests/test_models.py new file mode 100644 index 0000000..e78d686 --- /dev/null +++ b/client/python/tests/test_models.py @@ -0,0 +1,131 @@ +import pytest + +from vortexdb.models import ( + DenseVector, + Payload, + Point, + Similarity, + ContentType, +) + +from vortexdb.grpc import vector_db_pb2 + +# DenseVector Tests + +def test_dense_vector_valid(): + a = [1, 2.5, 3] + v = DenseVector(a) + assert v.values == [1.0, 2.5, 3.0] + + +def test_dense_vector_accepts_tuple(): + v = DenseVector((1, 2, 3)) + assert v.values == [1.0, 2.0, 3.0] + + +def test_dense_vector_rejects_empty(): + with pytest.raises(ValueError): + DenseVector([]) + + +def test_dense_vector_rejects_non_numeric(): + with pytest.raises(TypeError): + DenseVector([1, "a", 3]) + + +def test_dense_vector_is_frozen(): + v = DenseVector([1, 2, 3]) + with pytest.raises(Exception): + v.values = [4, 5, 6] + + +def test_dense_vector_to_proto(): + v = DenseVector([1, 2, 3]) + proto = v.to_proto() + assert list(proto.values) == [1.0, 2.0, 3.0] + + +# Similarity Test + +def test_similarity_to_proto(): + assert Similarity.EUCLIDEAN.to_proto() == vector_db_pb2.Euclidean + assert Similarity.MANHATTAN.to_proto() == vector_db_pb2.Manhattan + assert Similarity.HAMMING.to_proto() == vector_db_pb2.Hamming + assert Similarity.COSINE.to_proto() == vector_db_pb2.Cosine + + +# ContentType Tests + +def test_content_type_to_proto(): + assert ContentType.TEXT.to_proto() == vector_db_pb2.Text + assert ContentType.IMAGE.to_proto() == vector_db_pb2.Image + + +def test_content_type_from_proto(): + assert 
ContentType.from_proto(vector_db_pb2.Text) == ContentType.TEXT + assert ContentType.from_proto(vector_db_pb2.Image) == ContentType.IMAGE + + +def test_content_type_from_proto_invalid(): + with pytest.raises(KeyError): + ContentType.from_proto(100) + + +# Payload Tests + +def test_payload_text_factory(): + p = Payload.text("hello") + assert p.content_type == ContentType.TEXT + assert p.content == "hello" + + +def test_payload_image_factory(): + p = Payload.image("img_data") + assert p.content_type == ContentType.IMAGE + assert p.content == "img_data" + + +def test_payload_to_proto(): + p = Payload.text("hello") + proto = p.to_proto() + assert proto.content == "hello" + assert proto.content_type == vector_db_pb2.Text + +def test_payload_rejects_invalid_content_type(): + with pytest.raises(TypeError): + Payload("text", "hello") + + + +# Point Test + +def test_point_from_proto(): + proto = vector_db_pb2.Point( + id=vector_db_pb2.PointID( + id=vector_db_pb2.UUID(value="point-123") + ), + vector=vector_db_pb2.DenseVector(values=[1, 2, 3]), + payload=vector_db_pb2.Payload( + content_type=vector_db_pb2.Text, + content="hello" + ) + ) + + point = Point.from_proto(proto) + + assert point.id == "point-123" + assert point.vector.values == [1.0, 2.0, 3.0] + assert point.payload.content_type == ContentType.TEXT + assert point.payload.content == "hello" + +def test_point_from_proto_without_payload(): + proto = vector_db_pb2.Point( + id=vector_db_pb2.PointID( + id=vector_db_pb2.UUID(value="p1") + ), + vector=vector_db_pb2.DenseVector(values=[1,2,3]), + payload=None, + ) + + point = Point.from_proto(proto) + assert point.payload.content == "" diff --git a/client/python/vortexdb/__init__.py b/client/python/vortexdb/__init__.py new file mode 100644 index 0000000..62c100f --- /dev/null +++ b/client/python/vortexdb/__init__.py @@ -0,0 +1,33 @@ +# vortexdb/__init__.py + +from vortexdb.client import VortexDB +from vortexdb.models import ( + DenseVector, + Payload, + Point, + Similarity, 
+) +from vortexdb.exceptions import ( + VortexDBError, + AuthenticationError, + NotFoundError, + InvalidArgumentError, + TimeoutError, + ServiceUnavailableError, + InternalServerError, +) + +__all__ = [ + "VortexDB", + "DenseVector", + "Payload", + "Point", + "Similarity", + "VortexDBError", + "AuthenticationError", + "NotFoundError", + "InvalidArgumentError", + "TimeoutError", + "ServiceUnavailableError", + "InternalServerError", +] diff --git a/client/python/vortexdb/client.py b/client/python/vortexdb/client.py new file mode 100644 index 0000000..5bcc748 --- /dev/null +++ b/client/python/vortexdb/client.py @@ -0,0 +1,130 @@ +from typing import List + +from vortexdb.connection import GRPCConnection +from vortexdb.config import VortexDBConfig +from vortexdb.models import ( + DenseVector, + Payload, + Point, + Similarity, +) + +# Imported as proto, because I liked it - sounds less cumbersome +from vortexdb import protoutils as proto + + +class VortexDB: + """ High-level Python client for VortexDB """ + + def __init__( + self, + *, + grpc_url: str | None = None, + api_key: str | None = None, + timeout: float | None = None, + ): + # Config order followed - args -> env vars -> defaults + self._config = VortexDBConfig.from_env( + grpc_url=grpc_url, + api_key=api_key, + timeout=timeout, + ) + + self._conn = GRPCConnection(self._config) + +# The basic operations + + def insert(self, *, vector: DenseVector, payload: Payload) -> str: + """ + Insert a vector with payload. + Returns: point_id (str) + """ + if not isinstance(vector, DenseVector): + raise TypeError( + "vector must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + + request = proto.build_insert_request( + vector=vector, + payload=payload, + ) + + response = self._conn.call( + self._conn.stub.InsertVector, + request, + ) + + return response.id.value + + def get(self, *, point_id: str) -> Point | None: + """ + Retrieve a point by ID. 
+ """ + request = proto.build_point_id_request(point_id) + + response = self._conn.call( + self._conn.stub.GetPoint, + request, + ) + + if response is None: + return None + + return Point.from_proto(response) + + + def delete(self, *, point_id: str) -> None: + """ + Delete a point by ID. + """ + request = proto.build_point_id_request(point_id) + + self._conn.call( + self._conn.stub.DeletePoint, + request, + ) + + def search( + self, + *, + vector: DenseVector, + similarity: Similarity, + limit: int, + ) -> List[str]: + """ + Search for nearest neighbors. + Returns: List of point IDs + """ + if not isinstance(vector, DenseVector): + raise TypeError( + "vector must be a DenseVector. " + "Use: DenseVector([1.0, 2.0, 3.0])" + ) + + request = proto.build_search_request( + vector=vector, + similarity=similarity, + limit=limit, + ) + + response = self._conn.call( + self._conn.stub.SearchPoints, + request, + ) + + return [pid.id.value for pid in response.result_point_ids] + + def close(self) -> None: + """ + Close the gRPC connection. 
+ """ + self._conn.close() + + # Context Manager + # Will allow the usage of VortexDB with the 'with' keyword (Example given in examples/context_manager_usage.py) + def __enter__(self) -> "VortexDB": + return self + + def __exit__(self, exc_type, exc, tb) -> None: + self.close() diff --git a/client/python/vortexdb/config.py b/client/python/vortexdb/config.py new file mode 100644 index 0000000..a0e68f1 --- /dev/null +++ b/client/python/vortexdb/config.py @@ -0,0 +1,52 @@ +import os +from dataclasses import dataclass +from typing import Optional +from vortexdb.exceptions import ConfigurationError + + +DEFAULT_GRPC_HOST = "localhost" +DEFAULT_GRPC_PORT = 50051 +DEFAULT_TIMEOUT = 5.0 + + +@dataclass(frozen=True) +class VortexDBConfig: + """Configuration for the VortexDB Python client""" + + grpc_url: str + api_key: str + timeout: float = DEFAULT_TIMEOUT + + @staticmethod + def from_env( + *, + grpc_url: Optional[str] = None, + api_key: Optional[str] = None, + timeout: Optional[float] = None, + ) -> "VortexDBConfig": + """ Load configuration from explicit arguments with environment variable fallback """ + + resolved_grpc_url = ( + grpc_url + or os.getenv("VORTEXDB_GRPC_URL") + or f"{DEFAULT_GRPC_HOST}:{DEFAULT_GRPC_PORT}" + ) + + resolved_api_key = api_key or os.getenv("VORTEXDB_API_KEY") + if not resolved_api_key: + raise ConfigurationError( + "VortexDB API key is required. " + "Provide api_key argument or set VORTEXDB_API_KEY." 
+ ) + + resolved_timeout = ( + timeout + if timeout is not None + else float(os.getenv("VORTEXDB_TIMEOUT", DEFAULT_TIMEOUT)) + ) + + return VortexDBConfig( + grpc_url=resolved_grpc_url, + api_key=resolved_api_key, + timeout=resolved_timeout, + ) diff --git a/client/python/vortexdb/connection.py b/client/python/vortexdb/connection.py new file mode 100644 index 0000000..d7a61a5 --- /dev/null +++ b/client/python/vortexdb/connection.py @@ -0,0 +1,73 @@ +import grpc +from typing import Any, Callable + +from vortexdb.config import VortexDBConfig +from vortexdb.exceptions import ( + AuthenticationError, + NotFoundError, + InvalidArgumentError, + TimeoutError, + ServiceUnavailableError, + InternalServerError, + VortexDBError, +) + +from vortexdb.grpc.vector_db_pb2_grpc import VectorDBStub + + +class GRPCConnection: + """ gRPC connection wrapper for VortexDB""" + + def __init__(self, config: VortexDBConfig): + self._config = config + self._channel = grpc.insecure_channel(config.grpc_url) + self._stub = VectorDBStub(self._channel) + # Because this is required in every request + self._metadata = ( + ("authorization", f"Bearer {config.api_key}"), + ) + + @property + def stub(self) -> VectorDBStub: + return self._stub + + def call( + self, + rpc: Callable[..., Any], + request: Any, + ) -> Any: + """ Execute a gRPC call with standard error handling """ + try: + return rpc( + request, + timeout=self._config.timeout, + metadata=self._metadata, + ) + + except grpc.RpcError as e: + raise self._map_grpc_error(e) from e + + def close(self) -> None: + """ Close the underlying gRPC channel """ + self._channel.close() + + @staticmethod + def _map_grpc_error(error: grpc.RpcError) -> VortexDBError: + code = error.code() + + if code == grpc.StatusCode.UNAUTHENTICATED: + return AuthenticationError(error.details()) + + if code == grpc.StatusCode.NOT_FOUND: + return NotFoundError(error.details()) + + if code == grpc.StatusCode.INVALID_ARGUMENT: + return InvalidArgumentError(error.details()) + 
+ if code == grpc.StatusCode.DEADLINE_EXCEEDED: + return TimeoutError(error.details()) + + if code == grpc.StatusCode.UNAVAILABLE: + return ServiceUnavailableError(error.details()) + + return InternalServerError(error.details()) diff --git a/client/python/vortexdb/exceptions.py b/client/python/vortexdb/exceptions.py new file mode 100644 index 0000000..7a4498b --- /dev/null +++ b/client/python/vortexdb/exceptions.py @@ -0,0 +1,30 @@ +class VortexDBError(Exception): + """Base exception for all VortexDB client errors.""" + + +class AuthenticationError(VortexDBError): + """Authentication failed (invalid or missing API key)""" + + +class NotFoundError(VortexDBError): + """Could not find requested resource""" + + +class InvalidArgumentError(VortexDBError): + """Invalid input was provided""" + + +class TimeoutError(VortexDBError): + """Request timed out while communicating with the server""" + + +class ServiceUnavailableError(VortexDBError): + """The server is unavailable or unreachable""" + + +class InternalServerError(VortexDBError): + """Internal error in the server""" + +class ConfigurationError(VortexDBError): + """Invalid or missing client configuration.""" + diff --git a/client/python/vortexdb/grpc/__init__.py b/client/python/vortexdb/grpc/__init__.py new file mode 100644 index 0000000..26ad3ca --- /dev/null +++ b/client/python/vortexdb/grpc/__init__.py @@ -0,0 +1 @@ +# vortexdb/grpc/__init__.py \ No newline at end of file diff --git a/client/python/vortexdb/grpc/vector_db_pb2.py b/client/python/vortexdb/grpc/vector_db_pb2.py new file mode 100644 index 0000000..2b8cbb8 --- /dev/null +++ b/client/python/vortexdb/grpc/vector_db_pb2.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: vector-db.proto +# Protobuf Python Version: 6.31.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 31, + 1, + '', + 'vector-db.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0fvector-db.proto\x12\x08vectordb\x1a\x1bgoogle/protobuf/empty.proto\"\x15\n\x04UUID\x12\r\n\x05value\x18\x01 \x01(\t\"`\n\x13InsertVectorRequest\x12%\n\x06vector\x18\x01 \x01(\x0b\x32\x15.vectordb.DenseVector\x12\"\n\x07payload\x18\x02 \x01(\x0b\x32\x11.vectordb.Payload\"u\n\rSearchRequest\x12+\n\x0cquery_vector\x18\x01 \x01(\x0b\x32\x15.vectordb.DenseVector\x12(\n\nsimilarity\x18\x02 \x01(\x0e\x32\x14.vectordb.Similarity\x12\r\n\x05limit\x18\x03 \x01(\x04\"=\n\x0eSearchResponse\x12+\n\x10result_point_ids\x18\x01 \x03(\x0b\x32\x11.vectordb.PointID\"\x1d\n\x0b\x44\x65nseVector\x12\x0e\n\x06values\x18\x01 \x03(\x02\"q\n\x05Point\x12\x1d\n\x02id\x18\x01 \x01(\x0b\x32\x11.vectordb.PointID\x12\"\n\x07payload\x18\x02 \x01(\x0b\x32\x11.vectordb.Payload\x12%\n\x06vector\x18\x03 \x01(\x0b\x32\x15.vectordb.DenseVector\"%\n\x07PointID\x12\x1a\n\x02id\x18\x01 \x01(\x0b\x32\x0e.vectordb.UUID\"G\n\x07Payload\x12+\n\x0c\x63ontent_type\x18\x01 \x01(\x0e\x32\x15.vectordb.ContentType\x12\x0f\n\x07\x63ontent\x18\x02 
\x01(\t*C\n\nSimilarity\x12\r\n\tEuclidean\x10\x00\x12\r\n\tManhattan\x10\x01\x12\x0b\n\x07Hamming\x10\x02\x12\n\n\x06\x43osine\x10\x03*\"\n\x0b\x43ontentType\x12\t\n\x05Image\x10\x00\x12\x08\n\x04Text\x10\x01\x32\x81\x02\n\x08VectorDB\x12\x42\n\x0cInsertVector\x12\x1d.vectordb.InsertVectorRequest\x1a\x11.vectordb.PointID\"\x00\x12:\n\x0b\x44\x65letePoint\x12\x11.vectordb.PointID\x1a\x16.google.protobuf.Empty\"\x00\x12\x30\n\x08GetPoint\x12\x11.vectordb.PointID\x1a\x0f.vectordb.Point\"\x00\x12\x43\n\x0cSearchPoints\x12\x17.vectordb.SearchRequest\x1a\x18.vectordb.SearchResponse\"\x00\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'vector_db_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None + _globals['_SIMILARITY']._serialized_start=619 + _globals['_SIMILARITY']._serialized_end=686 + _globals['_CONTENTTYPE']._serialized_start=688 + _globals['_CONTENTTYPE']._serialized_end=722 + _globals['_UUID']._serialized_start=58 + _globals['_UUID']._serialized_end=79 + _globals['_INSERTVECTORREQUEST']._serialized_start=81 + _globals['_INSERTVECTORREQUEST']._serialized_end=177 + _globals['_SEARCHREQUEST']._serialized_start=179 + _globals['_SEARCHREQUEST']._serialized_end=296 + _globals['_SEARCHRESPONSE']._serialized_start=298 + _globals['_SEARCHRESPONSE']._serialized_end=359 + _globals['_DENSEVECTOR']._serialized_start=361 + _globals['_DENSEVECTOR']._serialized_end=390 + _globals['_POINT']._serialized_start=392 + _globals['_POINT']._serialized_end=505 + _globals['_POINTID']._serialized_start=507 + _globals['_POINTID']._serialized_end=544 + _globals['_PAYLOAD']._serialized_start=546 + _globals['_PAYLOAD']._serialized_end=617 + _globals['_VECTORDB']._serialized_start=725 + _globals['_VECTORDB']._serialized_end=982 +# @@protoc_insertion_point(module_scope) diff --git a/client/python/vortexdb/grpc/vector_db_pb2_grpc.py 
b/client/python/vortexdb/grpc/vector_db_pb2_grpc.py new file mode 100644 index 0000000..edc3c8f --- /dev/null +++ b/client/python/vortexdb/grpc/vector_db_pb2_grpc.py @@ -0,0 +1,231 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import warnings + +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 +from vortexdb.grpc import vector_db_pb2 as vector__db__pb2 + +GRPC_GENERATED_VERSION = '1.76.0' +GRPC_VERSION = grpc.__version__ +_version_not_supported = False + +try: + from grpc._utilities import first_version_is_lower + _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) +except ImportError: + _version_not_supported = True + +if _version_not_supported: + raise RuntimeError( + f'The grpc package installed is at version {GRPC_VERSION},' + + ' but the generated code in vector_db_pb2_grpc.py depends on' + + f' grpcio>={GRPC_GENERATED_VERSION}.' + + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' + + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' + ) + + +class VectorDBStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.InsertVector = channel.unary_unary( + '/vectordb.VectorDB/InsertVector', + request_serializer=vector__db__pb2.InsertVectorRequest.SerializeToString, + response_deserializer=vector__db__pb2.PointID.FromString, + _registered_method=True) + self.DeletePoint = channel.unary_unary( + '/vectordb.VectorDB/DeletePoint', + request_serializer=vector__db__pb2.PointID.SerializeToString, + response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + _registered_method=True) + self.GetPoint = channel.unary_unary( + '/vectordb.VectorDB/GetPoint', + request_serializer=vector__db__pb2.PointID.SerializeToString, + response_deserializer=vector__db__pb2.Point.FromString, + _registered_method=True) + self.SearchPoints = channel.unary_unary( + '/vectordb.VectorDB/SearchPoints', + request_serializer=vector__db__pb2.SearchRequest.SerializeToString, + response_deserializer=vector__db__pb2.SearchResponse.FromString, + _registered_method=True) + + +class VectorDBServicer(object): + """Missing associated documentation comment in .proto file.""" + + def InsertVector(self, request, context): + """Insert a vector with a payload and return the assigned PointID + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def DeletePoint(self, request, context): + """Delete a vector by its PointID + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetPoint(self, request, context): + """Get a vector and its payload by PointID + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def SearchPoints(self, request, context): + """Search for the k nearest vectors to a target vector given a distance function + """ + 
context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_VectorDBServicer_to_server(servicer, server): + rpc_method_handlers = { + 'InsertVector': grpc.unary_unary_rpc_method_handler( + servicer.InsertVector, + request_deserializer=vector__db__pb2.InsertVectorRequest.FromString, + response_serializer=vector__db__pb2.PointID.SerializeToString, + ), + 'DeletePoint': grpc.unary_unary_rpc_method_handler( + servicer.DeletePoint, + request_deserializer=vector__db__pb2.PointID.FromString, + response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + ), + 'GetPoint': grpc.unary_unary_rpc_method_handler( + servicer.GetPoint, + request_deserializer=vector__db__pb2.PointID.FromString, + response_serializer=vector__db__pb2.Point.SerializeToString, + ), + 'SearchPoints': grpc.unary_unary_rpc_method_handler( + servicer.SearchPoints, + request_deserializer=vector__db__pb2.SearchRequest.FromString, + response_serializer=vector__db__pb2.SearchResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'vectordb.VectorDB', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + server.add_registered_method_handlers('vectordb.VectorDB', rpc_method_handlers) + + + # This class is part of an EXPERIMENTAL API. 
+class VectorDB(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def InsertVector(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/vectordb.VectorDB/InsertVector', + vector__db__pb2.InsertVectorRequest.SerializeToString, + vector__db__pb2.PointID.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def DeletePoint(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/vectordb.VectorDB/DeletePoint', + vector__db__pb2.PointID.SerializeToString, + google_dot_protobuf_dot_empty__pb2.Empty.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def GetPoint(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary( + request, + target, + '/vectordb.VectorDB/GetPoint', + vector__db__pb2.PointID.SerializeToString, + vector__db__pb2.Point.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + _registered_method=True) + + @staticmethod + def SearchPoints(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return 
# The client exposes its own enums instead of the generated protobuf ones:
# 1. readability
# 2. they can be reused by a future HTTP client
# 3. conversion to the transport value happens at the very end, so callers
#    do not break if the proto enum changes

class Similarity(Enum):
    """Similarity function selectable for a search request."""

    EUCLIDEAN = "euclidean"
    MANHATTAN = "manhattan"
    HAMMING = "hamming"
    COSINE = "cosine"

    def to_proto(self) -> int:
        """Return the matching value of the generated ``Similarity`` enum."""
        return {
            Similarity.EUCLIDEAN: vector_db_pb2.Euclidean,
            Similarity.MANHATTAN: vector_db_pb2.Manhattan,
            Similarity.HAMMING: vector_db_pb2.Hamming,
            Similarity.COSINE: vector_db_pb2.Cosine,
        }[self]


class ContentType(Enum):
    """Kind of content carried by a :class:`Payload`."""

    TEXT = "text"
    IMAGE = "image"

    def to_proto(self) -> int:
        """Return the matching value of the generated ``ContentType`` enum."""
        return {
            ContentType.TEXT: vector_db_pb2.Text,
            ContentType.IMAGE: vector_db_pb2.Image,
        }[self]

    @staticmethod
    def from_proto(value: int) -> "ContentType":
        """Convert a generated ``ContentType`` enum value to this enum.

        Raises:
            KeyError: if ``value`` is neither ``Text`` nor ``Image``.
        """
        return {
            vector_db_pb2.Text: ContentType.TEXT,
            vector_db_pb2.Image: ContentType.IMAGE,
        }[value]


# TODO Extend support to other data types than lists or tuples (numpy arrays probably)
# TODO Further compatibility to allow conversions directly to numpy arrays (similar to .to_list())
@dataclass(frozen=True)
class DenseVector:
    """A non-empty vector of numbers, stored as a list of floats."""

    values: List[float]

    def __post_init__(self):
        """Validate ``values`` and normalize every element to ``float``.

        Raises:
            TypeError: if ``values`` is not a list/tuple, or an element is
                not an ``int`` or ``float``.
            ValueError: if ``values`` is empty.
        """
        if not isinstance(self.values, (list, tuple)):
            raise TypeError("DenseVector expects a list or tuple of floats")

        if not self.values:
            raise ValueError("DenseVector cannot be empty")

        for v in self.values:
            if not isinstance(v, (int, float)):
                raise TypeError(
                    "DenseVector values must be numeric (int or float)"
                )

        # The dataclass is frozen, so the normalized list has to be stored
        # through object.__setattr__.
        object.__setattr__(self, "values", [float(v) for v in self.values])

    def to_proto(self) -> "vector_db_pb2.DenseVector":
        """Build the generated ``DenseVector`` message from the values."""
        return vector_db_pb2.DenseVector(values=self.values)

    def to_list(self) -> List[float]:
        """Return a new list holding the vector's values."""
        return list(self.values)


@dataclass(frozen=True)
class Payload:
    """Content attached to a point, tagged with its :class:`ContentType`."""

    content_type: ContentType
    content: str

    @staticmethod
    def text(content: str) -> "Payload":
        """Create a payload whose content type is ``ContentType.TEXT``."""
        return Payload(ContentType.TEXT, content)

    @staticmethod
    def image(content: str) -> "Payload":
        """Create a payload whose content type is ``ContentType.IMAGE``."""
        return Payload(ContentType.IMAGE, content)

    def __post_init__(self):
        """Reject a ``content_type`` that is not a :class:`ContentType`."""
        if not isinstance(self.content_type, ContentType):
            raise TypeError("content_type must be ContentType enum")

    def to_proto(self) -> "vector_db_pb2.Payload":
        """Build the generated ``Payload`` message from this payload."""
        return vector_db_pb2.Payload(
            content_type=self.content_type.to_proto(),
            content=self.content,
        )


@dataclass(frozen=True)
class Point:
    """A stored point: its id, its vector and its payload."""

    id: str
    vector: DenseVector
    payload: Payload

    @staticmethod
    def from_proto(proto: "vector_db_pb2.Point") -> "Point":
        """Convert a generated ``Point`` message into a :class:`Point`.

        A message without a payload yields an empty text payload.

        Raises:
            ValueError: if the message's vector has no values (raised by
                :class:`DenseVector`).
            KeyError: if the payload's content type is unknown (raised by
                ``ContentType.from_proto``).
        """
        # A singular message field is never None in protobuf Python: reading
        # an unset one returns an empty default message, so presence has to
        # be tested with HasField(). Without this, a missing payload was
        # decoded as content type 0 (Image) with empty content.
        if proto.HasField("payload"):
            payload = Payload(
                content_type=ContentType.from_proto(proto.payload.content_type),
                content=proto.payload.content,
            )
        else:
            payload = Payload.text("")

        return Point(
            id=proto.id.id.value,
            vector=DenseVector(list(proto.vector.values)),
            payload=payload,
        )

    def pretty(self) -> str:
        """Return a multi-line, human-readable description of the point."""
        return (
            f"\nPoint:\n id = {self.id},\n"
            f" vector_dim = {len(self.vector.values)},\n"
            f" vector = {self.vector},\n"
            f" payload_type = {self.payload.content_type.name},\n"
            f" payload = '{self.payload.content}'"
        )
def build_insert_request(
    *,
    vector: DenseVector,
    payload: Payload,
) -> vector_db_pb2.InsertVectorRequest:
    """Build an ``InsertVectorRequest`` from a client vector and payload."""
    vector_proto = vector.to_proto()
    payload_proto = payload.to_proto()
    return vector_db_pb2.InsertVectorRequest(
        vector=vector_proto,
        payload=payload_proto,
    )


def build_point_id_request(point_id: str) -> vector_db_pb2.PointID:
    """Wrap ``point_id`` in a ``UUID`` message inside a ``PointID`` message."""
    uuid_proto = vector_db_pb2.UUID(value=point_id)
    return vector_db_pb2.PointID(id=uuid_proto)


def build_search_request(
    *,
    vector: DenseVector,
    similarity: Similarity,
    limit: int,
) -> vector_db_pb2.SearchRequest:
    """Build a ``SearchRequest`` for ``vector`` using ``similarity`` and ``limit``."""
    query_proto = vector.to_proto()
    similarity_value = similarity.to_proto()
    return vector_db_pb2.SearchRequest(
        query_vector=query_proto,
        similarity=similarity_value,
        limit=limit,
    )