29 changes: 28 additions & 1 deletion .github/workflows/python-ci.yml
@@ -178,9 +178,36 @@ jobs:
path: .coverage*
include-hidden-files: true

integration-test-trino:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
with:
python-version: '3.12'
- name: Install UV
uses: astral-sh/setup-uv@v7
with:
enable-cache: true
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
- name: Install
run: make install
- name: Run trino integration tests with coverage
run: COVERAGE=1 make test-trino
- name: Show debug logs
if: ${{ failure() }}
run: docker compose -f dev/docker-compose-trino.yml logs
- name: Upload coverage data
uses: actions/upload-artifact@v4
with:
name: coverage-trino
path: .coverage*
include-hidden-files: true

integration-coverage-report:
runs-on: ubuntu-latest
needs: [integration-test, integration-test-s3, integration-test-adls, integration-test-gcs]
needs: [integration-test, integration-test-s3, integration-test-adls, integration-test-gcs, integration-test-trino]
steps:
- uses: actions/checkout@v6
- uses: actions/setup-python@v6
6 changes: 5 additions & 1 deletion Makefile
@@ -121,6 +121,10 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch
docker compose -f dev/docker-compose-integration.yml rm -f
docker compose -f dev/docker-compose-integration.yml build --no-cache

test-trino: ## Run tests marked with @pytest.mark.trino
sh ./dev/run-trino.sh
$(TEST_RUNNER) pytest tests/ -m trino $(PYTEST_ARGS)

test-s3: ## Run tests marked with @pytest.mark.s3
sh ./dev/run-minio.sh
$(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS)
@@ -134,7 +138,7 @@ test-gcs: ## Run tests marked with @pytest.mark.gcs
$(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS)

test-coverage: ## Run all tests with coverage and report
$(MAKE) COVERAGE=1 test test-integration test-s3 test-adls test-gcs
	$(MAKE) COVERAGE=1 test test-integration test-s3 test-adls test-gcs test-trino
$(MAKE) coverage-report

coverage-report: ## Combine and report coverage
1 change: 1 addition & 0 deletions dev/docker-compose-integration.yml
@@ -58,6 +58,7 @@ services:
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- CATALOG_S3_ENDPOINT=http://minio:9000
- CATALOG_JDBC_STRICT__MODE=true

minio:
image: minio/minio
container_name: pyiceberg-minio
97 changes: 97 additions & 0 deletions dev/docker-compose-trino.yml
@@ -0,0 +1,97 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
services:
rest:
image: apache/iceberg-rest-fixture
container_name: pyiceberg-rest
networks:
iceberg_net:
ports:
- 8181:8181
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
- CATALOG_WAREHOUSE=s3://warehouse/
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- CATALOG_S3_ENDPOINT=http://minio:9000

trino:
image: trinodb/trino:478
container_name: pyiceberg-trino
networks:
iceberg_net:
ports:
- 8082:8080
environment:
- CATALOG_MANAGEMENT=dynamic
depends_on:
- rest
- hive
volumes:
- ./trino/catalog/warehouse_rest.properties:/etc/trino/catalog/warehouse_rest.properties
- ./trino/catalog/warehouse_hive.properties:/etc/trino/catalog/warehouse_hive.properties
- ./trino/config.properties:/etc/trino/config.properties

minio:
image: minio/minio
container_name: pyiceberg-minio
environment:
- MINIO_ROOT_USER=admin
- MINIO_ROOT_PASSWORD=password
- MINIO_DOMAIN=minio
networks:
iceberg_net:
aliases:
- warehouse.minio
ports:
- 9001:9001
- 9000:9000
command: ["server", "/data", "--console-address", ":9001"]
mc:
depends_on:
- minio
image: minio/mc
container_name: pyiceberg-mc
networks:
iceberg_net:
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
entrypoint: >
/bin/sh -c "
until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
/usr/bin/mc mb minio/warehouse;
/usr/bin/mc policy set public minio/warehouse;
tail -f /dev/null
"

hive:
build: hive/
container_name: hive
hostname: hive
networks:
iceberg_net:
ports:
- 9083:9083
environment:
SERVICE_NAME: "metastore"
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"

networks:
iceberg_net:
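
Once dev/run-trino.sh brings this compose file up, the two Iceberg catalogs wired in above can be sanity-checked from the host. A minimal sketch, assuming the trino[sqlalchemy] extra added in pyproject.toml is installed and the 8082:8080 port mapping above is in effect:

```python
# Sketch (not part of this PR): confirm the Trino container exposes both catalogs
# mounted from dev/trino/catalog/ before running the marked tests.
from sqlalchemy import create_engine, text

engine = create_engine("trino://test@localhost:8082/system")
with engine.connect() as conn:
    catalogs = {row[0] for row in conn.execute(text("SHOW CATALOGS"))}

# warehouse_rest and warehouse_hive come from the mounted .properties files.
assert {"warehouse_hive", "warehouse_rest"} <= catalogs
```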
33 changes: 33 additions & 0 deletions dev/run-trino.sh
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

set -ex

if [ $(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running" ) ]; then
echo "Trino service running"
else
docker compose -f dev/docker-compose-trino.yml kill
docker compose -f dev/docker-compose-trino.yml up -d
while [ -z $(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running" ) ]
do
echo "Waiting for Trino"
sleep 1
done
fi
29 changes: 29 additions & 0 deletions dev/trino/catalog/warehouse_hive.properties
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
connector.name=iceberg
iceberg.catalog.type=hive_metastore
iceberg.expire-snapshots.min-retention=0d
iceberg.remove-orphan-files.min-retention=0d
iceberg.register-table-procedure.enabled=true
hive.metastore.uri=thrift://hive:9083
iceberg.hive-catalog-name=hive
fs.native-s3.enabled=true
s3.region=us-east-1
s3.aws-access-key=admin
s3.aws-secret-key=password
s3.endpoint=http://minio:9000
s3.path-style-access=false
31 changes: 31 additions & 0 deletions dev/trino/catalog/warehouse_rest.properties
@@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
connector.name=iceberg
iceberg.catalog.type=rest
iceberg.rest-catalog.uri=http://rest:8181
iceberg.rest-catalog.warehouse=s3://warehouse/default
iceberg.rest-catalog.nested-namespace-enabled=true
iceberg.rest-catalog.case-insensitive-name-matching=true
iceberg.expire-snapshots.min-retention=0d
iceberg.remove-orphan-files.min-retention=0d
iceberg.register-table-procedure.enabled=true
fs.native-s3.enabled=true
s3.region=us-east-1
s3.aws-access-key=admin
s3.aws-secret-key=password
s3.endpoint=http://minio:9000
s3.path-style-access=false
Review comment (Member): nit: We could remove this s3.path-style-access=false. It's disabled by default.

23 changes: 23 additions & 0 deletions dev/trino/config.properties
@@ -0,0 +1,23 @@
# Licensed to the Apache Software Foundation (ASF) under one
Member:
Why do we want to add a config.properties file?

dingo4dev (Contributor, Author), Dec 3, 2025:
Hi @ebyhr, thanks for your review. My initial thought was to set catalog.management to dynamic, which makes it possible to add and test the Glue and DynamoDB catalogs without restarting the container. I also figured that shipping the file improves the development experience: you can adjust the server configuration for your own resources without editing the Dockerfile yourself.

Member:
Thank you, I understand the motivation now. The next question is why setting the CATALOG_MANAGEMENT environment variable is insufficient.

dingo4dev (Contributor, Author):
I added catalog.management=${ENV:CATALOG_MANAGEMENT} so the setting stays consistent if someone edits config.properties explicitly. Alternatively, we could just use catalog.management=dynamic and drop the environment variable.

# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
coordinator=true
node-scheduler.include-coordinator=true
http-server.http.port=8080
discovery.uri=http://localhost:8080
http-server.process-forwarded=true
http-server.https.enabled=false
catalog.management=${ENV:CATALOG_MANAGEMENT}
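
The dynamic catalog management discussed in the review thread above is what catalog.management=${ENV:CATALOG_MANAGEMENT} enables once CATALOG_MANAGEMENT=dynamic is set in the compose file: catalogs can then be registered at runtime over SQL instead of mounting another .properties file and restarting the container. A hedged sketch; the catalog name and Glue properties below are hypothetical and not part of this PR:

```python
# Sketch: register an additional Iceberg catalog at runtime (requires
# catalog.management=dynamic in Trino). The Glue settings are illustrative only.
from sqlalchemy import create_engine, text

engine = create_engine("trino://test@localhost:8082/system")
with engine.connect() as conn:
    conn.execute(
        text(
            """
            CREATE CATALOG warehouse_glue USING iceberg
            WITH (
                "iceberg.catalog.type" = 'glue',
                "fs.native-s3.enabled" = 'true',
                "s3.region" = 'us-east-1'
            )
            """
        )
    )
    # The new catalog is queryable immediately, without a container restart.
```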
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -119,6 +119,7 @@ dev = [
"mypy-boto3-dynamodb>=1.28.18",
"pyarrow-stubs>=20.0.0.20251107", # Remove when pyarrow >= 23.0.0 https://github.com/apache/arrow/pull/47609
"sqlalchemy>=2.0.18,<3",
"trino[sqlalchemy]>=0.336.0",
]
# for mkdocs
docs = [
@@ -157,6 +158,7 @@ markers = [
"s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
"adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)",
"integration: marks integration tests against Apache Spark",
"trino: marks integration tests against Trino",
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
"benchmark: collection of tests to validate read/write performance before and after a change",
]
35 changes: 35 additions & 0 deletions tests/conftest.py
@@ -46,6 +46,7 @@
from moto import mock_aws
from pydantic_core import to_json
from pytest_lazyfixture import lazy_fixture
from sqlalchemy import Connection

from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog.memory import InMemoryCatalog
@@ -146,6 +147,18 @@ def pytest_addoption(parser: pytest.Parser) -> None:
"--gcs.oauth2.token", action="store", default="anon", help="The GCS authentication method for tests marked gcs"
)
parser.addoption("--gcs.project-id", action="store", default="test", help="The GCP project for tests marked gcs")
parser.addoption(
"--trino.rest.endpoint",
action="store",
default="trino://test@localhost:8082/warehouse_rest",
help="The Trino REST endpoint URL for tests marked as trino",
)
parser.addoption(
"--trino.hive.endpoint",
action="store",
default="trino://test@localhost:8082/warehouse_hive",
help="The Trino Hive endpoint URL for tests marked as trino",
)


@pytest.fixture(scope="session")
@@ -2583,6 +2596,28 @@ def bound_reference_uuid() -> BoundReference:
return BoundReference(field=NestedField(1, "field", UUIDType(), required=False), accessor=Accessor(position=0, inner=None))


@pytest.fixture(scope="session")
def trino_hive_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
from sqlalchemy import create_engine

trino_endpoint = request.config.getoption("--trino.hive.endpoint")
engine = create_engine(trino_endpoint)
connection = engine.connect()
yield connection
connection.close()


@pytest.fixture(scope="session")
def trino_rest_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
from sqlalchemy import create_engine

trino_endpoint = request.config.getoption("--trino.rest.endpoint")
engine = create_engine(trino_endpoint)
connection = engine.connect()
yield connection
connection.close()


@pytest.fixture(scope="session")
def session_catalog() -> Catalog:
return load_catalog(
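
As a usage note, the new session-scoped fixtures and the trino marker are meant to be combined in tests. A minimal hypothetical example (the test itself is a sketch, not code from this PR):

```python
# Hypothetical test showing how the fixtures added to conftest.py could be used.
import pytest
from sqlalchemy import Connection, text


@pytest.mark.trino
def test_rest_catalog_visible_from_trino(trino_rest_conn: Connection) -> None:
    # trino_rest_conn points at the warehouse_rest catalog started by run-trino.sh.
    schemas = {row[0] for row in trino_rest_conn.execute(text("SHOW SCHEMAS"))}
    assert "information_schema" in schemas
```

Such a test would run locally via `make test-trino`, with extra arguments passed through PYTEST_ARGS as the Makefile target allows.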