From 30017fff7b219d685bcaec6827f06f040f092f66 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 22 Dec 2025 00:44:32 +0100 Subject: [PATCH 1/3] Add GitHub Actions workflow for type checking (mypy, pyright, ty). Add script for including docstrings into stubfiles before building wheels. --- .github/workflows/python.yml | 9 +++ ci/scripts/python_test_type_annotations.bat | 38 ++++++++++ ci/scripts/python_test_type_annotations.sh | 40 +++++++++++ ci/scripts/python_wheel_macos_build.sh | 5 ++ ci/scripts/python_wheel_validate_contents.py | 5 ++ ci/scripts/python_wheel_windows_build.bat | 5 ++ ci/scripts/python_wheel_xlinux_build.sh | 5 ++ compose.yaml | 16 +++-- docs/source/developers/python/development.rst | 70 ++++++++++++++++++- python/MANIFEST.in | 1 + python/pyarrow-stubs/pyarrow/__init__.pyi | 26 +++++++ python/pyarrow/py.typed | 16 +++++ python/pyproject.toml | 40 ++++++++++- python/setup.py | 27 +++++++ 14 files changed, 296 insertions(+), 7 deletions(-) create mode 100644 ci/scripts/python_test_type_annotations.bat create mode 100755 ci/scripts/python_test_type_annotations.sh create mode 100644 python/pyarrow-stubs/pyarrow/__init__.pyi create mode 100644 python/pyarrow/py.typed diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e5d367958dd..4ca0f9b6dc6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -239,6 +239,11 @@ jobs: - name: Test shell: bash run: ci/scripts/python_test.sh $(pwd) $(pwd)/build + - name: Test annotations + shell: bash + env: + PYARROW_TEST_ANNOTATIONS: "ON" + run: ci/scripts/python_test_type_annotations.sh $(pwd)/python windows: name: AMD64 Windows 2022 Python 3.13 @@ -296,3 +301,7 @@ jobs: shell: cmd run: | call "ci\scripts\python_test.bat" %cd% + - name: Test annotations + shell: cmd + run: | + call "ci\scripts\python_test_type_annotations.bat" %cd%\python diff --git a/ci/scripts/python_test_type_annotations.bat b/ci/scripts/python_test_type_annotations.bat new file mode 100644 
index 00000000000..3446e329a89
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.bat
@@ -0,0 +1,38 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+set PYARROW_DIR=%1
+
+echo Annotation testing on Windows ...
+
+@REM Install library stubs
+%PYTHON_CMD% -m pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil || exit /B 1
+
+@REM Install other dependencies for type checking
+%PYTHON_CMD% -m pip install fsspec || exit /B 1
+
+@REM Install type checkers
+%PYTHON_CMD% -m pip install mypy pyright ty || exit /B 1
+
+@REM Run type checkers from the pyarrow source dir; stop at the first failure so CI reports it
+pushd %PYARROW_DIR% || exit /B 1
+
+mypy || exit /B 1
+pyright || exit /B 1
+ty check || exit /B 1
diff --git a/ci/scripts/python_test_type_annotations.sh b/ci/scripts/python_test_type_annotations.sh
new file mode 100755
index 00000000000..82610ce6630
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+pyarrow_dir=${1}
+
+if [[ "${PYARROW_TEST_ANNOTATIONS}" == "ON" ]]; then
+  # Install library stubs
+  pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil
+
+  # Install type checkers
+  pip install mypy pyright ty
+
+  # Install other dependencies for type checking
+  pip install fsspec
+
+  # Run type checkers from the pyarrow source dir; quoted so paths with spaces work
+  pushd "${pyarrow_dir:?pyarrow source directory argument is required}"
+  mypy
+  pyright
+  ty check
+else
+  echo "Skipping type annotation tests"
+fi
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 8d113312927..8d63679de08 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -175,6 +175,11 @@ export CMAKE_PREFIX_PATH=${build_dir}/install
 export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}

 pushd ${source_dir}/python
+# We first populate stub docstrings and then build the wheel
+python setup.py build_ext --inplace
+python -m pip install griffe libcst
+python ../dev/update_stub_docstrings.py pyarrow-stubs
+
 python setup.py bdist_wheel
 popd

diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py
index 84fcaba42e6..ee4a31aedb8 100644
--- a/ci/scripts/python_wheel_validate_contents.py
+++ b/ci/scripts/python_wheel_validate_contents.py
@@ -35,6 +35,11 @@ def validate_wheel(path):
     assert not outliers, f"Unexpected contents in wheel:
{sorted(outliers)}"
     print(f"The wheel: {wheels[0]} seems valid.")

+    candidates = [info for info in f.filelist if info.filename.endswith('compute.pyi')]
+    assert candidates, "compute.pyi not found in wheel"
+    content = f.read(candidates[0]).decode('utf-8', errors='replace')
+    assert '"""' in content, "compute.pyi missing docstrings (no triple quotes found)"
+

 def main():
     parser = argparse.ArgumentParser()
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index b4b7fed99fd..3da7f60f182 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -135,6 +135,11 @@ pushd C:\arrow\python
+@REM Populate stub docstrings first so they are part of the wheel built below
+%PYTHON_CMD% setup.py build_ext --inplace || exit /B 1
+%PYTHON_CMD% -m pip install griffe libcst || exit /B 1
+%PYTHON_CMD% ..\dev\update_stub_docstrings.py pyarrow-stubs || exit /B 1
+
 @REM Build wheel
 %PYTHON_CMD% setup.py bdist_wheel || exit /B 1

 @REM Repair the wheel with delvewheel
 @REM
 @REM Since we bundled the Arrow C++ libraries ourselves, we only need to
diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh
index a3fbeb3c0b3..977ef64e008 100755
--- a/ci/scripts/python_wheel_xlinux_build.sh
+++ b/ci/scripts/python_wheel_xlinux_build.sh
@@ -167,6 +167,11 @@ export ARROW_HOME=/tmp/arrow-dist
 export CMAKE_PREFIX_PATH=/tmp/arrow-dist

 pushd /arrow/python
+# We first populate stub docstrings and then build the wheel
+python setup.py build_ext --inplace
+python -m pip install griffe libcst
+python ../dev/update_stub_docstrings.py pyarrow-stubs
+
 python setup.py bdist_wheel

 echo "=== Strip symbols from wheel ==="
diff --git a/compose.yaml b/compose.yaml
index 84481e1af76..1d368d4df08 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -919,12 +919,14 @@ services:
     environment:
       <<: [*common, *ccache, *sccache]
       PYTEST_ARGS: # inherit
+      PYARROW_TEST_ANNOTATIONS: "ON"
     volumes: *conda-volumes
     command: &python-conda-command
       ["
/arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow"] + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"] conda-python-emscripten: # Usage: @@ -1001,6 +1003,7 @@ services: ARROW_S3: "OFF" ARROW_SUBSTRAIT: "OFF" ARROW_WITH_OPENTELEMETRY: "OFF" + PYARROW_TEST_ANNOTATIONS: "ON" SETUPTOOLS_SCM_PRETEND_VERSION: volumes: *ubuntu-volumes deploy: *cuda-deploy @@ -1008,7 +1011,8 @@ services: /bin/bash -c " /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow" + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python" debian-python: # Usage: @@ -1500,6 +1504,7 @@ services: python: ${PYTHON} shm_size: *shm-size environment: + PYARROW_TEST_ANNOTATIONS: "ON" <<: [*common, *ccache, *sccache] PARQUET_REQUIRE_ENCRYPTION: # inherit HYPOTHESIS_PROFILE: # inherit @@ -1510,7 +1515,8 @@ services: /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && mamba uninstall -y numpy && - /arrow/ci/scripts/python_test.sh /arrow"] + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"] conda-python-docs: # Usage: @@ -1530,13 +1536,15 @@ services: BUILD_DOCS_CPP: "ON" BUILD_DOCS_PYTHON: "ON" PYTEST_ARGS: "--doctest-modules --doctest-cython" + PYARROW_TEST_ANNOTATIONS: "ON" volumes: *conda-volumes command: ["/arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && pip install -e /arrow/dev/archery[numpydoc] && archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 && - /arrow/ci/scripts/python_test.sh /arrow"] + /arrow/ci/scripts/python_test.sh /arrow && + /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"] conda-python-dask: # Possible $DASK 
parameters: diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index d03b2439b10..c23891e94d0 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -42,7 +42,7 @@ Unit Testing ============ We are using `pytest `_ to develop our unit -test suite. After `building the project `_ you can run its unit tests +test suite. After `building the project `_ you can run its unit tests like so: .. code-block:: @@ -101,6 +101,74 @@ The test groups currently include: * ``s3``: Tests for Amazon S3 * ``tensorflow``: Tests that involve TensorFlow +Type Checking +============= + +PyArrow provides type stubs (``*.pyi`` files) for static type checking. These +stubs are located in the ``pyarrow-stubs/`` directory and are automatically +included in the distributed wheel packages. + +Running Type Checkers +--------------------- + +We support multiple type checkers. Their configurations are in +``pyproject.toml``. + +**mypy** + +To run mypy on the PyArrow codebase: + +.. code-block:: + + $ cd arrow/python + $ mypy + +The mypy configuration is in the ``[tool.mypy]`` section of ``pyproject.toml``. + +**pyright** + +To run pyright: + +.. code-block:: + + $ cd arrow/python + $ pyright + +The pyright configuration is in the ``[tool.pyright]`` section of ``pyproject.toml``. + +**ty** + +To run ty (note: currently only partially configured): + +.. code-block:: + + $ cd arrow/python + $ ty check + +Maintaining Type Stubs +----------------------- + +Type stubs for PyArrow are maintained in the ``pyarrow-stubs/`` +directory. These stubs mirror the structure of the main ``pyarrow/`` package. + +When adding or modifying public APIs: + +1. **Update the corresponding ``.pyi`` stub file** in ``pyarrow-stubs/`` + to reflect the new or changed function/class signatures. + +2. **Include type annotations** where possible. 
For Cython modules or + dynamically generated APIs such as compute kernels, add the corresponding + stub in ``pyarrow-stubs/``. + +3. **Run type checkers** to ensure the stubs are correct and complete. + +The stub files are automatically copied into the built wheel during the build +process and will be included when users install PyArrow, enabling type checking +in downstream projects and for users' IDEs. + +Note: The ``py.typed`` marker file in the ``pyarrow/`` directory indicates to type +checkers that PyArrow supports type checking according to :pep:`561`. + Doctest ======= diff --git a/python/MANIFEST.in b/python/MANIFEST.in index ed7012e4b70..2840ba74128 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -4,6 +4,7 @@ include ../NOTICE.txt global-include CMakeLists.txt graft pyarrow +graft pyarrow-stubs graft cmake_modules global-exclude *.so diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi new file mode 100644 index 00000000000..2a68a513099 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/__init__.pyi @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Type stubs for PyArrow. + +This is a placeholder stub file.
+Complete type annotations will be added in subsequent PRs. +""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow/py.typed b/python/pyarrow/py.typed new file mode 100644 index 00000000000..13a83393a91 --- /dev/null +++ b/python/pyarrow/py.typed @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyproject.toml b/python/pyproject.toml index 0a730fd4f78..c3ce61c6c31 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -84,11 +84,11 @@ zip-safe=false include-package-data=true [tool.setuptools.packages.find] -include = ["pyarrow"] +include = ["pyarrow", "pyarrow.*"] namespaces = false [tool.setuptools.package-data] -pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"] +pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd", "py.typed"] [tool.setuptools_scm] root = '..' 
@@ -96,3 +96,39 @@ version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' fallback_version = '23.0.0a0' + +# TODO: Enable type checking once stubs are merged +[tool.mypy] +files = ["pyarrow-stubs"] +mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs" +exclude = [ + "^pyarrow/", + "^benchmarks/", + "^examples/", + "^scripts/", +] + +# TODO: Enable type checking once stubs are merged +[tool.pyright] +pythonPlatform = "All" +pythonVersion = "3.10" +include = ["pyarrow-stubs"] +exclude = [ + "pyarrow", + "benchmarks", + "examples", + "scripts", + "build", +] +stubPath = "pyarrow-stubs" +typeCheckingMode = "basic" + +# TODO: Enable type checking once stubs are merged +[tool.ty.src] +include = ["pyarrow-stubs"] +exclude = [ + "pyarrow", + "benchmarks", + "examples", + "scripts", +] diff --git a/python/setup.py b/python/setup.py index a27bd3baefd..a25d2d76b36 100755 --- a/python/setup.py +++ b/python/setup.py @@ -121,8 +121,35 @@ def build_extensions(self): def run(self): self._run_cmake() + self._copy_stubs() _build_ext.run(self) + def _copy_stubs(self): + """Copy .pyi stub files from pyarrow-stubs to the build directory.""" + build_cmd = self.get_finalized_command('build') + build_lib = os.path.abspath(build_cmd.build_lib) + + stubs_src = pjoin(setup_dir, 'pyarrow-stubs', 'pyarrow') + stubs_dest = pjoin(build_lib, 'pyarrow') + + if os.path.exists(stubs_src): + print(f"-- Copying stub files from {stubs_src} to {stubs_dest}") + for root, dirs, files in os.walk(stubs_src): + # Calculate relative path from stubs_src + rel_dir = os.path.relpath(root, stubs_src) + dest_dir = pjoin(stubs_dest, rel_dir) if rel_dir != '.' 
else stubs_dest + + # Create destination directory if needed + if not os.path.exists(dest_dir): + os.makedirs(dest_dir) + + # Copy .pyi files + for file in files: + if file.endswith('.pyi'): + src_file = pjoin(root, file) + dest_file = pjoin(dest_dir, file) + shutil.copy2(src_file, dest_file) + # adapted from cmake_build_ext in dynd-python # github.com/libdynd/dynd-python From dcd6d8cc44b5187965d1701e577fd45021e9ebd7 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 22 Dec 2025 20:45:51 +0100 Subject: [PATCH 2/3] add-type-stubs-for-internal-type-system --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 133 +++ python/pyarrow-stubs/pyarrow/_types.pyi | 966 ++++++++++++++++++ python/pyarrow-stubs/pyarrow/error.pyi | 104 ++ python/pyarrow-stubs/pyarrow/io.pyi | 22 + python/pyarrow-stubs/pyarrow/lib.pyi | 25 + python/pyarrow-stubs/pyarrow/scalar.pyi | 22 + python/pyproject.toml | 19 - 7 files changed, 1272 insertions(+), 19 deletions(-) create mode 100644 python/pyarrow-stubs/pyarrow/_stubs_typing.pyi create mode 100644 python/pyarrow-stubs/pyarrow/_types.pyi create mode 100644 python/pyarrow-stubs/pyarrow/error.pyi create mode 100644 python/pyarrow-stubs/pyarrow/io.pyi create mode 100644 python/pyarrow-stubs/pyarrow/lib.pyi create mode 100644 python/pyarrow-stubs/pyarrow/scalar.pyi diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi new file mode 100644 index 00000000000..0715012fddc --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -0,0 +1,133 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt + +from collections.abc import Collection, Iterator, Sequence +from decimal import Decimal +from typing import Any, Literal, Protocol, TypeAlias, TypeVar + +import numpy as np + +from numpy.typing import NDArray + +from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray + +ArrayLike: TypeAlias = Any +ScalarLike: TypeAlias = Any +Order: TypeAlias = Literal["ascending", "descending"] +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] +Compression: TypeAlias = Literal[ + "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy" +] +NullEncoding: TypeAlias = Literal["mask", "encode"] +NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] +TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] +Mask: TypeAlias = ( + Sequence[bool | None] + | NDArray[np.bool_] + | BooleanArray + | ChunkedArray[Any] +) +Indices: TypeAlias = ( + Sequence[int | None] + | NDArray[np.integer[Any]] + | IntegerArray + | ChunkedArray[Any] +) + +PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes | + dt.date | dt.datetime | dt.time | dt.timedelta) + +_T = TypeVar("_T") +_V = TypeVar("_V", covariant=True) + +SingleOrList: TypeAlias = list[_T] | _T + + +class SupportEq(Protocol): + def __eq__(self, other) -> bool: ... + + +class SupportLt(Protocol): + def __lt__(self, other) -> bool: ... + + +class SupportGt(Protocol): + def __gt__(self, other) -> bool: ... 
+ + +class SupportLe(Protocol): + def __le__(self, other) -> bool: ... + + +class SupportGe(Protocol): + def __ge__(self, other) -> bool: ... + + +FilterTuple: TypeAlias = ( + tuple[str, Literal["=", "==", "!="], SupportEq] + | tuple[str, Literal["<"], SupportLt] + | tuple[str, Literal[">"], SupportGt] + | tuple[str, Literal["<="], SupportLe] + | tuple[str, Literal[">="], SupportGe] + | tuple[str, Literal["in", "not in"], Collection] + | tuple[str, str, Any] # Allow general str for operator to avoid type errors +) + + +class Buffer(Protocol): + ... + + +class SupportPyBuffer(Protocol): + ... + + +class SupportArrowStream(Protocol): + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + + +class SupportPyArrowArray(Protocol): + def __arrow_array__(self, type=None) -> Any: ... + + +class SupportArrowArray(Protocol): + def __arrow_c_array__(self, requested_schema=None) -> Any: ... + + +class SupportArrowDeviceArray(Protocol): + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + + +class SupportArrowSchema(Protocol): + def __arrow_c_schema__(self) -> Any: ... + + +class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse] + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi new file mode 100644 index 00000000000..3d802382ba1 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -0,0 +1,966 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 +import sys + +from collections.abc import Mapping, Sequence, Iterable, Iterator +from decimal import Decimal # noqa: F401 + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import Any, Generic, Literal + +import numpy as np +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowSchema +from pyarrow.lib import ( # noqa: F401 + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from typing_extensions import TypeVar, deprecated + +from .io import Buffer +from .scalar import ExtensionScalar +from ._stubs_typing import TimeUnit + +class _Weakrefable: + ... + + +class _Metadata(_Weakrefable): + ... + + +class DataType(_Weakrefable): + def field(self, i: int) -> Field: ... + + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + + @property + def byte_width(self) -> int: ... + + @property + def num_fields(self) -> int: ... + + @property + def num_buffers(self) -> int: ... + + @property + def has_variadic_buffers(self) -> bool: ... + + # Properties that exist on specific subtypes but accessed generically + @property + def list_size(self) -> int: ... + + def __hash__(self) -> int: ... + + def equals(self, other: DataType | str, *, + check_metadata: bool = False) -> bool: ... + + def to_pandas_dtype(self) -> np.generic: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... 
+ + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + + +class _BasicDataType(DataType, Generic[_AsPyType]): + ... + + +class NullType(_BasicDataType[None]): + ... + + +class BoolType(_BasicDataType[bool]): + ... + + +class UInt8Type(_BasicDataType[int]): + ... + + +class Int8Type(_BasicDataType[int]): + ... + + +class UInt16Type(_BasicDataType[int]): + ... + + +class Int16Type(_BasicDataType[int]): + ... + + +class UInt32Type(_BasicDataType[int]): + ... + + +class Int32Type(_BasicDataType[int]): + ... + + +class UInt64Type(_BasicDataType[int]): + ... + + +class Int64Type(_BasicDataType[int]): + ... + + +class Float16Type(_BasicDataType[float]): + ... + + +class Float32Type(_BasicDataType[float]): + ... + + +class Float64Type(_BasicDataType[float]): + ... + + +class Date32Type(_BasicDataType[dt.date]): + ... + + +class Date64Type(_BasicDataType[dt.date]): + ... + + +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): + ... + + +class StringType(_BasicDataType[str]): + ... + + +class LargeStringType(_BasicDataType[str]): + ... + + +class StringViewType(_BasicDataType[str]): + ... + + +class BinaryType(_BasicDataType[bytes]): + ... + + +class LargeBinaryType(_BasicDataType[bytes]): + ... + + +class BinaryViewType(_BasicDataType[bytes]): + ... + + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + + @property + def unit(self) -> _Unit: ... + + @property + def tz(self) -> _Tz: ... + + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... 
+ + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + + +class FixedSizeBinaryType(_BasicDataType[Decimal]): + ... + + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + + +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class ListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + + +class ListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + @property + def value_field(self) -> Field[_DataTypeT]: ... 
+ + @property + def value_type(self) -> _DataTypeT: ... + + @property + def list_size(self) -> int: ... + + +class DictionaryMemo(_Weakrefable): + ... + + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + + @property + def index_type(self) -> _IndexT: ... + + @property + def value_type(self) -> _BasicValueT: ... + + +_K = TypeVar("_K", bound=DataType) + + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + + @property + def key_type(self) -> _K: ... + + @property + def item_field(self) -> Field[_ValueT]: ... + + @property + def item_type(self) -> _ValueT: ... + + @property + def keys_sorted(self) -> _Ordered: ... + + +_Size = TypeVar("_Size", default=int) + + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... + + def field(self, i: int | str) -> Field: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + + @property + def fields(self) -> list[Field]: ... + + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + + @property + def type_codes(self) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + def field(self, i: int) -> Field: ... + + __getitem__ = field + + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... 
+ + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + + @property + def extension_name(self) -> str: ... + + @property + def storage_type(self) -> DataType: ... + + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + + def __arrow_ext_serialize__(self) -> bytes: ... + + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes) -> Self: ... + + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + + @property + def shape(self) -> list[int]: ... + + @property + def dim_names(self) -> list[str] | None: ... + + @property + def permutation(self) -> list[int] | None: ... + + +class Bool8Type(BaseExtensionType): + ... + + +class UuidType(BaseExtensionType): + ... + + +class JsonType(BaseExtensionType): + ... + + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + + @property + def vendor_name(self) -> str: ... + + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + + +def register_extension_type(ext_type: ExtensionType) -> None: ... + + +def unregister_extension_type(type_name: str) -> None: ... 
+ + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, __arg0__: Mapping[str | bytes, str | bytes] + | Iterable[tuple[str, str]] + | KeyValueMetadata + | None = None, **kwargs: str + ) -> None: ... + + def equals(self, other: KeyValueMetadata) -> bool: ... + + def __len__(self) -> int: ... + + def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override] + + def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override] + + def __iter__(self) -> Iterator[bytes]: ... + + def get_all(self, key: str) -> list[bytes]: ... + + def to_dict(self) -> dict[bytes, bytes]: ... + + +class Field(_Weakrefable, Generic[_DataTypeT]): + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... + + def __hash__(self) -> int: ... + + @property + def nullable(self) -> bool: ... + + @property + def name(self) -> str: ... + + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + + @property + def type(self) -> _DataTypeT: ... + def with_metadata(self, metadata: dict[bytes | str, bytes | str] | + Mapping[bytes | str, bytes | str] | Any) -> Self: ... + + def remove_metadata(self) -> Self: ... + + def with_type(self, new_type: DataType) -> Field: ... + + def with_name(self, name: str) -> Self: ... + + def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... + + def flatten(self) -> list[Field]: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +class Schema(_Weakrefable): + def __len__(self) -> int: ... + + def __getitem__(self, key: str | int) -> Field: ... + + _field = __getitem__ + def __iter__(self) -> Iterator[Field]: ... + + def __hash__(self) -> int: ... + + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: ... + + @property + def names(self) -> list[str]: ... 
+ + @property + def types(self) -> list[DataType]: ... + + @property + def metadata(self) -> dict[bytes, bytes]: ... + + def empty_table(self) -> Table: ... + + def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... + + @classmethod + def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | + None = None) -> Schema: ... + + def field(self, i: int | str | bytes) -> Field: ... + + @deprecated("Use 'field' instead") + def field_by_name(self, name: str) -> Field: ... + + def get_field_index(self, name: str) -> int: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def append(self, field: Field) -> Schema: ... + + def insert(self, i: int, field: Field) -> Schema: ... + + def remove(self, i: int) -> Schema: ... + + def set(self, i: int, field: Field) -> Schema: ... + + @deprecated("Use 'with_metadata' instead") + def add_metadata(self, metadata: dict) -> Schema: ... + + def with_metadata(self, metadata: dict) -> Schema: ... + + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def remove_metadata(self) -> Schema: ... + + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + element_size_limit: int | None = None, + ) -> str: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: ... + + def __arrow_c_schema__(self) -> Any: ... + + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: ... + + +def unify_schemas( + schemas: Sequence[Schema], + *, + promote_options: Literal["default", "permissive"] = "default" +) -> Schema: ... + + +def field( + name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, + nullable: bool = ..., + metadata: dict[Any, Any] | None = None +) -> Field[_DataTypeT] | Field[Any]: ... + + +def null() -> NullType: ... + + +def bool_() -> BoolType: ... + + +def uint8() -> UInt8Type: ... 
+ + +def int8() -> Int8Type: ... + + +def uint16() -> UInt16Type: ... + + +def int16() -> Int16Type: ... + + +def uint32() -> UInt32Type: ... + + +def int32() -> Int32Type: ... + + +def int64() -> Int64Type: ... + + +def uint64() -> UInt64Type: ... + + +def timestamp( + unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ... + + +def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... + + +def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... + + +def duration(unit: _Unit | str) -> DurationType[_Unit]: ... + + +def month_day_nano_interval() -> MonthDayNanoIntervalType: ... + + +def date32() -> Date32Type: ... + + +def date64() -> Date64Type: ... + + +def float16() -> Float16Type: ... + + +def float32() -> Float32Type: ... + + +def float64() -> Float64Type: ... + + +def decimal32(precision: _Precision, scale: _Scale | + None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal64(precision: _Precision, scale: _Scale | + None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal128(precision: _Precision, scale: _Scale | + None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal256(precision: _Precision, scale: _Scale | + None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... + + +def string() -> StringType: ... + + +utf8 = string + + +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ... + + +def large_binary() -> LargeBinaryType: ... + + +def large_string() -> LargeStringType: ... + + +large_utf8 = large_string + + +def binary_view() -> BinaryViewType: ... + + +def string_view() -> StringViewType: ... + + +def list_( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, + list_size: Literal[-1] | _Size | None = None +) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... 
+ + +def large_list(value_type: _DataTypeT | + Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ... + + +def list_view(value_type: _DataTypeT | + Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ... + + +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT] | None = None +) -> LargeListViewType[_DataTypeT]: ... + + +def map_( + key_type: _K | Field | str | None = None, + item_type: _ValueT | Field | str | None = None, + keys_sorted: bool | None = None +) -> MapType[_K, _ValueT, Literal[False]]: ... + + +def dictionary( + index_type: _IndexT | str, + value_type: _BasicValueT | str, + ordered: _Ordered | None = None +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... + + +def struct( + fields: Iterable[ + Field[Any] + | tuple[str, Field[Any] | None] + | tuple[str, DataType | None] + ] | Mapping[str, Field[Any] | DataType | None], +) -> StructType: ... + + +def sparse_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> SparseUnionType: ... + + +def dense_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> DenseUnionType: ... + + +def union( + child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str, + type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ... + + +def run_end_encoded( + run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... + + +def json_(storage_type: DataType = ...) -> JsonType: ... + + +def uuid() -> UuidType: ... + + +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: ... + + +def bool8() -> Bool8Type: ... + + +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... + + +def type_for_alias(name: Any) -> DataType: ... 
+ + +def schema( + fields: ( + Iterable[Field[Any]] + | Iterable[tuple[str, DataType | str | None]] + | Mapping[Any, DataType | str | None] + ), + metadata: Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] | None = None, +) -> Schema: ... + + +def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... + + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "UInt32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + 
"list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "schema", + "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + "_DataTypeT", +] diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi new file mode 100644 index 00000000000..eac936afcb5 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class ArrowException(Exception): + ... + + +class ArrowInvalid(ValueError, ArrowException): + ... + + +class ArrowMemoryError(MemoryError, ArrowException): + ... + + +class ArrowKeyError(KeyError, ArrowException): + ... + + +class ArrowTypeError(TypeError, ArrowException): + ... + + +class ArrowNotImplementedError(NotImplementedError, ArrowException): + ... + + +class ArrowCapacityError(ArrowException): + ... + + +class ArrowIndexError(IndexError, ArrowException): + ... 
+ + +class ArrowSerializationError(ArrowException): + ... + + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + + +ArrowIOError = IOError + + +class StopToken: + ... + + +def enable_signal_handlers(enable: bool) -> None: ... + + +have_signal_refcycle: bool + + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... + + +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi new file mode 100644 index 00000000000..467ec48cc76 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Placeholder stub - complete annotations in future PR.""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi new file mode 100644 index 00000000000..775434be2ea --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/lib.pyi @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub for pyarrow.lib C extension module. + +Complete type annotations will be added in subsequent PRs. +""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi new file mode 100644 index 00000000000..467ec48cc76 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub - complete annotations in future PR.""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyproject.toml b/python/pyproject.toml index c3ce61c6c31..aed9b391e8c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -101,34 +101,15 @@ fallback_version = '23.0.0a0' [tool.mypy] files = ["pyarrow-stubs"] mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs" -exclude = [ - "^pyarrow/", - "^benchmarks/", - "^examples/", - "^scripts/", -] # TODO: Enable type checking once stubs are merged [tool.pyright] pythonPlatform = "All" pythonVersion = "3.10" include = ["pyarrow-stubs"] -exclude = [ - "pyarrow", - "benchmarks", - "examples", - "scripts", - "build", -] stubPath = "pyarrow-stubs" typeCheckingMode = "basic" # TODO: Enable type checking once stubs are merged [tool.ty.src] include = ["pyarrow-stubs"] -exclude = [ - "pyarrow", - "benchmarks", - "examples", - "scripts", -] From d3c57403116fc07374a7def8a3e37609b16c612b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 22 Dec 2025 22:55:21 +0100 Subject: [PATCH 3/3] Remove PYARROW_TEST_ANNOTATIONS flag --- .github/workflows/python.yml | 2 -- ci/scripts/python_test_type_annotations.sh | 26 +++++++++------------- compose.yaml | 4 ---- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 4ca0f9b6dc6..bdd5ea428a8 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -241,8 +241,6 @@ jobs: run: ci/scripts/python_test.sh $(pwd) $(pwd)/build - name:
Test annotations shell: bash - env: - PYARROW_TEST_ANNOTATIONS: "ON" run: ci/scripts/python_test_type_annotations.sh $(pwd)/python windows: diff --git a/ci/scripts/python_test_type_annotations.sh b/ci/scripts/python_test_type_annotations.sh index 82610ce6630..5c3c831d4bd 100755 --- a/ci/scripts/python_test_type_annotations.sh +++ b/ci/scripts/python_test_type_annotations.sh @@ -20,21 +20,17 @@ set -ex pyarrow_dir=${1} -if [ "${PYARROW_TEST_ANNOTATIONS}" == "ON" ]; then - # Install library stubs - pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil +# Install library stubs +pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil - # Install type checkers - pip install mypy pyright ty +# Install type checkers +pip install mypy pyright ty - # Install other dependencies for type checking - pip install fsspec +# Install other dependencies for type checking +pip install fsspec - # Run type checkers - pushd ${pyarrow_dir} - mypy - pyright - ty check; -else - echo "Skipping type annotation tests"; -fi +# Run type checkers +pushd "${pyarrow_dir}" +mypy +pyright +ty check; diff --git a/compose.yaml b/compose.yaml index 1d368d4df08..21136ec3c6c 100644 --- a/compose.yaml +++ b/compose.yaml @@ -919,7 +919,6 @@ services: environment: <<: [*common, *ccache, *sccache] PYTEST_ARGS: # inherit - PYARROW_TEST_ANNOTATIONS: "ON" volumes: *conda-volumes command: &python-conda-command [" @@ -1003,7 +1002,6 @@ services: ARROW_S3: "OFF" ARROW_SUBSTRAIT: "OFF" ARROW_WITH_OPENTELEMETRY: "OFF" - PYARROW_TEST_ANNOTATIONS: "ON" SETUPTOOLS_SCM_PRETEND_VERSION: volumes: *ubuntu-volumes deploy: *cuda-deploy @@ -1504,7 +1502,6 @@ services: python: ${PYTHON} shm_size: *shm-size environment: - PYARROW_TEST_ANNOTATIONS: "ON" <<: [*common, *ccache, *sccache] PARQUET_REQUIRE_ENCRYPTION: # inherit HYPOTHESIS_PROFILE: # inherit @@ -1536,7 +1533,6 @@ services: BUILD_DOCS_CPP: "ON" BUILD_DOCS_PYTHON: "ON"
PYTEST_ARGS: "--doctest-modules --doctest-cython" - PYARROW_TEST_ANNOTATIONS: "ON" volumes: *conda-volumes command: ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&