diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index e5d367958dd..4ca0f9b6dc6 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -239,6 +239,11 @@ jobs:
- name: Test
shell: bash
run: ci/scripts/python_test.sh $(pwd) $(pwd)/build
+ - name: Test annotations
+ shell: bash
+ env:
+ PYARROW_TEST_ANNOTATIONS: "ON"
+ run: ci/scripts/python_test_type_annotations.sh $(pwd)/python
windows:
name: AMD64 Windows 2022 Python 3.13
@@ -296,3 +301,7 @@ jobs:
shell: cmd
run: |
call "ci\scripts\python_test.bat" %cd%
+ - name: Test annotations
+ shell: cmd
+ run: |
+ call "ci\scripts\python_test_type_annotations.bat" %cd%\python
diff --git a/ci/scripts/python_test_type_annotations.bat b/ci/scripts/python_test_type_annotations.bat
new file mode 100644
index 00000000000..3446e329a89
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.bat
@@ -0,0 +1,38 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+set PYARROW_DIR=%1
+
+echo Annotation testing on Windows ...
+
+@REM Install library stubs
+%PYTHON_CMD% -m pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil || exit /B 1
+
+@REM Install other dependencies for type checking
+%PYTHON_CMD% -m pip install fsspec || exit /B 1
+
+@REM Install type checkers
+%PYTHON_CMD% -m pip install mypy pyright ty || exit /B 1
+
+@REM Run type checkers
+pushd %PYARROW_DIR%
+
+mypy || exit /B 1
+pyright || exit /B 1
+ty check || exit /B 1
diff --git a/ci/scripts/python_test_type_annotations.sh b/ci/scripts/python_test_type_annotations.sh
new file mode 100755
index 00000000000..82610ce6630
--- /dev/null
+++ b/ci/scripts/python_test_type_annotations.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+pyarrow_dir=${1}
+
+if [ "${PYARROW_TEST_ANNOTATIONS}" == "ON" ]; then
+ # Install library stubs
+ pip install pandas-stubs scipy-stubs sphinx types-cffi types-psutil types-requests types-python-dateutil
+
+ # Install type checkers
+ pip install mypy pyright ty
+
+ # Install other dependencies for type checking
+ pip install fsspec
+
+ # Run type checkers
+  pushd "${pyarrow_dir}"
+  mypy
+  pyright
+  ty check
+else
+  echo "Skipping type annotation tests"
+fi
diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh
index 8d113312927..8d63679de08 100755
--- a/ci/scripts/python_wheel_macos_build.sh
+++ b/ci/scripts/python_wheel_macos_build.sh
@@ -175,6 +175,11 @@ export CMAKE_PREFIX_PATH=${build_dir}/install
export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
pushd ${source_dir}/python
+# We first populate stub docstrings and then build the wheel
+python setup.py build_ext --inplace
+python -m pip install griffe libcst
+python ../dev/update_stub_docstrings.py pyarrow-stubs
+
python setup.py bdist_wheel
popd
diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py
index 84fcaba42e6..ee4a31aedb8 100644
--- a/ci/scripts/python_wheel_validate_contents.py
+++ b/ci/scripts/python_wheel_validate_contents.py
@@ -35,6 +35,11 @@ def validate_wheel(path):
assert not outliers, f"Unexpected contents in wheel: {sorted(outliers)}"
print(f"The wheel: {wheels[0]} seems valid.")
+ candidates = [info for info in f.filelist if info.filename.endswith('compute.pyi')]
+ assert candidates, "compute.pyi not found in wheel"
+ content = f.read(candidates[0]).decode('utf-8', errors='replace')
+ assert '"""' in content, "compute.pyi missing docstrings (no triple quotes found)"
+
def main():
parser = argparse.ArgumentParser()
diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat
index b4b7fed99fd..3da7f60f182 100644
--- a/ci/scripts/python_wheel_windows_build.bat
+++ b/ci/scripts/python_wheel_windows_build.bat
@@ -135,6 +135,11 @@ pushd C:\arrow\python
+@REM We first populate stub docstrings and then build the wheel
+%PYTHON_CMD% setup.py build_ext --inplace || exit /B 1
+%PYTHON_CMD% -m pip install griffe libcst || exit /B 1
+%PYTHON_CMD% ..\dev\update_stub_docstrings.py pyarrow-stubs || exit /B 1
+
@REM Build wheel
%PYTHON_CMD% setup.py bdist_wheel || exit /B 1
@REM Repair the wheel with delvewheel
@REM
@REM Since we bundled the Arrow C++ libraries ourselves, we only need to
diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh
index a3fbeb3c0b3..977ef64e008 100755
--- a/ci/scripts/python_wheel_xlinux_build.sh
+++ b/ci/scripts/python_wheel_xlinux_build.sh
@@ -167,6 +167,11 @@ export ARROW_HOME=/tmp/arrow-dist
export CMAKE_PREFIX_PATH=/tmp/arrow-dist
pushd /arrow/python
+# We first populate stub docstrings and then build the wheel
+python setup.py build_ext --inplace
+python -m pip install griffe libcst
+python ../dev/update_stub_docstrings.py pyarrow-stubs
+
python setup.py bdist_wheel
echo "=== Strip symbols from wheel ==="
diff --git a/compose.yaml b/compose.yaml
index 84481e1af76..1d368d4df08 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -919,12 +919,14 @@ services:
environment:
<<: [*common, *ccache, *sccache]
PYTEST_ARGS: # inherit
+ PYARROW_TEST_ANNOTATIONS: "ON"
volumes: *conda-volumes
command: &python-conda-command
["
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
- /arrow/ci/scripts/python_test.sh /arrow"]
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
conda-python-emscripten:
# Usage:
@@ -1001,6 +1003,7 @@ services:
ARROW_S3: "OFF"
ARROW_SUBSTRAIT: "OFF"
ARROW_WITH_OPENTELEMETRY: "OFF"
+ PYARROW_TEST_ANNOTATIONS: "ON"
SETUPTOOLS_SCM_PRETEND_VERSION:
volumes: *ubuntu-volumes
deploy: *cuda-deploy
@@ -1008,7 +1011,8 @@ services:
/bin/bash -c "
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
- /arrow/ci/scripts/python_test.sh /arrow"
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"
debian-python:
# Usage:
@@ -1500,6 +1504,7 @@ services:
python: ${PYTHON}
shm_size: *shm-size
environment:
+ PYARROW_TEST_ANNOTATIONS: "ON"
<<: [*common, *ccache, *sccache]
PARQUET_REQUIRE_ENCRYPTION: # inherit
HYPOTHESIS_PROFILE: # inherit
@@ -1510,7 +1515,8 @@ services:
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
mamba uninstall -y numpy &&
- /arrow/ci/scripts/python_test.sh /arrow"]
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
conda-python-docs:
# Usage:
@@ -1530,13 +1536,15 @@ services:
BUILD_DOCS_CPP: "ON"
BUILD_DOCS_PYTHON: "ON"
PYTEST_ARGS: "--doctest-modules --doctest-cython"
+ PYARROW_TEST_ANNOTATIONS: "ON"
volumes: *conda-volumes
command:
["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
- /arrow/ci/scripts/python_test.sh /arrow"]
+ /arrow/ci/scripts/python_test.sh /arrow &&
+ /arrow/ci/scripts/python_test_type_annotations.sh /arrow/python"]
conda-python-dask:
# Possible $DASK parameters:
diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py
new file mode 100644
index 00000000000..eaeb2a510eb
--- /dev/null
+++ b/dev/update_stub_docstrings.py
@@ -0,0 +1,214 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Utility to extract docstrings from pyarrow and update
+# docstrings in stub files.
+#
+# Usage
+# =====
+#
+# python ../dev/update_stub_docstrings.py pyarrow-stubs
+
+
+from pathlib import Path
+from textwrap import indent
+
+import click
+# TODO: perhaps replace griffe with importlib
+import griffe
+from griffe import AliasResolutionError
+import libcst
+from libcst import matchers as m
+
+
+def _get_docstring(name, package, indentation):
+ # print("extract_docstrings", name)
+ try:
+ obj = package.get_member(name)
+ except (KeyError, ValueError, AliasResolutionError):
+ # Some cython __init__ symbols can't be found
+ # e.g. pyarrow.lib.OSFile.__init__
+ stack = name.split(".")
+ parent_name = ".".join(stack[:-1])
+
+ try:
+ obj = package.get_member(parent_name).all_members[stack[-1]]
+ except (KeyError, ValueError, AliasResolutionError):
+ print(f"{name} not found in {package.name}, it's probably ok.")
+ return None
+
+ if obj.has_docstring:
+ docstring = obj.docstring.value
+ # Remove signature if present in docstring
+ if docstring.startswith(obj.name) or (
+ (hasattr(obj.parent, "name") and
+ docstring.startswith(f"{obj.parent.name}.{obj.name}"))):
+ docstring = "\n".join(docstring.splitlines()[2:])
+ # Skip empty docstrings
+ if docstring.strip() == "":
+ return None
+ # Indent docstring
+ indentation_prefix = indentation * " "
+ docstring = indent(docstring + '\n"""', indentation_prefix)
+ docstring = '"""\n' + docstring
+ return docstring
+ return None
+
+
+class ReplaceEllipsis(libcst.CSTTransformer):
+ def __init__(self, package, namespace):
+ self.package = package
+ self.base_namespace = namespace
+ self.stack = []
+ self.indentation = 0
+
+ # Insert module level docstring if _clone_signature is used
+ def leave_Module(self, original_node, updated_node):
+ new_body = []
+ clone_matcher = m.SimpleStatementLine(
+ body=[m.Assign(
+ value=m.Call(func=m.Name(value="_clone_signature"))
+ ), m.ZeroOrMore()]
+ )
+ for statement in updated_node.body:
+ new_body.append(statement)
+ if m.matches(statement, clone_matcher):
+ name = statement.body[0].targets[0].target.value
+ if self.base_namespace:
+ name = f"{self.base_namespace}.{name}"
+ docstring = _get_docstring(name, self.package, 0)
+ if docstring is not None:
+ new_expr = libcst.Expr(value=libcst.SimpleString(docstring))
+ new_line = libcst.SimpleStatementLine(body=[new_expr])
+ new_body.append(new_line)
+
+ return updated_node.with_changes(body=new_body)
+
+ def visit_ClassDef(self, node):
+ self.stack.append(node.name.value)
+ self.indentation += 1
+
+ def leave_ClassDef(self, original_node, updated_node):
+ name = ".".join(self.stack)
+ if self.base_namespace:
+ name = self.base_namespace + "." + name
+
+ class_matcher_1 = m.ClassDef(
+ name=m.Name(),
+ body=m.IndentedBlock(
+ body=[m.SimpleStatementLine(
+ body=[m.Expr(m.Ellipsis()), m.ZeroOrMore()]
+ ), m.ZeroOrMore()]
+ )
+ )
+ class_matcher_2 = m.ClassDef(
+ name=m.Name(),
+ body=m.IndentedBlock(
+ body=[m.FunctionDef(), m.ZeroOrMore()]
+ )
+ )
+
+ if m.matches(updated_node, class_matcher_1):
+ docstring = _get_docstring(name, self.package, self.indentation)
+ if docstring is not None:
+ new_node = libcst.SimpleString(value=docstring)
+ updated_node = updated_node.deep_replace(
+ updated_node.body.body[0].body[0].value, new_node)
+
+ if m.matches(updated_node, class_matcher_2):
+ docstring = _get_docstring(name, self.package, self.indentation)
+ if docstring is not None:
+ new_docstring = libcst.SimpleString(value=docstring)
+ new_body = [
+ libcst.SimpleWhitespace(self.indentation * " "),
+ libcst.Expr(value=new_docstring),
+ libcst.Newline()
+ ] + list(updated_node.body.body)
+ new_body = libcst.IndentedBlock(body=new_body)
+ updated_node = updated_node.with_changes(body=new_body)
+
+ self.stack.pop()
+ self.indentation -= 1
+ return updated_node
+
+ def visit_FunctionDef(self, node):
+ self.stack.append(node.name.value)
+ self.indentation += 1
+
+ def leave_FunctionDef(self, original_node, updated_node):
+ name = ".".join(self.stack)
+ if self.base_namespace:
+ name = self.base_namespace + "." + name
+
+ function_matcher = m.FunctionDef(
+ name=m.Name(),
+ body=m.SimpleStatementSuite(
+ body=[m.Expr(
+ m.Ellipsis()
+ )]))
+ if m.matches(original_node, function_matcher):
+ docstring = _get_docstring(name, self.package, self.indentation)
+ if docstring is not None:
+ new_docstring = libcst.SimpleString(value=docstring)
+ new_body = [
+ libcst.SimpleWhitespace(self.indentation * " "),
+ libcst.Expr(value=new_docstring),
+ libcst.Newline()
+ ]
+ new_body = libcst.IndentedBlock(body=new_body)
+ updated_node = updated_node.with_changes(body=new_body)
+
+ self.stack.pop()
+ self.indentation -= 1
+ return updated_node
+
+
+@click.command()
+@click.argument('pyarrow_folder', type=click.Path(resolve_path=True))
+def add_docs_to_stub_files(pyarrow_folder):
+ print("Updating docstrings of stub files in:", pyarrow_folder)
+ package = griffe.load("pyarrow", try_relative_path=True,
+ force_inspection=True, resolve_aliases=True)
+ lib_modules = ["array", "builder", "compat", "config", "device", "error", "io",
+ "_ipc", "memory", "pandas_shim", "scalar", "table", "tensor",
+ "_types"]
+
+ for stub_file in Path(pyarrow_folder).rglob('*.pyi'):
+ if stub_file.name == "_stubs_typing.pyi":
+ continue
+ module = stub_file.with_suffix('').name
+ print(f"[{stub_file} {module}]")
+
+ with open(stub_file, 'r') as f:
+ tree = libcst.parse_module(f.read())
+
+ if module in lib_modules:
+ module = "lib"
+ elif stub_file.parent.name in ["parquet", "interchange"]:
+ module = f"{stub_file.parent.name}.{module}"
+ elif module == "__init__":
+ module = ""
+
+ modified_tree = tree.visit(ReplaceEllipsis(package, module))
+ with open(stub_file, "w") as f:
+ f.write(modified_tree.code)
+ print("\n")
+
+
+if __name__ == "__main__":
+    add_docs_to_stub_files(obj={})
diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst
index d03b2439b10..c23891e94d0 100644
--- a/docs/source/developers/python/development.rst
+++ b/docs/source/developers/python/development.rst
@@ -42,7 +42,7 @@ Unit Testing
============
We are using `pytest `_ to develop our unit
-test suite. After `building the project `_ you can run its unit tests
+test suite. After `building the project `_ you can run its unit tests
like so:
.. code-block::
@@ -101,6 +101,74 @@ The test groups currently include:
* ``s3``: Tests for Amazon S3
* ``tensorflow``: Tests that involve TensorFlow
+Type Checking
+=============
+
+PyArrow provides type stubs (``*.pyi`` files) for static type checking. These
+stubs are located in the ``pyarrow-stubs/`` directory and are automatically
+included in the distributed wheel packages.
+
+Running Type Checkers
+---------------------
+
+We support multiple type checkers. Their configurations are in
+``pyproject.toml``.
+
+**mypy**
+
+To run mypy on the PyArrow codebase:
+
+.. code-block::
+
+ $ cd arrow/python
+ $ mypy
+
+The mypy configuration is in the ``[tool.mypy]`` section of ``pyproject.toml``.
+
+**pyright**
+
+To run pyright:
+
+.. code-block::
+
+ $ cd arrow/python
+ $ pyright
+
+The pyright configuration is in the ``[tool.pyright]`` section of ``pyproject.toml``.
+
+**ty**
+
+To run ty (note: currently only partially configured):
+
+.. code-block::
+
+ $ cd arrow/python
+ $ ty check
+
+Maintaining Type Stubs
+-----------------------
+
+Type stubs for PyArrow are maintained in the ``pyarrow-stubs/``
+directory. These stubs mirror the structure of the main ``pyarrow/`` package.
+
+When adding or modifying public APIs:
+
+1. **Update the corresponding stub file** (``.pyi``) in ``pyarrow-stubs/``
+   to reflect the new or changed function or class signatures.
+
+2. **Include type annotations** where possible. For Cython modules or
+   dynamically generated APIs, such as compute kernels, add the corresponding
+   stub in ``pyarrow-stubs/`` (see the sketch after this list).
+
+3. **Run type checkers** to ensure the stubs are correct and complete.
+
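+For illustration, a stub entry for a hypothetical helper (``example_function``
+is a made-up name, not an actual PyArrow API) looks like this:
+
+.. code-block:: python
+
+   # Hypothetical stub signature, shown only to illustrate the ``.pyi`` style;
+   # real entries mirror the signature of the pyarrow function being annotated.
+   def example_function(values: list[int], *, skip_nulls: bool = True) -> int: ...
+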
+The stub files are automatically copied into the built wheel during the build
+process and will be included when users install PyArrow, enabling type checking
+in downstream projects and in users' IDEs.
+
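+As a minimal sketch of what this enables downstream (assuming an installed
+PyArrow wheel and any of the checkers above), annotations in user code can be
+verified against the bundled stubs:
+
+.. code-block:: python
+
+   import pyarrow as pa
+
+   # A type checker validates these annotations against the shipped stubs
+   # and reports a mismatch if an unrelated type is used instead.
+   table: pa.Table = pa.table({"x": [1, 2, 3]})
+   column: pa.ChunkedArray = table["x"]
+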
+Note: The ``py.typed`` marker file in the ``pyarrow/`` directory indicates to type
+checkers that PyArrow supports type checking according to :pep:`561`.
+
Doctest
=======
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index ed7012e4b70..2840ba74128 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -4,6 +4,7 @@ include ../NOTICE.txt
global-include CMakeLists.txt
graft pyarrow
+graft pyarrow-stubs
graft cmake_modules
global-exclude *.so
diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi
new file mode 100644
index 00000000000..ff0bd7fd5b8
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/__init__.pyi
@@ -0,0 +1,694 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow.lib as _lib
+
+from pyarrow.lib import (
+ BuildInfo,
+ CppBuildInfo,
+ RuntimeInfo,
+ set_timezone_db_path,
+ MonthDayNano,
+ VersionInfo,
+ build_info,
+ cpp_build_info,
+ cpp_version,
+ cpp_version_info,
+ runtime_info,
+ cpu_count,
+ set_cpu_count,
+ enable_signal_handlers,
+ io_thread_count,
+ set_io_thread_count,
+)
+
+from pyarrow.lib import (
+ null,
+ bool_,
+ int8,
+ int16,
+ int32,
+ int64,
+ uint8,
+ uint16,
+ uint32,
+ uint64,
+ time32,
+ time64,
+ timestamp,
+ date32,
+ date64,
+ duration,
+ month_day_nano_interval,
+ float16,
+ float32,
+ float64,
+ binary,
+ string,
+ utf8,
+ binary_view,
+ string_view,
+ large_binary,
+ large_string,
+ large_utf8,
+ decimal32,
+ decimal64,
+ decimal128,
+ decimal256,
+ list_,
+ large_list,
+ list_view,
+ large_list_view,
+ map_,
+ struct,
+ union,
+ sparse_union,
+ dense_union,
+ dictionary,
+ run_end_encoded,
+ json_,
+ uuid,
+ fixed_shape_tensor,
+ bool8,
+ opaque,
+ field,
+ type_for_alias,
+ DataType,
+ DictionaryType,
+ StructType,
+ ListType,
+ LargeListType,
+ FixedSizeListType,
+ ListViewType,
+ LargeListViewType,
+ MapType,
+ UnionType,
+ SparseUnionType,
+ DenseUnionType,
+ TimestampType,
+ Time32Type,
+ Time64Type,
+ DurationType,
+ FixedSizeBinaryType,
+ Decimal32Type,
+ Decimal64Type,
+ Decimal128Type,
+ Decimal256Type,
+ BaseExtensionType,
+ ExtensionType,
+ RunEndEncodedType,
+ FixedShapeTensorType,
+ Bool8Type,
+ UuidType,
+ JsonType,
+ OpaqueType,
+ UnknownExtensionType,
+ register_extension_type,
+ unregister_extension_type,
+ DictionaryMemo,
+ KeyValueMetadata,
+ Field,
+ Schema,
+ schema,
+ unify_schemas,
+ Array,
+ Tensor,
+ array,
+ arange,
+ chunked_array,
+ record_batch,
+ nulls,
+ repeat,
+ SparseCOOTensor,
+ SparseCSRMatrix,
+ SparseCSCMatrix,
+ SparseCSFTensor,
+ infer_type,
+ from_numpy_dtype,
+ NullArray,
+ NumericArray,
+ IntegerArray,
+ FloatingPointArray,
+ BooleanArray,
+ Int8Array,
+ UInt8Array,
+ Int16Array,
+ UInt16Array,
+ Int32Array,
+ UInt32Array,
+ Int64Array,
+ UInt64Array,
+ HalfFloatArray,
+ FloatArray,
+ DoubleArray,
+ ListArray,
+ LargeListArray,
+ FixedSizeListArray,
+ ListViewArray,
+ LargeListViewArray,
+ MapArray,
+ UnionArray,
+ BinaryArray,
+ StringArray,
+ LargeBinaryArray,
+ LargeStringArray,
+ BinaryViewArray,
+ StringViewArray,
+ FixedSizeBinaryArray,
+ DictionaryArray,
+ Date32Array,
+ Date64Array,
+ TimestampArray,
+ Time32Array,
+ Time64Array,
+ DurationArray,
+ MonthDayNanoIntervalArray,
+ Decimal32Array,
+ Decimal64Array,
+ Decimal128Array,
+ Decimal256Array,
+ StructArray,
+ ExtensionArray,
+ RunEndEncodedArray,
+ FixedShapeTensorArray,
+ Bool8Array,
+ UuidArray,
+ JsonArray,
+ OpaqueArray,
+ scalar,
+ NA,
+ _NULL as NULL,
+ Scalar,
+ NullScalar,
+ BooleanScalar,
+ Int8Scalar,
+ Int16Scalar,
+ Int32Scalar,
+ Int64Scalar,
+ UInt8Scalar,
+ UInt16Scalar,
+ UInt32Scalar,
+ UInt64Scalar,
+ HalfFloatScalar,
+ FloatScalar,
+ DoubleScalar,
+ Decimal32Scalar,
+ Decimal64Scalar,
+ Decimal128Scalar,
+ Decimal256Scalar,
+ ListScalar,
+ LargeListScalar,
+ FixedSizeListScalar,
+ ListViewScalar,
+ LargeListViewScalar,
+ Date32Scalar,
+ Date64Scalar,
+ Time32Scalar,
+ Time64Scalar,
+ TimestampScalar,
+ DurationScalar,
+ MonthDayNanoIntervalScalar,
+ BinaryScalar,
+ LargeBinaryScalar,
+ BinaryViewScalar,
+ StringScalar,
+ LargeStringScalar,
+ StringViewScalar,
+ FixedSizeBinaryScalar,
+ DictionaryScalar,
+ MapScalar,
+ StructScalar,
+ UnionScalar,
+ RunEndEncodedScalar,
+ ExtensionScalar,
+ Bool8Scalar,
+ UuidScalar,
+ JsonScalar,
+ OpaqueScalar,
+)
+
+# Buffers, allocation
+from pyarrow.lib import (
+ DeviceAllocationType,
+ Device,
+ MemoryManager,
+ default_cpu_memory_manager
+)
+
+from pyarrow.lib import (
+ Buffer,
+ ResizableBuffer,
+ foreign_buffer,
+ py_buffer,
+ Codec,
+ compress,
+ decompress,
+ allocate_buffer,
+)
+
+from pyarrow.lib import (
+ MemoryPool,
+ LoggingMemoryPool,
+ ProxyMemoryPool,
+ total_allocated_bytes,
+ set_memory_pool,
+ default_memory_pool,
+ system_memory_pool,
+ jemalloc_memory_pool,
+ mimalloc_memory_pool,
+ logging_memory_pool,
+ proxy_memory_pool,
+ log_memory_allocations,
+ jemalloc_set_decay_ms,
+ supported_memory_backends,
+)
+
+# I/O
+from pyarrow.lib import (
+ NativeFile,
+ PythonFile,
+ BufferedInputStream,
+ BufferedOutputStream,
+ CacheOptions,
+ CompressedInputStream,
+ CompressedOutputStream,
+ TransformInputStream,
+ transcoding_input_stream,
+ FixedSizeBufferWriter,
+ BufferReader,
+ BufferOutputStream,
+ OSFile,
+ MemoryMappedFile,
+ memory_map,
+ create_memory_map,
+ MockOutputStream,
+ input_stream,
+ output_stream,
+ have_libhdfs,
+)
+
+from pyarrow.lib import (
+ ChunkedArray,
+ RecordBatch,
+ Table,
+ table,
+ concat_arrays,
+ concat_batches,
+ concat_tables,
+ TableGroupBy,
+ RecordBatchReader,
+)
+
+# Exceptions
+from pyarrow.lib import (
+ ArrowCancelled,
+ ArrowCapacityError,
+ ArrowException,
+ ArrowKeyError,
+ ArrowIndexError,
+ ArrowInvalid,
+ ArrowIOError,
+ ArrowMemoryError,
+ ArrowNotImplementedError,
+ ArrowTypeError,
+ ArrowSerializationError,
+)
+
+from pyarrow.ipc import serialize_pandas, deserialize_pandas
+import pyarrow.ipc as ipc
+import pyarrow.lib as lib
+import pyarrow.types as types
+import pyarrow.feather as feather
+import pyarrow.compute as compute
+import pyarrow.csv as csv
+import pyarrow.json as json
+import pyarrow.dataset as dataset
+
+# ----------------------------------------------------------------------
+# Deprecations
+
+from pyarrow.util import _deprecate_api, _deprecate_class
+
+from pyarrow.ipc import (
+ Message,
+ MessageReader,
+ MetadataVersion,
+ RecordBatchFileReader,
+ RecordBatchFileWriter,
+ RecordBatchStreamReader,
+ RecordBatchStreamWriter,
+)
+
+
+__version__: str
+_gc_enabled: bool
+
+
+def show_versions() -> None: ...
+def show_info() -> None: ...
+def _module_is_available(module: str) -> bool: ...
+def _filesystem_is_available(fs: str) -> bool: ...
+
+
+def get_include() -> str: ...
+def _get_pkg_config_executable() -> str: ...
+def _has_pkg_config(pkgname: str) -> bool: ...
+def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ...
+def get_libraries() -> list[str]: ...
+def create_library_symlinks() -> None: ...
+def get_library_dirs() -> list[str]: ...
+
+
+__all__ = [
+ "__version__",
+ "_lib",
+ "_gc_enabled",
+ "BuildInfo",
+ "CppBuildInfo",
+ "RuntimeInfo",
+ "set_timezone_db_path",
+ "MonthDayNano",
+ "VersionInfo",
+ "build_info",
+ "cpp_build_info",
+ "cpp_version",
+ "cpp_version_info",
+ "runtime_info",
+ "cpu_count",
+ "set_cpu_count",
+ "enable_signal_handlers",
+ "io_thread_count",
+ "set_io_thread_count",
+ "show_versions",
+ "show_info",
+ "_module_is_available",
+ "_filesystem_is_available",
+ "null",
+ "bool_",
+ "int8",
+ "int16",
+ "int32",
+ "int64",
+ "uint8",
+ "uint16",
+ "uint32",
+ "uint64",
+ "time32",
+ "time64",
+ "timestamp",
+ "date32",
+ "date64",
+ "duration",
+ "month_day_nano_interval",
+ "float16",
+ "float32",
+ "float64",
+ "binary",
+ "string",
+ "utf8",
+ "binary_view",
+ "string_view",
+ "large_binary",
+ "large_string",
+ "large_utf8",
+ "decimal32",
+ "decimal64",
+ "decimal128",
+ "decimal256",
+ "list_",
+ "large_list",
+ "list_view",
+ "large_list_view",
+ "map_",
+ "struct",
+ "union",
+ "sparse_union",
+ "dense_union",
+ "dictionary",
+ "run_end_encoded",
+ "json_",
+ "uuid",
+ "fixed_shape_tensor",
+ "bool8",
+ "opaque",
+ "field",
+ "type_for_alias",
+ "DataType",
+ "DictionaryType",
+ "StructType",
+ "ListType",
+ "LargeListType",
+ "FixedSizeListType",
+ "ListViewType",
+ "LargeListViewType",
+ "MapType",
+ "UnionType",
+ "SparseUnionType",
+ "DenseUnionType",
+ "TimestampType",
+ "Time32Type",
+ "Time64Type",
+ "DurationType",
+ "FixedSizeBinaryType",
+ "Decimal32Type",
+ "Decimal64Type",
+ "Decimal128Type",
+ "Decimal256Type",
+ "BaseExtensionType",
+ "ExtensionType",
+ "RunEndEncodedType",
+ "FixedShapeTensorType",
+ "Bool8Type",
+ "UuidType",
+ "JsonType",
+ "OpaqueType",
+ "UnknownExtensionType",
+ "register_extension_type",
+ "unregister_extension_type",
+ "DictionaryMemo",
+ "KeyValueMetadata",
+ "Field",
+ "Schema",
+ "schema",
+ "unify_schemas",
+ "Array",
+ "Tensor",
+ "array",
+ "arange",
+ "chunked_array",
+ "record_batch",
+ "nulls",
+ "repeat",
+ "SparseCOOTensor",
+ "SparseCSRMatrix",
+ "SparseCSCMatrix",
+ "SparseCSFTensor",
+ "infer_type",
+ "from_numpy_dtype",
+ "NullArray",
+ "NumericArray",
+ "IntegerArray",
+ "FloatingPointArray",
+ "BooleanArray",
+ "Int8Array",
+ "UInt8Array",
+ "Int16Array",
+ "UInt16Array",
+ "Int32Array",
+ "UInt32Array",
+ "Int64Array",
+ "UInt64Array",
+ "HalfFloatArray",
+ "FloatArray",
+ "DoubleArray",
+ "ListArray",
+ "LargeListArray",
+ "FixedSizeListArray",
+ "ListViewArray",
+ "LargeListViewArray",
+ "MapArray",
+ "UnionArray",
+ "BinaryArray",
+ "StringArray",
+ "LargeBinaryArray",
+ "LargeStringArray",
+ "BinaryViewArray",
+ "StringViewArray",
+ "FixedSizeBinaryArray",
+ "DictionaryArray",
+ "Date32Array",
+ "Date64Array",
+ "TimestampArray",
+ "Time32Array",
+ "Time64Array",
+ "DurationArray",
+ "MonthDayNanoIntervalArray",
+ "Decimal32Array",
+ "Decimal64Array",
+ "Decimal128Array",
+ "Decimal256Array",
+ "StructArray",
+ "ExtensionArray",
+ "Bool8Array",
+ "UuidArray",
+ "JsonArray",
+ "OpaqueArray",
+ "RunEndEncodedArray",
+ "FixedShapeTensorArray",
+ "scalar",
+ "NA",
+ "NULL",
+ "Scalar",
+ "NullScalar",
+ "BooleanScalar",
+ "Int8Scalar",
+ "Int16Scalar",
+ "Int32Scalar",
+ "Int64Scalar",
+ "UInt8Scalar",
+ "UInt16Scalar",
+ "UInt32Scalar",
+ "UInt64Scalar",
+ "HalfFloatScalar",
+ "FloatScalar",
+ "DoubleScalar",
+ "Decimal32Scalar",
+ "Decimal64Scalar",
+ "Decimal128Scalar",
+ "Decimal256Scalar",
+ "ListScalar",
+ "LargeListScalar",
+ "FixedSizeListScalar",
+ "ListViewScalar",
+ "LargeListViewScalar",
+ "Date32Scalar",
+ "Date64Scalar",
+ "Time32Scalar",
+ "Time64Scalar",
+ "TimestampScalar",
+ "DurationScalar",
+ "MonthDayNanoIntervalScalar",
+ "BinaryScalar",
+ "LargeBinaryScalar",
+ "BinaryViewScalar",
+ "StringScalar",
+ "LargeStringScalar",
+ "StringViewScalar",
+ "FixedSizeBinaryScalar",
+ "DictionaryScalar",
+ "MapScalar",
+ "StructScalar",
+ "UnionScalar",
+ "RunEndEncodedScalar",
+ "ExtensionScalar",
+ "Bool8Scalar",
+ "UuidScalar",
+ "JsonScalar",
+ "OpaqueScalar",
+ "DeviceAllocationType",
+ "Device",
+ "MemoryManager",
+ "default_cpu_memory_manager",
+ "Buffer",
+ "ResizableBuffer",
+ "foreign_buffer",
+ "py_buffer",
+ "Codec",
+ "compress",
+ "decompress",
+ "allocate_buffer",
+ "MemoryPool",
+ "LoggingMemoryPool",
+ "ProxyMemoryPool",
+ "total_allocated_bytes",
+ "set_memory_pool",
+ "default_memory_pool",
+ "system_memory_pool",
+ "jemalloc_memory_pool",
+ "mimalloc_memory_pool",
+ "logging_memory_pool",
+ "proxy_memory_pool",
+ "log_memory_allocations",
+ "jemalloc_set_decay_ms",
+ "supported_memory_backends",
+ "NativeFile",
+ "PythonFile",
+ "BufferedInputStream",
+ "BufferedOutputStream",
+ "CacheOptions",
+ "CompressedInputStream",
+ "CompressedOutputStream",
+ "TransformInputStream",
+ "transcoding_input_stream",
+ "FixedSizeBufferWriter",
+ "BufferReader",
+ "BufferOutputStream",
+ "OSFile",
+ "MemoryMappedFile",
+ "memory_map",
+ "create_memory_map",
+ "MockOutputStream",
+ "input_stream",
+ "output_stream",
+ "have_libhdfs",
+ "ChunkedArray",
+ "RecordBatch",
+ "Table",
+ "table",
+ "concat_arrays",
+ "concat_batches",
+ "concat_tables",
+ "TableGroupBy",
+ "RecordBatchReader",
+ "ArrowCancelled",
+ "ArrowCapacityError",
+ "ArrowException",
+ "ArrowKeyError",
+ "ArrowIndexError",
+ "ArrowInvalid",
+ "ArrowIOError",
+ "ArrowMemoryError",
+ "ArrowNotImplementedError",
+ "ArrowTypeError",
+ "ArrowSerializationError",
+ "serialize_pandas",
+ "deserialize_pandas",
+ "lib",
+ "ipc",
+ "types",
+ "_deprecate_api",
+ "_deprecate_class",
+ "Message",
+ "MessageReader",
+ "MetadataVersion",
+ "RecordBatchFileReader",
+ "RecordBatchFileWriter",
+ "RecordBatchStreamReader",
+ "RecordBatchStreamWriter",
+ "get_include",
+ "_get_pkg_config_executable",
+ "compute",
+ "feather",
+ "csv",
+ "json",
+ "_has_pkg_config",
+ "_read_pkg_config_variable",
+ "get_libraries",
+ "create_library_symlinks",
+ "dataset",
+ "get_library_dirs",
+]
diff --git a/python/pyarrow-stubs/pyarrow/_acero.pyi b/python/pyarrow-stubs/pyarrow/_acero.pyi
new file mode 100644
index 00000000000..85ed9683e7e
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_acero.pyi
@@ -0,0 +1,163 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+from collections.abc import Iterable, Collection, Sequence
+
+if sys.version_info >= (3, 11):
+ from typing import Self, LiteralString
+else:
+ from typing_extensions import Self, LiteralString
+if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+else:
+ from typing_extensions import TypeAlias
+from typing import Literal
+
+from . import lib
+from .compute import Expression
+from .dataset import InMemoryDataset, Dataset
+from .table import Aggregation, AggregateOptions
+
+_StrOrExpr: TypeAlias = str | Expression
+
+IntoField: TypeAlias = str | int | Expression
+Target: TypeAlias = (
+ IntoField
+ | tuple[IntoField, ...]
+ | list[str]
+ | list[int]
+ | list[Expression]
+ | list[IntoField]
+)
+
+UserDefinedAggregation: TypeAlias = LiteralString
+OutputName: TypeAlias = str
+AggregationSpec: TypeAlias = tuple[
+ Target, Aggregation | UserDefinedAggregation, AggregateOptions | None, OutputName
+]
+
+
+class Declaration(lib._Weakrefable):
+ def __init__(
+ self,
+ factory_name: str,
+ options: ExecNodeOptions,
+ inputs: list[Declaration] | None = None,
+ ) -> None: ...
+ @classmethod
+ def from_sequence(cls, decls: Iterable[Declaration]) -> Self: ...
+ def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ...
+ def to_table(self, use_threads: bool = True) -> lib.Table: ...
+
+
+class ExecNodeOptions(lib._Weakrefable):
+ ...
+
+
+class TableSourceNodeOptions(ExecNodeOptions):
+ def __init__(self, table: lib.Table | lib.RecordBatch | None) -> None: ...
+
+
+class FilterNodeOptions(ExecNodeOptions):
+ def __init__(self, filter_expression: Expression | None) -> None: ...
+
+
+class ProjectNodeOptions(ExecNodeOptions):
+ def __init__(self, expressions: Collection[Expression],
+ names: Collection[str] | None = None) -> None: ...
+
+
+class AggregateNodeOptions(ExecNodeOptions):
+ def __init__(
+ self,
+ aggregates: Iterable[
+ tuple[
+ Target,
+ Aggregation | UserDefinedAggregation,
+ AggregateOptions | None,
+ OutputName,
+ ]
+ ],
+ keys: Iterable[str | Expression] | None = None,
+ ) -> None: ...
+
+
+class OrderByNodeOptions(ExecNodeOptions):
+ def __init__(
+ self,
+ sort_keys:
+ Iterable[tuple[str | Expression | int, Literal["ascending", "descending"]]]
+ = (),
+ *,
+ null_placement: Literal["at_start", "at_end"] = "at_end",
+ ) -> None: ...
+
+
+class HashJoinNodeOptions(ExecNodeOptions):
+ def __init__(
+ self,
+ join_type: Literal[
+ "left semi",
+ "right semi",
+ "left anti",
+ "right anti",
+ "inner",
+ "left outer",
+ "right outer",
+ "full outer",
+ ],
+ left_keys: _StrOrExpr | Sequence[_StrOrExpr],
+ right_keys: _StrOrExpr | Sequence[_StrOrExpr],
+ left_output: Sequence[_StrOrExpr] | None = None,
+ right_output: Sequence[_StrOrExpr] | None = None,
+ output_suffix_for_left: str = "",
+ output_suffix_for_right: str = "",
+ filter_expression:
+ lib.BooleanScalar | lib.BooleanArray | Expression | None = None,
+ ) -> None: ...
+
+
+class AsofJoinNodeOptions(ExecNodeOptions):
+ def __init__(
+ self,
+ left_on: _StrOrExpr,
+ left_by: _StrOrExpr | Sequence[_StrOrExpr],
+ right_on: _StrOrExpr,
+ right_by: _StrOrExpr | Sequence[_StrOrExpr],
+ tolerance: int,
+ ) -> None: ...
+
+
+def _perform_join(
+ join_type: str,
+ left_operand: lib.Table | Dataset,
+ left_keys: str | list[str],
+ right_operand: lib.Table | Dataset,
+ right_keys: str | list[str],
+ left_suffix: str,
+ right_suffix: str,
+ use_threads: bool,
+ coalesce_keys: bool,
+ output_type: type[lib.Table | InMemoryDataset] = lib.Table,
+ filter_expression: Expression | None = None,
+) -> lib.Table | InMemoryDataset: ...
+
+
+def _filter_table(
+ table: lib.Table | lib.RecordBatch, filter_expression: Expression,
+ use_threads: bool = True) -> lib.Table | lib.RecordBatch: ...
diff --git a/python/pyarrow-stubs/pyarrow/_azurefs.pyi b/python/pyarrow-stubs/pyarrow/_azurefs.pyi
new file mode 100644
index 00000000000..5872de03825
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_azurefs.pyi
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Literal
+
+from ._fs import FileSystem
+
+
+class AzureFileSystem(FileSystem):
+ def __init__(
+ self,
+ account_name: str | None = None,
+ account_key: str | None = None,
+ blob_storage_authority: str | None = None,
+ dfs_storage_authority: str | None = None,
+ blob_storage_scheme: Literal["http", "https"] = "https",
+ dfs_storage_scheme: Literal["http", "https"] = "https",
+ sas_token: str | None = None,
+ tenant_id: str | None = None,
+ client_id: str | None = None,
+ client_secret: str | None = None,
+ ) -> None: ...
diff --git a/python/pyarrow-stubs/pyarrow/_compute.pyi b/python/pyarrow-stubs/pyarrow/_compute.pyi
new file mode 100644
index 00000000000..dfe46908c08
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_compute.pyi
@@ -0,0 +1,671 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import types as stdlib_types
+from collections.abc import (
+ Callable,
+ Iterable,
+ Mapping,
+ Sequence,
+)
+
+from typing import (
+ Any,
+ Literal,
+ TypeAlias,
+ TypedDict,
+)
+
+from . import lib
+
+_Order: TypeAlias = Literal["ascending", "descending"]
+_Placement: TypeAlias = Literal["at_start", "at_end"]
+
+
+class Kernel(lib._Weakrefable):
+ ...
+
+
+class Function(lib._Weakrefable):
+ @property
+ def arity(self) -> int | stdlib_types.EllipsisType: ...
+
+ @property
+ def kind(
+ self,
+ ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: ...
+ @property
+ def name(self) -> str: ...
+ @property
+ def num_kernels(self) -> int: ...
+
+ @property
+ def kernels(
+ self,
+ ) -> list[
+ ScalarKernel | VectorKernel | ScalarAggregateKernel | HashAggregateKernel
+ ]: ...
+
+ def call(
+ self,
+ args: Iterable,
+ options: FunctionOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+ length: int | None = None,
+ ) -> Any: ...
+
+
+class FunctionOptions(lib._Weakrefable):
+ def serialize(self) -> lib.Buffer: ...
+ @classmethod
+ def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ...
+
+
+class FunctionRegistry(lib._Weakrefable):
+ def get_function(self, name: str) -> Function: ...
+ def list_functions(self) -> list[str]: ...
+
+
+class HashAggregateFunction(Function):
+ ...
+
+
+class HashAggregateKernel(Kernel):
+ ...
+
+
+class ScalarAggregateFunction(Function):
+ ...
+
+
+class ScalarAggregateKernel(Kernel):
+ ...
+
+
+class ScalarFunction(Function):
+ ...
+
+
+class ScalarKernel(Kernel):
+ ...
+
+
+class VectorFunction(Function):
+ ...
+
+
+class VectorKernel(Kernel):
+ ...
+
+# ==================== _compute.pyx Option classes ====================
+
+
+class ArraySortOptions(FunctionOptions):
+ def __init__(
+ self,
+ order: _Order = "ascending",
+ null_placement: _Placement = "at_end",
+ ) -> None: ...
+
+
+class AssumeTimezoneOptions(FunctionOptions):
+ def __init__(
+ self,
+ timezone: str,
+ *,
+ ambiguous: Literal["raise", "earliest", "latest"] = "raise",
+ nonexistent: Literal["raise", "earliest", "latest"] = "raise",
+ ) -> None: ...
+
+
+class CastOptions(FunctionOptions):
+ allow_int_overflow: bool
+ allow_time_truncate: bool
+ allow_time_overflow: bool
+ allow_decimal_truncate: bool
+ allow_float_truncate: bool
+ allow_invalid_utf8: bool
+
+ def __init__(
+ self,
+ target_type: lib.DataType | None = None,
+ *,
+ allow_int_overflow: bool | None = None,
+ allow_time_truncate: bool | None = None,
+ allow_time_overflow: bool | None = None,
+ allow_decimal_truncate: bool | None = None,
+ allow_float_truncate: bool | None = None,
+ allow_invalid_utf8: bool | None = None,
+ ) -> None: ...
+ @staticmethod
+ def safe(target_type: lib.DataType | None = None) -> CastOptions: ...
+ @staticmethod
+ def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ...
+ def is_safe(self) -> bool: ...
+
+
+class CountOptions(FunctionOptions):
+ def __init__(self, mode: Literal["only_valid",
+ "only_null", "all"] = "only_valid") -> None: ...
+
+
+class CumulativeOptions(FunctionOptions):
+ def __init__(self, start: lib.Scalar | None = None,
+ *, skip_nulls: bool = False) -> None: ...
+
+
+class CumulativeSumOptions(FunctionOptions):
+ def __init__(self, start: lib.Scalar | None = None,
+ *, skip_nulls: bool = False) -> None: ...
+
+
+class DayOfWeekOptions(FunctionOptions):
+ def __init__(self, *, count_from_zero: bool = True,
+ week_start: int = 1) -> None: ...
+
+
+class DictionaryEncodeOptions(FunctionOptions):
+ def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ...
+
+
+class RunEndEncodeOptions(FunctionOptions):
+ # TODO: default is DataType(int32)
+ def __init__(self, run_end_type: lib.DataType | str = ...) -> None: ...
+
+
+class ElementWiseAggregateOptions(FunctionOptions):
+ def __init__(self, *, skip_nulls: bool = True) -> None: ...
+
+
+class ExtractRegexOptions(FunctionOptions):
+ def __init__(self, pattern: str) -> None: ...
+
+
+class ExtractRegexSpanOptions(FunctionOptions):
+ def __init__(self, pattern: str) -> None: ...
+
+
+class FilterOptions(FunctionOptions):
+ def __init__(self,
+ null_selection_behavior: Literal["drop",
+ "emit_null"] = "drop") -> None: ...
+
+
+class IndexOptions(FunctionOptions):
+ def __init__(self, value: lib.Scalar) -> None: ...
+
+
+class JoinOptions(FunctionOptions):
+ def __init__(
+ self,
+ null_handling:
+ Literal["emit_null", "skip", "replace"]
+ = "emit_null", *, null_replacement: str = "") -> None: ...
+
+
+class ListSliceOptions(FunctionOptions):
+ def __init__(
+ self,
+ start: int,
+ stop: int | None = None,
+ step: int = 1,
+ return_fixed_size_list: bool | None = None,
+ ) -> None: ...
+
+
+class ListFlattenOptions(FunctionOptions):
+ def __init__(self, recursive: bool = False) -> None: ...
+
+
+class MakeStructOptions(FunctionOptions):
+ def __init__(
+ self,
+ field_names: Sequence[str] = (),
+ *,
+ field_nullability: Sequence[bool] | None = None,
+ field_metadata: Sequence[lib.KeyValueMetadata] | None = None,
+ ) -> None: ...
+
+
+class MapLookupOptions(FunctionOptions):
+ # TODO: query_key: Scalar or Object can be converted to Scalar
+ def __init__(
+ self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"]
+ ) -> None: ...
+
+
+class MatchSubstringOptions(FunctionOptions):
+ def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ...
+
+
+class ModeOptions(FunctionOptions):
+ def __init__(self, n: int = 1, *, skip_nulls: bool = True,
+ min_count: int = 0) -> None: ...
+
+
+class NullOptions(FunctionOptions):
+ def __init__(self, *, nan_is_null: bool = False) -> None: ...
+
+
+class PadOptions(FunctionOptions):
+ def __init__(
+ self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True
+ ) -> None: ...
+
+
+class PairwiseOptions(FunctionOptions):
+ def __init__(self, period: int = 1) -> None: ...
+
+
+class PartitionNthOptions(FunctionOptions):
+ def __init__(self, pivot: int, *,
+ null_placement: _Placement = "at_end") -> None: ...
+
+
+class WinsorizeOptions(FunctionOptions):
+ def __init__(self, lower_limit: float, upper_limit: float) -> None: ...
+
+
+class QuantileOptions(FunctionOptions):
+ def __init__(
+ self,
+ q: float | Sequence[float] = 0.5,
+ *,
+ interpolation: Literal["linear", "lower",
+ "higher", "nearest", "midpoint"] = "linear",
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ ) -> None: ...
+
+
+class RandomOptions(FunctionOptions):
+ def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ...
+
+
+class RankOptions(FunctionOptions):
+ def __init__(
+ self,
+ sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending",
+ *,
+ null_placement: _Placement = "at_end",
+ tiebreaker: Literal["min", "max", "first", "dense"] = "first",
+ ) -> None: ...
+
+
+class RankQuantileOptions(FunctionOptions):
+ def __init__(
+ self,
+ sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending",
+ *,
+ null_placement: _Placement = "at_end",
+ ) -> None: ...
+
+
+class PivotWiderOptions(FunctionOptions):
+ def __init__(
+ self,
+ key_names: Sequence[str],
+ *,
+ unexpected_key_behavior: Literal["ignore", "raise"] = "ignore",
+ ) -> None: ...
+
+
+class ReplaceSliceOptions(FunctionOptions):
+ def __init__(self, start: int, stop: int, replacement: str) -> None: ...
+
+
+class ReplaceSubstringOptions(FunctionOptions):
+ def __init__(
+ self, pattern: str, replacement: str, *, max_replacements: int | None = None
+ ) -> None: ...
+
+
+_RoundMode: TypeAlias = Literal[
+ "down",
+ "up",
+ "towards_zero",
+ "towards_infinity",
+ "half_down",
+ "half_up",
+ "half_towards_zero",
+ "half_towards_infinity",
+ "half_to_even",
+ "half_to_odd",
+]
+
+
+class RoundBinaryOptions(FunctionOptions):
+ def __init__(
+ self,
+ round_mode: _RoundMode = "half_to_even",
+ ) -> None: ...
+
+
+class RoundOptions(FunctionOptions):
+ def __init__(
+ self,
+ ndigits: int = 0,
+ round_mode: _RoundMode = "half_to_even",
+ ) -> None: ...
+
+
+_DateTimeUint: TypeAlias = Literal[
+ "year",
+ "quarter",
+ "month",
+ "week",
+ "day",
+ "hour",
+ "minute",
+ "second",
+ "millisecond",
+ "microsecond",
+ "nanosecond",
+]
+
+
+class RoundTemporalOptions(FunctionOptions):
+ def __init__(
+ self,
+ multiple: int = 1,
+ unit: _DateTimeUint = "day",
+ *,
+ week_starts_monday: bool = True,
+ ceil_is_strictly_greater: bool = False,
+ calendar_based_origin: bool = False,
+ ) -> None: ...
+
+
+class RoundToMultipleOptions(FunctionOptions):
+ def __init__(self, multiple: int | float | lib.Scalar = 1.0,
+ round_mode: _RoundMode = "half_to_even") -> None: ...
+
+
+class ScalarAggregateOptions(FunctionOptions):
+ def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ...
+
+
+class SelectKOptions(FunctionOptions):
+ def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ...
+
+
+class SetLookupOptions(FunctionOptions):
+ def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ...
+
+
+class SliceOptions(FunctionOptions):
+ def __init__(
+ self, start: int, stop: int | None = None, step: int = 1) -> None: ...
+
+
+class SortOptions(FunctionOptions):
+ def __init__(
+ self,
+ sort_keys: Sequence[tuple[str, _Order]],
+ *,
+ null_placement: _Placement = "at_end"
+ ) -> None: ...
+
+
+class SplitOptions(FunctionOptions):
+ def __init__(self, *, max_splits: int | None = None,
+ reverse: bool = False) -> None: ...
+
+
+class SplitPatternOptions(FunctionOptions):
+ def __init__(
+ self, pattern: str, *, max_splits: int | None = None, reverse: bool = False
+ ) -> None: ...
+
+
+class StrftimeOptions(FunctionOptions):
+ def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S",
+ locale: str = "C") -> None: ...
+
+
+class StrptimeOptions(FunctionOptions):
+ def __init__(self,
+ format: str,
+ unit: Literal["s",
+ "ms",
+ "us",
+ "ns"],
+ error_is_null: bool = False) -> None: ...
+
+
+class StructFieldOptions(FunctionOptions):
+ def __init__(self, indices: list[str] | list[bytes] |
+ list[int] | Expression | bytes | str | int) -> None: ...
+
+
+class TakeOptions(FunctionOptions):
+ def __init__(self, boundscheck: bool = True) -> None: ...
+
+
+class TDigestOptions(FunctionOptions):
+ def __init__(
+ self,
+ q: float | Sequence[float] = 0.5,
+ *,
+ delta: int = 100,
+ buffer_size: int = 500,
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ ) -> None: ...
+
+
+class TrimOptions(FunctionOptions):
+ def __init__(self, characters: str) -> None: ...
+
+
+class Utf8NormalizeOptions(FunctionOptions):
+ def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ...
+
+
+class VarianceOptions(FunctionOptions):
+ def __init__(self, *, ddof: int = 0, skip_nulls: bool = True,
+ min_count: int = 0) -> None: ...
+
+
+class SkewOptions(FunctionOptions):
+ def __init__(
+ self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0
+ ) -> None: ...
+
+
+class WeekOptions(FunctionOptions):
+ def __init__(
+ self,
+ *,
+ week_starts_monday: bool = True,
+ count_from_zero: bool = False,
+ first_week_is_fully_in_year: bool = False,
+ ) -> None: ...
+
+
+class ZeroFillOptions(FunctionOptions):
+ def __init__(self, width: int, padding: str = "0") -> None: ...
+
+# ==================== _compute.pyx Functions ====================
+
+
+def call_function(
+ name: str,
+ args: list,
+ options: FunctionOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+ length: int | None = None,
+) -> Any: ...
+def function_registry() -> FunctionRegistry: ...
+def get_function(name: str) -> Function: ...
+def list_functions() -> list[str]: ...
+
+# ==================== _compute.pyx Udf ====================
+
+
+def call_tabular_function(
+ function_name: str,
+ args: Iterable | None = None,
+ func_registry: FunctionRegistry | None = None) -> lib.RecordBatchReader: ...
+
+
+class _FunctionDoc(TypedDict):
+ summary: str
+ description: str
+
+
+def register_scalar_function(
+ func: Callable | None,
+ function_name: str | None,
+ function_doc: _FunctionDoc | dict[str, str],
+ in_types: Mapping[str, lib.DataType] | None,
+ out_type: lib.DataType | None,
+ func_registry: FunctionRegistry | None = None,
+) -> None: ...
+
+
+def register_tabular_function(
+ func: Callable,
+ function_name: str,
+ function_doc: _FunctionDoc | dict[str, str],
+ in_types: Mapping[str, lib.DataType],
+ out_type: lib.DataType,
+ func_registry: FunctionRegistry | None = None,
+) -> None: ...
+
+
+def register_aggregate_function(
+ func: Callable,
+ function_name: str,
+ function_doc: _FunctionDoc | dict[str, str],
+ in_types: Mapping[str, lib.DataType],
+ out_type: lib.DataType,
+ func_registry: FunctionRegistry | None = None,
+) -> None: ...
+
+
+def register_vector_function(
+ func: Callable,
+ function_name: str,
+ function_doc: _FunctionDoc | dict[str, str],
+ in_types: Mapping[str, lib.DataType],
+ out_type: lib.DataType,
+ func_registry: FunctionRegistry | None = None,
+) -> None: ...
+
+
+class UdfContext:
+ @property
+ def batch_length(self) -> int: ...
+ @property
+ def memory_pool(self) -> lib.MemoryPool: ...
+
+
+def _get_udf_context(memory_pool: lib.MemoryPool, batch_length: int) -> UdfContext: ...
+
+# ==================== _compute.pyx Expression ====================
+
+
+class Expression(lib._Weakrefable):
+ @staticmethod
+ def from_substrait(buffer: bytes | lib.Buffer) -> Expression: ...
+
+ def to_substrait(self, schema: lib.Schema,
+ allow_arrow_extensions: bool = False) -> lib.Buffer: ...
+
+ @staticmethod
+ def _call(
+ func_name: str, args: list, options: FunctionOptions | None = None
+ ) -> Expression: ...
+
+ @staticmethod
+ def _field(name_or_index: str | int) -> Expression: ...
+
+ @staticmethod
+ def _nested_field(name: str) -> Expression: ...
+
+ @staticmethod
+ def _scalar(value: Any) -> Expression: ...
+
+ def __invert__(self) -> Expression: ...
+
+ def __and__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __rand__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __or__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __ror__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __add__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __radd__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __mul__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __rmul__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __sub__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __rsub__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __eq__(self, value: object) -> Expression: ... # type: ignore[override]
+ def __ne__(self, value: object) -> Expression: ... # type: ignore[override]
+ def __gt__(self, value: object) -> Expression: ...
+ def __lt__(self, value: object) -> Expression: ...
+ def __ge__(self, value: object) -> Expression: ...
+ def __le__(self, value: object) -> Expression: ...
+
+ def __truediv__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def __rtruediv__(
+ self, other: Expression | lib.Scalar | lib.Array | object) -> Expression: ...
+
+ def is_valid(self) -> Expression: ...
+ def is_null(self, nan_is_null: bool = False) -> Expression: ...
+ def is_nan(self) -> Expression: ...
+
+ def cast(
+ self,
+ type: lib.DataType | str, safe: bool = True, options: CastOptions | None = None
+ ) -> Expression: ...
+
+ def isin(self, values: lib.Array | Iterable | Any) -> Expression: ...
+ def equals(self, other: object) -> bool: ...
+
+ # Attributes and methods for materialized expressions (used in tests)
+ @property
+ def type(self) -> lib.DataType: ...
+ def to_pylist(self) -> list: ...
+ def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> Any: ...
+ def to_pandas(self, **kwargs) -> Any: ...
+ def as_py(self) -> Any: ...
+ def tolist(self) -> list: ...
+ def slice(self, offset: int = 0, length: int | None = None) -> Expression: ...
+
+# ==================== _compute.py ====================
diff --git a/python/pyarrow-stubs/pyarrow/_compute_docstring.pyi b/python/pyarrow-stubs/pyarrow/_compute_docstring.pyi
new file mode 100644
index 00000000000..514a4e4269c
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_compute_docstring.pyi
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+function_doc_additions: dict[str, str]
diff --git a/python/pyarrow-stubs/pyarrow/_csv.pyi b/python/pyarrow-stubs/pyarrow/_csv.pyi
new file mode 100644
index 00000000000..6c911a8b0c1
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_csv.pyi
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass, field
+from typing import IO, Any, Literal
+
+from _typeshed import StrPath
+
+from . import lib
+
+
+@dataclass(kw_only=True)
+class ReadOptions(lib._Weakrefable):
+ use_threads: bool = field(default=True, kw_only=False) # noqa: Y015
+ block_size: int | float | None = None
+ skip_rows: int = 0
+ skip_rows_after_names: int = 0
+ column_names: Sequence[str] | None = None
+ autogenerate_column_names: bool = False
+ encoding: str = "utf8"
+ def validate(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class ParseOptions(lib._Weakrefable):
+ delimiter: str = field(default=",", kw_only=False) # noqa: Y015
+ quote_char: str | Literal[False] = '"'
+ double_quote: bool = True
+ escape_char: str | Literal[False] = False
+ newlines_in_values: bool = False
+ ignore_empty_lines: bool = True
+ invalid_row_handler: Callable[[InvalidRow], str] | None = None
+
+ def validate(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class ConvertOptions(lib._Weakrefable):
+ check_utf8: bool = field(default=True, kw_only=False) # noqa: Y015
+ column_types: lib.Schema | dict | Sequence[tuple[str, lib.DataType]] | None = None
+ null_values: list[str] | None = None
+ true_values: list[str] | None = None
+ false_values: list[str] | None = None
+ decimal_point: str = "."
+ strings_can_be_null: bool = False
+ quoted_strings_can_be_null: bool = True
+ include_columns: list[str] | None = None
+ include_missing_columns: bool = False
+ auto_dict_encode: bool = False
+ auto_dict_max_cardinality: int | None = None
+ timestamp_parsers: Sequence[str | lib._Weakrefable] | None = None
+
+ def validate(self) -> None: ...
+
+
+@dataclass(kw_only=True)
+class WriteOptions(lib._Weakrefable):
+ include_header: bool = field(default=True, kw_only=False) # noqa: Y015
+ batch_size: int = 1024
+ delimiter: str = ","
+ quoting_style: Literal["needed", "all_valid", "none"] = "needed"
+ quoting_header: Literal["needed", "all_valid", "none"] = "needed"
+
+ def validate(self) -> None: ...
+
+
+@dataclass
+class InvalidRow(lib._Weakrefable):
+ expected_columns: int
+ actual_columns: int
+ number: int | None
+ text: str
+
+
+class CSVWriter(lib._CRecordBatchWriter):
+ def __init__(
+ self,
+ # TODO: OutputStream
+ sink: StrPath | IO[Any],
+ schema: lib.Schema,
+ write_options: WriteOptions | None = None,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> None: ...
+
+
+class CSVStreamingReader(lib.RecordBatchReader):
+ ...
+
+
+ISO8601: lib._Weakrefable
+
+
+def open_csv(
+ input_file: StrPath | IO[Any],
+ read_options: ReadOptions | None = None,
+ parse_options: ParseOptions | None = None,
+ convert_options: ConvertOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> CSVStreamingReader: ...
+
+
+def read_csv(
+ input_file: StrPath | IO[Any],
+ read_options: ReadOptions | None = None,
+ parse_options: ParseOptions | None = None,
+ convert_options: ConvertOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Table: ...
+
+
+def write_csv(
+ data: lib.RecordBatch | lib.Table,
+ output_file: StrPath | lib.NativeFile | IO[Any],
+ write_options: WriteOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> None: ...
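
A short sketch of how these option classes are typically passed to the readers and writer declared above (in-memory buffers used so the snippet stays self-contained):

    import io

    import pyarrow as pa
    from pyarrow import csv

    source = io.BytesIO(b"a;b\n1;x\n2;y\n")
    table = csv.read_csv(
        source,
        parse_options=csv.ParseOptions(delimiter=";"),
        convert_options=csv.ConvertOptions(column_types={"a": pa.int32()}),
    )

    sink = io.BytesIO()
    csv.write_csv(table, sink, write_options=csv.WriteOptions(include_header=True))
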
diff --git a/python/pyarrow-stubs/pyarrow/_cuda.pyi b/python/pyarrow-stubs/pyarrow/_cuda.pyi
new file mode 100644
index 00000000000..d484fc5cf5f
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_cuda.pyi
@@ -0,0 +1,158 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any
+
+import cuda # type: ignore[import-not-found]
+
+from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-untyped, import-not-found] # noqa: E501
+
+from . import lib
+from ._stubs_typing import ArrayLike
+
+
+class Context(lib._Weakrefable):
+ def __init__(self, device_number: int = 0, handle: int | None = None) -> None: ...
+
+ @staticmethod
+ def from_numba(context: _numba_driver.Context | None = None) -> Context: ...
+
+ def to_numba(self) -> _numba_driver.Context: ...
+
+ @staticmethod
+ def get_num_devices() -> int: ...
+
+ @property
+ def device_number(self) -> int: ...
+
+ @property
+ def handle(self) -> int: ...
+
+ def synchronize(self) -> None: ...
+
+ @property
+ def bytes_allocated(self) -> int: ...
+
+ def get_device_address(self, address: int) -> int: ...
+
+ def new_buffer(self, nbytes: int) -> CudaBuffer: ...
+
+ @property
+ def memory_manager(self) -> lib.MemoryManager: ...
+
+ @property
+ def device(self) -> lib.Device: ...
+
+ def foreign_buffer(self, address: int, size: int, base: Any |
+ None = None) -> CudaBuffer: ...
+
+ def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: ...
+
+ def buffer_from_data(
+ self,
+ data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike,
+ offset: int = 0,
+ size: int = -1,
+ ) -> CudaBuffer: ...
+
+ def buffer_from_object(self, obj: Any) -> CudaBuffer: ...
+
+
+class IpcMemHandle(lib._Weakrefable):
+ @staticmethod
+ def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: ...
+
+ def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: ...
+
+
+class CudaBuffer(lib.Buffer):
+ @staticmethod
+ def from_buffer(buf: lib.Buffer) -> CudaBuffer: ...
+
+ @staticmethod
+ def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: ...
+
+ def to_numba(self) -> _numba_driver.MemoryPointer: ...
+
+ def copy_to_host(
+ self,
+ position: int = 0,
+ nbytes: int = -1,
+ buf: lib.Buffer | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+ resizable: bool = False,
+ ) -> lib.Buffer: ...
+
+ def copy_from_host(
+ self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1
+ ) -> int: ...
+
+ def copy_from_device(self, buf: CudaBuffer, position: int = 0,
+ nbytes: int = -1) -> int: ...
+
+ def export_for_ipc(self) -> IpcMemHandle: ...
+
+ @property
+ def context(self) -> Context: ...
+
+ def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: ...
+
+ def to_pybytes(self) -> bytes: ...
+
+
+class HostBuffer(lib.Buffer):
+ @property
+ def size(self) -> int: ...
+
+
+class BufferReader(lib.NativeFile):
+ def __init__(self, obj: CudaBuffer) -> None: ...
+ def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: ...
+
+
+class BufferWriter(lib.NativeFile):
+ def __init__(self, obj: CudaBuffer) -> None: ...
+ def writeat(self, position: int, data: ArrayLike) -> None: ...
+
+ @property
+ def buffer_size(self) -> int: ...
+
+ @buffer_size.setter
+ def buffer_size(self, buffer_size: int) -> None: ...
+
+ @property
+ def num_bytes_buffered(self) -> int: ...
+
+
+def new_host_buffer(size: int, device: int = 0) -> HostBuffer: ...
+
+
+def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: ...
+
+
+def read_message(
+ source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None
+) -> lib.Message: ...
+
+
+def read_record_batch(
+ buffer: lib.Buffer,
+ schema: lib.Schema,
+ *,
+ dictionary_memo: lib.DictionaryMemo | None = None,
+ pool: lib.MemoryPool | None = None,
+) -> lib.RecordBatch: ...
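
These declarations only matter on CUDA-enabled builds; a hedged sketch of the round trip they describe (requires pyarrow.cuda and an available GPU):

    import numpy as np
    from pyarrow import cuda

    ctx = cuda.Context(0)                      # device 0
    host = np.arange(10, dtype=np.int64)

    dbuf = ctx.new_buffer(host.nbytes)         # device-side allocation
    dbuf.copy_from_host(host, position=0, nbytes=host.nbytes)

    # copy_to_host returns a regular pyarrow Buffer usable on the CPU side.
    roundtrip = np.frombuffer(dbuf.copy_to_host(), dtype=np.int64)
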
diff --git a/python/pyarrow-stubs/pyarrow/_dataset.pyi b/python/pyarrow-stubs/pyarrow/_dataset.pyi
new file mode 100644
index 00000000000..c8cd3d97089
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_dataset.pyi
@@ -0,0 +1,682 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+from collections.abc import Collection, Callable, Iterator, Iterable
+from typing import (
+ IO,
+ Any,
+ Generic,
+ Literal,
+ NamedTuple,
+ TypeVar,
+)
+
+from _typeshed import StrPath
+
+from . import csv, _json, _parquet, lib
+from ._fs import FileSelector, FileSystem, SupportedFileSystem
+from ._stubs_typing import Indices, JoinType, Order
+from .acero import ExecNodeOptions
+from .compute import Expression
+from .ipc import IpcWriteOptions, RecordBatchReader
+
+
+class Dataset(lib._Weakrefable):
+ @property
+ def partition_expression(self) -> Expression: ...
+
+ def replace_schema(self, schema: lib.Schema) -> Self: ...
+
+ def get_fragments(
+ self, filter: Expression | None = None) -> Iterator[Fragment]: ...
+
+ def scanner(
+ self,
+ columns: list[str] | dict[str, Expression] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Scanner: ...
+
+ def to_batches(
+ self,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Iterator[lib.RecordBatch]: ...
+
+ def to_table(
+ self,
+ columns: list[str] | dict[str, Expression] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> lib.Table: ...
+
+ def take(
+ self,
+ indices: Indices,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> lib.Table: ...
+
+ def head(
+ self,
+ num_rows: int,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> lib.Table: ...
+
+ def count_rows(
+ self,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> int: ...
+
+ @property
+ def schema(self) -> lib.Schema: ...
+
+ def filter(self, expression: Expression | None) -> Self: ...
+
+ def sort_by(self, sorting: str |
+ list[tuple[str, Order]], **kwargs) -> InMemoryDataset: ...
+
+ def join(
+ self,
+ right_dataset: Dataset,
+ keys: str | list[str],
+ right_keys: str | list[str] | None = None,
+ join_type: JoinType = "left outer",
+ left_suffix: str | None = None,
+ right_suffix: str | None = None,
+ coalesce_keys: bool = True,
+ use_threads: bool = True,
+ ) -> InMemoryDataset: ...
+
+ def join_asof(
+ self,
+ right_dataset: Dataset,
+ on: str,
+ by: str | list[str],
+ tolerance: int,
+ right_on: str | list[str] | None = None,
+ right_by: str | list[str] | None = None,
+ ) -> InMemoryDataset: ...
+
+ @property
+ def format(self) -> FileFormat: ...
+
+
+class InMemoryDataset(Dataset):
+ def __init__(
+ self,
+ source: lib.Table
+ | lib.RecordBatch
+ | lib.RecordBatchReader
+ | Iterable[lib.RecordBatch]
+ | list[Any],
+ schema: lib.Schema | None = None,
+ ) -> None: ...
+
+
+class UnionDataset(Dataset):
+ def __init__(
+ self,
+ schema: lib.Schema | None = None,
+ children: list[Dataset] | None = None,
+ ) -> None: ...
+
+ @property
+ def children(self) -> list[Dataset]: ...
+
+
+class FileSystemDataset(Dataset):
+ def __init__(
+ self,
+ fragments: list[Fragment],
+ schema: lib.Schema,
+ format: FileFormat,
+ filesystem: SupportedFileSystem | None = None,
+ root_partition: Expression | None = None,
+ ) -> None: ...
+
+ @classmethod
+ def from_paths(
+ cls,
+ paths: list[str],
+ schema: lib.Schema | None = None,
+ format: FileFormat | None = None,
+ filesystem: SupportedFileSystem | None = None,
+ partitions: list[Expression] | None = None,
+ root_partition: Expression | None = None,
+ ) -> FileSystemDataset: ...
+
+ @property
+ def filesystem(self) -> FileSystem: ...
+ @property
+ def partitioning(self) -> Partitioning | None: ...
+
+ @property
+ def files(self) -> list[str]: ...
+
+
+class FileWriteOptions(lib._Weakrefable):
+ @property
+ def format(self) -> FileFormat: ...
+
+
+class FileFormat(lib._Weakrefable):
+ def inspect(
+ self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None
+ ) -> lib.Schema: ...
+
+ def make_fragment(
+ self,
+ file: StrPath | IO | lib.Buffer | lib.BufferReader,
+ filesystem: SupportedFileSystem | None = None,
+ partition_expression: Expression | None = None,
+ *,
+ file_size: int | None = None,
+ ) -> Fragment: ...
+
+ def make_write_options(self) -> FileWriteOptions: ...
+ @property
+ def default_extname(self) -> str: ...
+ @property
+ def default_fragment_scan_options(self) -> FragmentScanOptions: ...
+ @default_fragment_scan_options.setter
+ def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ...
+
+
+class Fragment(lib._Weakrefable):
+ def open(self) -> lib.NativeFile | lib.BufferReader: ...
+ @property
+ def path(self) -> str: ...
+ @property
+ def row_groups(self) -> list[int]: ...
+
+ @property
+ def filesystem(self) -> SupportedFileSystem: ...
+
+ @property
+ def physical_schema(self) -> lib.Schema: ...
+
+ @property
+ def partition_expression(self) -> Expression: ...
+
+ def scanner(
+ self,
+ schema: lib.Schema | None = None,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Scanner: ...
+
+ def to_batches(
+ self,
+ schema: lib.Schema | None = None,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Iterator[lib.RecordBatch]: ...
+
+ def to_table(
+ self,
+ schema: lib.Schema | None = None,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> lib.Table: ...
+
+ def take(
+ self,
+ indices: Indices,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> lib.Table: ...
+
+ def head(
+ self,
+ num_rows: int,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> lib.Table: ...
+
+ def count_rows(
+ self,
+ columns: list[str] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> int: ...
+
+
+class FileFragment(Fragment):
+ def open(self) -> lib.NativeFile: ...
+
+ @property
+ def path(self) -> str: ...
+
+ @property
+ def filesystem(self) -> FileSystem: ...
+
+ @property
+ def buffer(self) -> lib.Buffer: ...
+
+ @property
+ def format(self) -> FileFormat: ...
+
+
+class FragmentScanOptions(lib._Weakrefable):
+ @property
+ def type_name(self) -> str: ...
+
+
+class IpcFileWriteOptions(FileWriteOptions):
+ @property
+ def write_options(self) -> IpcWriteOptions: ...
+ @write_options.setter
+ def write_options(self, write_options: IpcWriteOptions) -> None: ...
+
+
+class IpcFileFormat(FileFormat):
+ def equals(self, other: IpcFileFormat) -> bool: ...
+ def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ...
+ @property
+ def default_extname(self) -> str: ...
+
+
+class FeatherFileFormat(IpcFileFormat):
+ ...
+
+
+class CsvFileFormat(FileFormat):
+ def __init__(
+ self,
+ parse_options: csv.ParseOptions | None = None,
+ default_fragment_scan_options: CsvFragmentScanOptions | None = None,
+ convert_options: csv.ConvertOptions | None = None,
+ read_options: csv.ReadOptions | None = None,
+ ) -> None: ...
+ def make_write_options(
+ self, **kwargs) -> CsvFileWriteOptions: ... # type: ignore[override]
+
+ @property
+ def parse_options(self) -> csv.ParseOptions: ...
+ @parse_options.setter
+ def parse_options(self, parse_options: csv.ParseOptions) -> None: ...
+ def equals(self, other: CsvFileFormat) -> bool: ...
+
+
+class CsvFragmentScanOptions(FragmentScanOptions):
+ convert_options: csv.ConvertOptions
+ read_options: csv.ReadOptions
+
+ def __init__(
+ self,
+ convert_options: csv.ConvertOptions | None = None,
+ read_options: csv.ReadOptions | None = None,
+ ) -> None: ...
+ def equals(self, other: CsvFragmentScanOptions) -> bool: ...
+
+
+class CsvFileWriteOptions(FileWriteOptions):
+ write_options: csv.WriteOptions
+
+
+class JsonFileFormat(FileFormat):
+ def __init__(
+ self,
+ default_fragment_scan_options: JsonFragmentScanOptions | None = None,
+ parse_options: _json.ParseOptions | None = None,
+ read_options: _json.ReadOptions | None = None,
+ ) -> None: ...
+ def equals(self, other: JsonFileFormat) -> bool: ...
+
+
+class JsonFragmentScanOptions(FragmentScanOptions):
+ parse_options: _json.ParseOptions
+ read_options: _json.ReadOptions
+
+ def __init__(
+ self,
+ parse_options: _json.ParseOptions | None = None,
+ read_options: _json.ReadOptions | None = None,
+ ) -> None: ...
+ def equals(self, other: JsonFragmentScanOptions) -> bool: ...
+
+
+class Partitioning(lib._Weakrefable):
+ def parse(self, path: str) -> Expression: ...
+
+ def format(self, expr: Expression) -> tuple[str, str]: ...
+
+ @property
+ def schema(self) -> lib.Schema: ...
+
+ @property
+ def dictionaries(self) -> list[Any]: ...
+
+
+class PartitioningFactory(lib._Weakrefable):
+ @property
+ def type_name(self) -> str: ...
+
+
+class KeyValuePartitioning(Partitioning):
+ @property
+ def dictionaries(self) -> list[Any]: ...
+
+
+class DirectoryPartitioning(KeyValuePartitioning):
+ @staticmethod
+ def discover(
+ field_names: list[str] | None = None,
+ infer_dictionary: bool = False,
+ max_partition_dictionary_size: int = 0,
+ schema: lib.Schema | None = None,
+ segment_encoding: Literal["uri", "none"] = "uri",
+ ) -> PartitioningFactory: ...
+
+ def __init__(
+ self,
+ schema: lib.Schema,
+ dictionaries: dict[str, lib.Array] | None = None,
+ segment_encoding: Literal["uri", "none"] = "uri",
+ ) -> None: ...
+
+
+class HivePartitioning(KeyValuePartitioning):
+ def __init__(
+ self,
+ schema: lib.Schema,
+ dictionaries: dict[str, lib.Array] | None = None,
+ null_fallback: str = "__HIVE_DEFAULT_PARTITION__",
+ segment_encoding: Literal["uri", "none"] = "uri",
+ ) -> None: ...
+
+ @staticmethod
+ def discover(
+ infer_dictionary: bool = False,
+ max_partition_dictionary_size: int = 0,
+ null_fallback="__HIVE_DEFAULT_PARTITION__",
+ schema: lib.Schema | None = None,
+ segment_encoding: Literal["uri", "none"] = "uri",
+ ) -> PartitioningFactory: ...
+
+
+class FilenamePartitioning(KeyValuePartitioning):
+ def __init__(
+ self,
+ schema: lib.Schema,
+ dictionaries: dict[str, lib.Array] | None = None,
+ segment_encoding: Literal["uri", "none"] = "uri",
+ ) -> None: ...
+
+ @staticmethod
+ def discover(
+ field_names: list[str] | None = None,
+ infer_dictionary: bool = False,
+ schema: lib.Schema | None = None,
+ segment_encoding: Literal["uri", "none"] = "uri",
+ ) -> PartitioningFactory: ...
+
+
+class DatasetFactory(lib._Weakrefable):
+ root_partition: Expression
+ def finish(self, schema: lib.Schema | None = None) -> Dataset: ...
+
+ def inspect(
+ self,
+ *,
+ promote_options: str = "default",
+ fragments: list[Fragment] | int | str | None = None,
+ ) -> lib.Schema: ...
+
+ def inspect_schemas(self) -> list[lib.Schema]: ...
+
+
+class FileSystemFactoryOptions(lib._Weakrefable):
+ partitioning: Partitioning
+ partitioning_factory: PartitioningFactory
+ partition_base_dir: str
+ exclude_invalid_files: bool
+ selector_ignore_prefixes: list[str]
+
+ def __init__(
+ self,
+ partition_base_dir: str | None = None,
+ partitioning: Partitioning | PartitioningFactory | None = None,
+ exclude_invalid_files: bool | None = True,
+ selector_ignore_prefixes: list[str] | None = None,
+ ) -> None: ...
+
+
+class FileSystemDatasetFactory(DatasetFactory):
+ def __init__(
+ self,
+ filesystem: SupportedFileSystem,
+ paths_or_selector: Collection[str] | FileSelector,
+ format: FileFormat,
+ options: FileSystemFactoryOptions | None = None,
+ ) -> None: ...
+
+
+class UnionDatasetFactory(DatasetFactory):
+ def __init__(self, factories: list[DatasetFactory]) -> None: ...
+
+
+_RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch)
+
+
+class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]):
+ def __iter__(self) -> Self: ...
+ def __next__(self) -> _RecordBatchT: ...
+
+
+class TaggedRecordBatch(NamedTuple):
+ record_batch: lib.RecordBatch
+ fragment: Fragment
+
+
+class TaggedRecordBatchIterator(lib._Weakrefable):
+ def __iter__(self) -> Self: ...
+ def __next__(self) -> TaggedRecordBatch: ...
+
+
+class Scanner(lib._Weakrefable):
+ @staticmethod
+ def from_dataset(
+ dataset: Dataset,
+ *,
+ columns: list[str] | dict[str, Expression] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Scanner: ...
+
+ @staticmethod
+ def from_fragment(
+ fragment: Fragment,
+ *,
+ schema: lib.Schema | None = None,
+ columns: list[str] | dict[str, Expression] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Scanner: ...
+
+ @staticmethod
+ def from_batches(
+ source: Iterator[lib.RecordBatch] | RecordBatchReader | Any,
+ *,
+ schema: lib.Schema | None = None,
+ columns: list[str] | dict[str, Expression] | None = None,
+ filter: Expression | None = None,
+ batch_size: int = ...,
+ batch_readahead: int = 16,
+ fragment_readahead: int = 4,
+ fragment_scan_options: FragmentScanOptions | None = None,
+ use_threads: bool = True,
+ cache_metadata: bool = True,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> Scanner: ...
+
+ @property
+ def dataset_schema(self) -> lib.Schema: ...
+
+ @property
+ def projected_schema(self) -> lib.Schema: ...
+
+ def to_batches(self) -> Iterator[lib.RecordBatch]: ...
+
+ def scan_batches(self) -> TaggedRecordBatchIterator: ...
+
+ def to_table(self) -> lib.Table: ...
+
+ def take(self, indices: Indices) -> lib.Table: ...
+
+ def head(self, num_rows: int) -> lib.Table: ...
+
+ def count_rows(self) -> int: ...
+
+ def to_reader(self) -> RecordBatchReader: ...
+
+
+def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: ...
+
+
+class WrittenFile(lib._Weakrefable):
+ def __init__(self, path: str, metadata: _parquet.FileMetaData |
+ None, size: int) -> None: ...
+
+
+def _filesystemdataset_write(
+ data: Scanner,
+ base_dir: StrPath,
+ basename_template: str,
+ filesystem: SupportedFileSystem,
+ partitioning: Partitioning,
+ preserve_order: bool,
+ file_options: FileWriteOptions,
+ max_partitions: int,
+ file_visitor: Callable[[str], None] | None,
+ existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"],
+ max_open_files: int,
+ max_rows_per_file: int,
+ min_rows_per_group: int,
+ max_rows_per_group: int,
+ create_dir: bool,
+) -> None: ...
+
+
+class _ScanNodeOptions(ExecNodeOptions):
+ def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ...
+
+
+class ScanNodeOptions(_ScanNodeOptions):
+ def __init__(
+ self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs
+ ) -> None: ...
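
A sketch of how the Dataset/Scanner surface above is reached through the high-level pyarrow.dataset helpers (a temporary directory keeps the snippet self-contained):

    import tempfile

    import pyarrow as pa
    import pyarrow.dataset as ds

    root = tempfile.mkdtemp()
    table = pa.table({"year": [2022, 2023, 2023], "value": [1.0, 2.0, 3.0]})
    ds.write_dataset(
        table, root, format="parquet",
        partitioning=ds.partitioning(pa.schema([("year", pa.int64())]), flavor="hive"),
    )

    dataset = ds.dataset(root, format="parquet", partitioning="hive")
    scanner = dataset.scanner(columns=["value"], filter=ds.field("year") == 2023)
    filtered = scanner.to_table()
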
diff --git a/python/pyarrow-stubs/pyarrow/_dataset_orc.pyi b/python/pyarrow-stubs/pyarrow/_dataset_orc.pyi
new file mode 100644
index 00000000000..62f49bf5d30
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_dataset_orc.pyi
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._dataset import FileFormat
+
+
+class OrcFileFormat(FileFormat):
+ def equals(self, other: OrcFileFormat) -> bool: ...
+ @property
+ def default_extname(self) -> str: ...
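
OrcFileFormat is only available on ORC-enabled builds; it is normally reached through the high-level helper (the path below is purely illustrative):

    import pyarrow.dataset as ds

    fmt = ds.OrcFileFormat()
    # dataset = ds.dataset("path/to/orc_dir", format=fmt)
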
diff --git a/python/pyarrow-stubs/pyarrow/_dataset_parquet.pyi b/python/pyarrow-stubs/pyarrow/_dataset_parquet.pyi
new file mode 100644
index 00000000000..6c27e3c8a93
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_dataset_parquet.pyi
@@ -0,0 +1,200 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable
+from dataclasses import dataclass
+from typing import IO, Any, TypedDict
+
+from _typeshed import StrPath
+
+from ._compute import Expression
+from ._dataset import (
+ DatasetFactory,
+ FileFormat,
+ FileFragment,
+ FileWriteOptions,
+ Fragment,
+ FragmentScanOptions,
+ Partitioning,
+ PartitioningFactory,
+)
+from ._dataset_parquet_encryption import ParquetDecryptionConfig
+from ._fs import SupportedFileSystem
+from ._parquet import FileDecryptionProperties, FileMetaData
+from ._types import DataType, LargeListType, ListType
+from .lib import CacheOptions, Schema, _Weakrefable, NativeFile, Buffer, BufferReader
+
+parquet_encryption_enabled: bool
+
+
+class ParquetFileFormat(FileFormat):
+ def __init__(
+ self,
+ read_options: ParquetReadOptions | None = None,
+ default_fragment_scan_options: ParquetFragmentScanOptions | None = None,
+ *,
+ pre_buffer: bool = True,
+ coerce_int96_timestamp_unit: str | None = None,
+ thrift_string_size_limit: int | None = None,
+ thrift_container_size_limit: int | None = None,
+ page_checksum_verification: bool = False,
+ arrow_extensions_enabled: bool = True,
+ binary_type: DataType | None = None,
+ list_type: type[ListType | LargeListType] | None = None,
+ use_buffered_stream: bool = False,
+ buffer_size: int = 8192,
+ dictionary_columns: list[str] | set[str] | None = None,
+ decryption_properties: FileDecryptionProperties | None = None,
+ ) -> None: ...
+ @property
+ def read_options(self) -> ParquetReadOptions: ...
+ def make_write_options(
+ self, **kwargs) -> ParquetFileWriteOptions: ... # type: ignore[override]
+
+ def equals(self, other: ParquetFileFormat) -> bool: ...
+ @property
+ def default_extname(self) -> str: ...
+
+ def make_fragment(
+ self,
+ file: StrPath | IO | Buffer | BufferReader,
+ filesystem: SupportedFileSystem | None = None,
+ partition_expression: Expression | None = None,
+ row_groups: Iterable[int] | None = None,
+ *,
+ file_size: int | None = None,
+ ) -> Fragment: ...
+
+
+class _NameStats(TypedDict):
+ min: Any
+ max: Any
+
+
+class RowGroupInfo:
+ id: int
+ metadata: FileMetaData
+ schema: Schema
+
+ def __init__(self, id: int, metadata: FileMetaData, schema: Schema) -> None: ...
+ @property
+ def num_rows(self) -> int: ...
+ @property
+ def total_byte_size(self) -> int: ...
+ @property
+ def statistics(self) -> dict[str, _NameStats]: ...
+
+
+class ParquetFileFragment(FileFragment):
+ def ensure_complete_metadata(self) -> None: ...
+ @property
+ def path(self) -> str: ...
+ @property
+ def filesystem(self) -> SupportedFileSystem: ...
+ def open(self) -> NativeFile: ...
+
+ @property
+ def row_groups(self) -> list[int]: ...
+ @property
+ def metadata(self) -> FileMetaData: ...
+ @property
+ def num_row_groups(self) -> int: ...
+
+ def split_by_row_group(
+ self, filter: Expression | None = None, schema: Schema | None = None
+ ) -> list[Fragment]: ...
+
+ def subset(
+ self,
+ filter: Expression | None = None,
+ schema: Schema | None = None,
+ row_group_ids: list[int] | None = None,
+ ) -> ParquetFileFormat: ...
+
+
+class ParquetReadOptions(_Weakrefable):
+ def __init__(
+ self,
+ dictionary_columns: list[str] | set[str] | None = None,
+ coerce_int96_timestamp_unit: str | None = None,
+ binary_type: DataType | None = None,
+ list_type: type[ListType | LargeListType] | None = None,
+ ) -> None: ...
+
+ @property
+ def dictionary_columns(self) -> set[str]: ...
+ @dictionary_columns.setter
+ def dictionary_columns(self, columns: list[str] | set[str]) -> None: ...
+
+ @property
+ def coerce_int96_timestamp_unit(self) -> str: ...
+ @coerce_int96_timestamp_unit.setter
+ def coerce_int96_timestamp_unit(self, unit: str) -> None: ...
+
+ @property
+ def binary_type(self) -> DataType: ...
+ @binary_type.setter
+ def binary_type(self, type: DataType | None) -> None: ...
+
+ @property
+ def list_type(self) -> type[ListType | LargeListType]: ...
+ @list_type.setter
+ def list_type(self, type: type[ListType | LargeListType] | None) -> None: ...
+
+ def equals(self, other: ParquetReadOptions) -> bool: ...
+
+
+class ParquetFileWriteOptions(FileWriteOptions):
+ def update(self, **kwargs) -> None: ...
+ def _set_properties(self) -> None: ...
+ def _set_arrow_properties(self) -> None: ...
+ def _set_encryption_config(self) -> None: ...
+ # accept passthrough options used in tests
+ def __init__(self, **kwargs) -> None: ...
+
+
+@dataclass(kw_only=True)
+class ParquetFragmentScanOptions(FragmentScanOptions):
+ use_buffered_stream: bool = False
+ buffer_size: int = 8192
+ pre_buffer: bool = True
+ cache_options: CacheOptions | None = None
+ thrift_string_size_limit: int | None = None
+ thrift_container_size_limit: int | None = None
+ decryption_config: ParquetDecryptionConfig | None = None
+ decryption_properties: FileDecryptionProperties | None = None
+ page_checksum_verification: bool = False
+
+ def equals(self, other: ParquetFragmentScanOptions) -> bool: ...
+
+
+@dataclass
+class ParquetFactoryOptions(_Weakrefable):
+
+ partition_base_dir: str | None = None
+ partitioning: Partitioning | PartitioningFactory | None = None
+ validate_column_chunk_paths: bool = False
+
+
+class ParquetDatasetFactory(DatasetFactory):
+ def __init__(
+ self,
+ metadata_path: str,
+ filesystem: SupportedFileSystem,
+ format: FileFormat,
+ options: ParquetFactoryOptions | None = None,
+ ) -> None: ...
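
A sketch of how the Parquet-specific format and scan options above are combined (construction only; the commented path is illustrative):

    import pyarrow.dataset as ds

    scan_opts = ds.ParquetFragmentScanOptions(
        use_buffered_stream=True,   # stream column chunks instead of whole row groups
        buffer_size=1 << 20,
        pre_buffer=False,
    )
    fmt = ds.ParquetFileFormat(default_fragment_scan_options=scan_opts)
    # dataset = ds.dataset("path/to/parquet_dir", format=fmt)
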
diff --git a/python/pyarrow-stubs/pyarrow/_dataset_parquet_encryption.pyi b/python/pyarrow-stubs/pyarrow/_dataset_parquet_encryption.pyi
new file mode 100644
index 00000000000..b36f18522e5
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_dataset_parquet_encryption.pyi
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions
+from ._parquet import FileDecryptionProperties
+from ._parquet_encryption import (CryptoFactory, EncryptionConfiguration,
+ DecryptionConfiguration, KmsConnectionConfig)
+from .lib import _Weakrefable
+
+
+class ParquetEncryptionConfig(_Weakrefable):
+ def __init__(
+ self,
+ crypto_factory: CryptoFactory,
+ kms_connection_config: KmsConnectionConfig,
+ encryption_config: EncryptionConfiguration,
+ ) -> None: ...
+
+
+class ParquetDecryptionConfig(_Weakrefable):
+ def __init__(
+ self,
+ crypto_factory: CryptoFactory,
+ kms_connection_config: KmsConnectionConfig,
+ decryption_config: DecryptionConfiguration,
+ ) -> None: ...
+
+
+def set_encryption_config(
+ opts: ParquetFileWriteOptions,
+ config: ParquetEncryptionConfig,
+) -> None: ...
+
+
+def set_decryption_properties(
+ opts: ParquetFragmentScanOptions,
+ config: FileDecryptionProperties,
+) -> None: ...
+
+
+def set_decryption_config(
+ opts: ParquetFragmentScanOptions,
+ config: ParquetDecryptionConfig,
+) -> None: ...
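
A hedged sketch of how these config objects are wired together for dataset-level Parquet encryption (requires a build with encryption support; the KMS client factory and key names are placeholders an application must supply):

    import pyarrow.dataset as ds
    import pyarrow.parquet.encryption as pe

    def kms_client_factory(kms_connection_config):
        # A real application returns a pe.KmsClient subclass implementing
        # wrap_key()/unwrap_key() against its key-management service.
        raise NotImplementedError

    crypto_factory = pe.CryptoFactory(kms_client_factory)
    kms_config = pe.KmsConnectionConfig()
    encryption_config = pe.EncryptionConfiguration(
        footer_key="footer_key_name",                 # illustrative key names
        column_keys={"column_key_name": ["secret_col"]},
    )

    dataset_encryption = ds.ParquetEncryptionConfig(
        crypto_factory, kms_config, encryption_config)
    # Passed at write time via:
    # ds.ParquetFileFormat().make_write_options(encryption_config=dataset_encryption)
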
diff --git a/python/pyarrow-stubs/pyarrow/_feather.pyi b/python/pyarrow-stubs/pyarrow/_feather.pyi
new file mode 100644
index 00000000000..2f4757cd5f1
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_feather.pyi
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import IO, Literal
+from collections.abc import Sequence
+
+from _typeshed import StrPath
+
+from .lib import Buffer, NativeFile, Table, _Weakrefable
+
+
+class FeatherError(Exception):
+ ...
+
+
+def write_feather(
+ table: Table,
+ dest: StrPath | IO | NativeFile,
+ compression: str | None = None,
+ compression_level: int | None = None,
+ chunksize: int | None = None,
+ version: Literal[1, 2] = 2,
+) -> None: ...
+
+
+class FeatherReader(_Weakrefable):
+ def __init__(
+ self,
+ source: StrPath | IO | NativeFile | Buffer,
+ use_memory_map: bool,
+ use_threads: bool,
+ ) -> None: ...
+ @property
+ def version(self) -> str: ...
+ def read(self) -> Table: ...
+ def read_indices(self, indices: Sequence[int]) -> Table: ...
+ def read_names(self, names: Sequence[str]) -> Table: ...
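
The public pyarrow.feather helpers wrap these declarations; a minimal round trip:

    import os
    import tempfile

    import pyarrow as pa
    import pyarrow.feather as feather

    path = os.path.join(tempfile.mkdtemp(), "example.feather")
    table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    feather.write_feather(table, path, compression="uncompressed")
    roundtrip = feather.read_table(path, columns=["a"])
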
diff --git a/python/pyarrow-stubs/pyarrow/_flight.pyi b/python/pyarrow-stubs/pyarrow/_flight.pyi
new file mode 100644
index 00000000000..03d6c6580ab
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_flight.pyi
@@ -0,0 +1,660 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import asyncio
+import enum
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+from collections.abc import Generator, Iterable, Iterator, Sequence
+from typing import Any, Generic, NamedTuple, TypeVar
+from datetime import datetime
+from typing_extensions import deprecated
+
+from .ipc import _ReadPandasMixin, ReadStats
+from .lib import (
+ ArrowCancelled,
+ ArrowException,
+ ArrowInvalid,
+ Buffer,
+ IpcReadOptions,
+ IpcWriteOptions,
+ RecordBatch,
+ RecordBatchReader,
+ Scalar,
+ Schema,
+ Table,
+ _CRecordBatchWriter,
+ _Weakrefable,
+)
+
+_T = TypeVar("_T")
+
+
+class FlightCallOptions(_Weakrefable):
+ def __init__(
+ self,
+ timeout: float | None = None,
+ write_options: IpcWriteOptions | None = None,
+ headers: list[tuple[str | bytes, str | bytes]] | None = None,
+ read_options: IpcReadOptions | None = None,
+ ) -> None: ...
+
+
+class CertKeyPair(NamedTuple):
+ cert: str | bytes | None
+ key: str | bytes | None
+
+
+class FlightError(Exception):
+ extra_info: bytes
+
+
+class FlightInternalError(FlightError, ArrowException):
+ ...
+
+
+class FlightTimedOutError(FlightError, ArrowException):
+ ...
+
+
+class FlightCancelledError(FlightError, ArrowCancelled):
+ def __init__(self, message: str, *, extra_info: bytes | None = None) -> None: ...
+
+
+class FlightServerError(FlightError, ArrowException):
+ ...
+
+
+class FlightUnauthenticatedError(FlightError, ArrowException):
+ ...
+
+
+class FlightUnauthorizedError(FlightError, ArrowException):
+ ...
+
+
+class FlightUnavailableError(FlightError, ArrowException):
+ ...
+
+
+class FlightWriteSizeExceededError(ArrowInvalid):
+ limit: int
+ actual: int
+
+
+class Action(_Weakrefable):
+ def __init__(
+ self, action_type: bytes | str, buf: Buffer | bytes | None) -> None: ...
+
+ @property
+ def type(self) -> str: ...
+
+ @property
+ def body(self) -> Buffer: ...
+
+ def serialize(self) -> bytes: ...
+
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class ActionType(NamedTuple):
+ type: str
+ description: str
+
+ def make_action(self, buf: Buffer | bytes) -> Action: ...
+
+
+class Result(_Weakrefable):
+ def __init__(self, buf: Buffer | bytes) -> None: ...
+
+ @property
+ def body(self) -> Buffer: ...
+
+ def serialize(self) -> bytes: ...
+
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class BasicAuth(_Weakrefable):
+ def __init__(
+ self, username: str | bytes | None = None, password: str | bytes | None = None
+ ) -> None: ...
+
+ @property
+ def username(self) -> bytes: ...
+ @property
+ def password(self) -> bytes: ...
+ def serialize(self) -> str: ...
+ @staticmethod
+ def deserialize(serialized: str | bytes) -> BasicAuth: ...
+
+
+class DescriptorType(enum.Enum):
+ UNKNOWN = 0
+ PATH = 1
+ CMD = 2
+
+
+class FlightMethod(enum.Enum):
+ INVALID = 0
+ HANDSHAKE = 1
+ LIST_FLIGHTS = 2
+ GET_FLIGHT_INFO = 3
+ GET_SCHEMA = 4
+ DO_GET = 5
+ DO_PUT = 6
+ DO_ACTION = 7
+ LIST_ACTIONS = 8
+ DO_EXCHANGE = 9
+
+
+class FlightDescriptor(_Weakrefable):
+ @staticmethod
+ def for_path(*path: str | bytes) -> FlightDescriptor: ...
+
+ @staticmethod
+ def for_command(command: str | bytes) -> FlightDescriptor: ...
+
+ @property
+ def descriptor_type(self) -> DescriptorType: ...
+
+ @property
+ def path(self) -> list[bytes] | None: ...
+
+ @property
+ def command(self) -> bytes | None: ...
+
+ def serialize(self) -> bytes: ...
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class Ticket(_Weakrefable):
+ def __init__(self, ticket: str | bytes) -> None: ...
+ @property
+ def ticket(self) -> bytes: ...
+ def serialize(self) -> bytes: ...
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class Location(_Weakrefable):
+ def __init__(self, uri: str | bytes) -> None: ...
+ @property
+ def uri(self) -> bytes: ...
+ def equals(self, other: Location) -> bool: ...
+ @staticmethod
+ def for_grpc_tcp(host: str | bytes, port: int) -> Location: ...
+
+ @staticmethod
+ def for_grpc_tls(host: str | bytes, port: int) -> Location: ...
+
+ @staticmethod
+ def for_grpc_unix(path: str | bytes) -> Location: ...
+
+
+class FlightEndpoint(_Weakrefable):
+ def __init__(
+ self,
+ ticket: Ticket | str | bytes | object,
+ locations: list[str | bytes | Location | object],
+ expiration_time: Scalar[Any] | str | datetime | None = ...,
+ app_metadata: bytes | str | object = ...,
+ ): ...
+
+ @property
+ def ticket(self) -> Ticket: ...
+
+ @property
+ def locations(self) -> list[Location]: ...
+
+ def serialize(self) -> bytes: ...
+ @property
+ def expiration_time(self) -> Scalar[Any] | None: ...
+
+ @property
+ def app_metadata(self) -> bytes | str: ...
+
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class SchemaResult(_Weakrefable):
+ def __init__(self, schema: Schema) -> None: ...
+
+ @property
+ def schema(self) -> Schema: ...
+
+ def serialize(self) -> bytes: ...
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class FlightInfo(_Weakrefable):
+ def __init__(
+ self,
+ schema: Schema | None,
+ descriptor: FlightDescriptor,
+ endpoints: list[FlightEndpoint],
+ total_records: int | None = ...,
+ total_bytes: int | None = ...,
+ ordered: bool = ...,
+ app_metadata: bytes | str = ...,
+ ) -> None: ...
+
+ @property
+ def schema(self) -> Schema | None: ...
+
+ @property
+ def descriptor(self) -> FlightDescriptor: ...
+
+ @property
+ def endpoints(self) -> list[FlightEndpoint]: ...
+
+ @property
+ def total_records(self) -> int: ...
+
+ @property
+ def total_bytes(self) -> int: ...
+
+ @property
+ def ordered(self) -> bool: ...
+
+ @property
+ def app_metadata(self) -> bytes | str: ...
+
+ def serialize(self) -> bytes: ...
+ @classmethod
+ def deserialize(cls, serialized: bytes) -> Self: ...
+
+
+class FlightStreamChunk(_Weakrefable):
+ @property
+ def data(self) -> RecordBatch | None: ...
+ @property
+ def app_metadata(self) -> Buffer | None: ...
+ def __iter__(self): ...
+
+
+class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin):
+ # Needs to be separate class so the "real" class can subclass the
+ # pure-Python mixin class
+
+ def __iter__(self) -> Self: ...
+ def __next__(self) -> FlightStreamChunk: ...
+ @property
+ def schema(self) -> Schema: ...
+
+ def read_all(self) -> Table: ...
+
+ def read_chunk(self) -> FlightStreamChunk: ...
+
+ def to_reader(self) -> RecordBatchReader: ...
+
+
+class MetadataRecordBatchReader(_MetadataRecordBatchReader):
+ @property
+ def stats(self) -> ReadStats: ...
+
+
+class FlightStreamReader(MetadataRecordBatchReader):
+ @property
+ def stats(self) -> ReadStats: ...
+
+ def cancel(self) -> None: ...
+
+ def read_all(self) -> Table: ...
+
+ def read(self) -> RecordBatch | None: ...
+
+
+class MetadataRecordBatchWriter(_CRecordBatchWriter):
+ def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: ...
+
+ def write_metadata(self, buf: Buffer | bytes) -> None: ...
+
+ def write_batch(self, batch: RecordBatch) -> None: ... # type: ignore[override]
+
+ def write_table(self, table: Table, max_chunksize: int |
+ None = None, **kwargs) -> None: ...
+
+ def close(self) -> None: ...
+
+ def write_with_metadata(self, batch: RecordBatch, buf: Buffer | bytes) -> None: ...
+
+
+class FlightStreamWriter(MetadataRecordBatchWriter):
+ def done_writing(self) -> None: ...
+
+
+class FlightMetadataReader(_Weakrefable):
+ def read(self) -> Buffer | None: ...
+
+
+class FlightMetadataWriter(_Weakrefable):
+ def write(self, message: Buffer) -> None: ...
+
+
+class AsyncioCall(Generic[_T]):
+ _future: asyncio.Future[_T]
+
+ def as_awaitable(self) -> asyncio.Future[_T]: ...
+ def wakeup(self, result_or_exception: BaseException | _T) -> None: ...
+
+
+class AsyncioFlightClient:
+ def __init__(self, client: FlightClient) -> None: ...
+
+ async def get_flight_info(
+ self,
+ descriptor: FlightDescriptor,
+ *,
+ options: FlightCallOptions | None = None,
+ ): ...
+
+
+class FlightClient(_Weakrefable):
+ def __init__(
+ self,
+ location: str | tuple[str, int] | Location,
+ *,
+ tls_root_certs: str | None = None,
+ cert_chain: str | None = None,
+ private_key: str | None = None,
+ override_hostname: str | None = None,
+ middleware: list[ClientMiddlewareFactory] | None = None,
+ write_size_limit_bytes: int | None = None,
+ disable_server_verification: bool = False,
+ generic_options: list[tuple[str, int | str]] | None = None,
+ ): ...
+
+ @property
+ def supports_async(self) -> bool: ...
+ def as_async(self) -> AsyncioFlightClient: ...
+ def wait_for_available(self, timeout: int = 5) -> None: ...
+
+ @classmethod
+ @deprecated(
+ "Use the ``FlightClient`` constructor or "
+ "``pyarrow.flight.connect`` function instead."
+ )
+ def connect(
+ cls,
+ location: str | tuple[str, int] | Location,
+ tls_root_certs: str | None = None,
+ cert_chain: str | None = None,
+ private_key: str | None = None,
+ override_hostname: str | None = None,
+ disable_server_verification: bool = False,
+ ) -> FlightClient: ...
+
+ def authenticate(
+ self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None
+ ) -> None: ...
+
+ def authenticate_basic_token(
+ self, username: str | bytes, password: str | bytes,
+ options: FlightCallOptions | None = None
+ ) -> tuple[str, str]: ...
+
+ def list_actions(self, options: FlightCallOptions |
+ None = None) -> list[Action]: ...
+
+ def do_action(
+ self, action: Action | tuple[bytes | str, bytes | str] | str,
+ options: FlightCallOptions | None = None
+ ) -> Iterator[Result]: ...
+
+ def list_flights(
+ self, criteria: str | bytes | None = None,
+ options: FlightCallOptions | None = None
+ ) -> Generator[FlightInfo, None, None]: ...
+
+ def get_flight_info(
+ self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None
+ ) -> FlightInfo: ...
+
+ def get_schema(
+ self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None
+ ) -> SchemaResult: ...
+
+ def do_get(
+ self, ticket: Ticket, options: FlightCallOptions | None = None
+ ) -> FlightStreamReader: ...
+
+ def do_put(
+ self,
+ descriptor: FlightDescriptor,
+ schema: Schema | None,
+ options: FlightCallOptions | None = None,
+ ) -> tuple[FlightStreamWriter, FlightStreamReader]: ...
+
+ def do_exchange(
+ self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None
+ ) -> tuple[FlightStreamWriter, FlightStreamReader]: ...
+
+ def close(self) -> None: ...
+
+ def __enter__(self) -> Self: ...
+ def __exit__(self, exc_type, exc_value, traceback) -> None: ...
+
+
+class FlightDataStream(_Weakrefable):
+ ...
+
+
+class RecordBatchStream(FlightDataStream):
+ def __init__(self, data_source: RecordBatchReader | Table | None = None,
+ options: IpcWriteOptions | None = None) -> None: ...
+
+
+class GeneratorStream(FlightDataStream):
+ def __init__(
+ self,
+ schema: Schema,
+ generator: Iterable[
+ FlightDataStream
+ | Table
+ | RecordBatch
+ | RecordBatchReader
+ | tuple[RecordBatch, bytes]
+ ],
+ options: IpcWriteOptions | None = None,
+ ) -> None: ...
+
+
+class ServerCallContext(_Weakrefable):
+ def peer_identity(self) -> bytes: ...
+
+ def peer(self) -> str: ...
+
+ # Set safe=True as gRPC on Windows sometimes gives garbage bytes
+ def is_cancelled(self) -> bool: ...
+
+ def add_header(self, key: str, value: str) -> None: ...
+
+ def add_trailer(self, key: str, value: str) -> None: ...
+
+ def get_middleware(self, key: str) -> ServerMiddleware | None: ...
+
+
+class ServerAuthReader(_Weakrefable):
+ def read(self) -> str: ...
+
+
+class ServerAuthSender(_Weakrefable):
+ def write(self, message: str) -> None: ...
+
+
+class ClientAuthReader(_Weakrefable):
+ def read(self) -> str: ...
+
+
+class ClientAuthSender(_Weakrefable):
+ def write(self, message: str) -> None: ...
+
+
+class ServerAuthHandler(_Weakrefable):
+ def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): ...
+
+ def is_valid(self, token: str) -> bool: ...
+
+
+class ClientAuthHandler(_Weakrefable):
+ def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): ...
+
+ def get_token(self) -> str: ...
+
+
+class CallInfo(NamedTuple):
+ method: FlightMethod
+
+
+class ClientMiddlewareFactory(_Weakrefable):
+ def start_call(self, info: CallInfo) -> ClientMiddleware | None: ...
+
+
+class ClientMiddleware(_Weakrefable):
+ def sending_headers(self) -> dict[str, list[str] | list[bytes]]: ...
+
+ def received_headers(self, headers: dict[str, list[str] | list[bytes]]): ...
+
+ def call_completed(self, exception: ArrowException): ...
+
+
+class ServerMiddlewareFactory(_Weakrefable):
+ def start_call(
+ self, info: CallInfo, headers: dict[str, list[str] | list[bytes]]
+ ) -> ServerMiddleware | None: ...
+
+
+class TracingServerMiddlewareFactory(ServerMiddlewareFactory):
+ ...
+
+
+class ServerMiddleware(_Weakrefable):
+ def sending_headers(self) -> dict[str, list[str] | list[bytes]]: ...
+
+ def call_completed(self, exception: ArrowException): ...
+
+ @property
+ def trace_context(self) -> dict: ...
+
+
+class TracingServerMiddleware(ServerMiddleware):
+ trace_context: dict
+ def __init__(self, trace_context: dict) -> None: ...
+
+
+class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory):
+ def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ...
+
+ def start_call( # type: ignore[override]
+ self, info: CallInfo, headers: dict[str, list[str] | list[bytes]]
+ ) -> _ServerMiddlewareFactoryWrapper | None: ...
+
+
+class _ServerMiddlewareWrapper(ServerMiddleware):
+ def __init__(self, middleware: dict[str, ServerMiddleware]) -> None: ...
+ def send_headers(self) -> dict[str, dict[str, list[str] | list[bytes]]]: ...
+ def call_completed(self, exception: ArrowException) -> None: ...
+
+
+class _FlightServerFinalizer(_Weakrefable):
+
+ def finalize(self) -> None: ...
+
+
+class FlightServerBase(_Weakrefable):
+ def __init__(
+ self,
+ location: str | tuple[str, int] | Location | None = None,
+ auth_handler: ServerAuthHandler | None = None,
+ tls_certificates: list[tuple[str, str]] | None = None,
+ verify_client: bool = False,
+ root_certificates: str | None = None,
+ middleware: dict[str, ServerMiddlewareFactory] | None = None,
+ ): ...
+
+ @property
+ def port(self) -> int: ...
+
+ def list_flights(self, context: ServerCallContext,
+ criteria: str) -> Iterator[FlightInfo]: ...
+
+ def get_flight_info(
+ self, context: ServerCallContext, descriptor: FlightDescriptor
+ ) -> FlightInfo: ...
+
+ def get_schema(self, context: ServerCallContext,
+ descriptor: FlightDescriptor) -> Schema: ...
+
+ def do_put(
+ self,
+ context: ServerCallContext,
+ descriptor: FlightDescriptor,
+ reader: MetadataRecordBatchReader,
+ writer: FlightMetadataWriter,
+ ) -> None: ...
+
+ def do_get(self, context: ServerCallContext,
+ ticket: Ticket) -> FlightDataStream: ...
+
+ def do_exchange(
+ self,
+ context: ServerCallContext,
+ descriptor: FlightDescriptor,
+ reader: MetadataRecordBatchReader,
+ writer: MetadataRecordBatchWriter,
+ ) -> None: ...
+
+ def list_actions(self, context: ServerCallContext) -> Iterable[Action]: ...
+
+ def do_action(self, context: ServerCallContext,
+ action: Action) -> Iterable[bytes]: ...
+
+ def serve(self) -> None: ...
+
+ def run(self) -> None: ...
+
+ def shutdown(self) -> None: ...
+
+ def wait(self) -> None: ...
+
+ def __enter__(self) -> Self: ...
+ def __exit__(
+ self, exc_type: object, exc_value: object, traceback: object) -> None: ...
+
+
+def connect(
+ location: str | tuple[str, int] | Location,
+ *,
+ tls_root_certs: str | None = None,
+ cert_chain: str | None = None,
+ private_key: str | None = None,
+ override_hostname: str | None = None,
+ middleware: list[ClientMiddlewareFactory] | None = None,
+ write_size_limit_bytes: int | None = None,
+ disable_server_verification: bool = False,
+ generic_options: Sequence[tuple[str, int | str]] | None = None,
+) -> FlightClient: ...
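
A minimal server/client round trip exercising the Flight classes above (illustration only; the server class and table are placeholders, not part of this diff):

    import pyarrow as pa
    import pyarrow.flight as flight

    class StaticTableServer(flight.FlightServerBase):
        """Serves the same small table for every ticket."""

        def do_get(self, context, ticket):
            return flight.RecordBatchStream(pa.table({"x": [1, 2, 3]}))

    # Port 0 lets gRPC pick a free port; the server listens once constructed.
    server = StaticTableServer("grpc://127.0.0.1:0")
    client = flight.connect(f"grpc://127.0.0.1:{server.port}")
    table = client.do_get(flight.Ticket(b"ignored")).read_all()
    client.close()
    server.shutdown()
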
diff --git a/python/pyarrow-stubs/pyarrow/_fs.pyi b/python/pyarrow-stubs/pyarrow/_fs.pyi
new file mode 100644
index 00000000000..caf23a75d99
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_fs.pyi
@@ -0,0 +1,234 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+import enum
+import sys
+
+from abc import ABC, abstractmethod
+from _typeshed import StrPath
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+else:
+ from typing_extensions import TypeAlias
+
+from fsspec import AbstractFileSystem # type: ignore
+
+from .lib import NativeFile, _Weakrefable
+
+
+class FileType(enum.IntFlag):
+ NotFound = enum.auto()
+ Unknown = enum.auto()
+ File = enum.auto()
+ Directory = enum.auto()
+
+
+class FileInfo(_Weakrefable):
+ def __init__(
+ self,
+ path: str,
+ type: FileType = FileType.Unknown,
+ *,
+ mtime: dt.datetime | float | None = None,
+ mtime_ns: int | None = None,
+ size: int | None = None,
+ ): ...
+
+ def __getitem__(self, index: int) -> FileInfo: ...

+
+ @property
+ def type(self) -> FileType: ...
+
+ @property
+ def is_file(self) -> bool: ...
+ @property
+ def path(self) -> str: ...
+
+ @property
+ def base_name(self) -> str: ...
+
+ @property
+ def size(self) -> int: ...
+
+ @property
+ def extension(self) -> str: ...
+
+ @property
+ def mtime(self) -> dt.datetime | None: ...
+
+ @property
+ def mtime_ns(self) -> int | None: ...
+
+
+class FileSelector(_Weakrefable):
+ base_dir: str
+ allow_not_found: bool
+ recursive: bool
+ def __init__(self, base_dir: str, allow_not_found: bool = False,
+ recursive: bool = False): ...
+
+
+class FileSystem(_Weakrefable):
+ @classmethod
+ def from_uri(cls, uri: str | StrPath) -> tuple[Self, str]: ...
+
+ def equals(self, other: FileSystem | object) -> bool: ...
+
+ @property
+ def type_name(self) -> str: ...
+
+ def get_file_info(
+ self, paths_or_selector: str | list[str] | FileSelector
+ ) -> list[FileInfo] | FileInfo: ...
+
+ def create_dir(self, path: str, *, recursive: bool = True) -> None: ...
+
+ def delete_dir(self, path: str) -> None: ...
+
+ def delete_dir_contents(
+ self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False
+ ) -> None: ...
+
+ def move(self, src: str, dest: str) -> None: ...
+
+ def copy_file(self, src: str, dest: str) -> None: ...
+
+ def delete_file(self, path: str) -> None: ...
+
+ def open_input_file(self, path: str) -> NativeFile: ...
+
+ def open_input_stream(
+ self,
+ path: str,
+ compression: str | None = "detect",
+ buffer_size: int | None = None) -> NativeFile: ...
+
+ def open_output_stream(
+ self,
+ path: str,
+ compression: str | None = "detect",
+ buffer_size: int | None = None,
+ metadata: dict[str, str] | None = None,
+ ) -> NativeFile: ...
+
+ def open_append_stream(
+ self,
+ path: str,
+ compression: str | None = "detect",
+ buffer_size: int | None = None,
+ metadata: dict[str, str] | None = None,
+ ) -> NativeFile: ...
+
+ def normalize_path(self, path: str) -> str: ...
+
+
+class LocalFileSystem(FileSystem):
+ def __init__(self, *, use_mmap: bool = False) -> None: ...
+
+
+class SubTreeFileSystem(FileSystem):
+ def __init__(self, base_path: str, base_fs: FileSystem): ...
+ @property
+ def base_path(self) -> str: ...
+ @property
+ def base_fs(self) -> FileSystem: ...
+
+
+class _MockFileSystem(FileSystem):
+ def __init__(self, current_time: dt.datetime | None = None) -> None: ...
+
+
+class PyFileSystem(FileSystem):
+ def __init__(self, handler: FileSystemHandler | None) -> None: ...
+ @property
+ def handler(self) -> FileSystemHandler: ...
+
+
+class FileSystemHandler(ABC):
+ @abstractmethod
+ def get_type_name(self) -> str: ...
+
+ @abstractmethod
+ def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: ...
+
+ @abstractmethod
+ def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: ...
+
+ @abstractmethod
+ def create_dir(self, path: str, recursive: bool) -> None: ...
+
+ @abstractmethod
+ def delete_dir(self, path: str) -> None: ...
+
+ @abstractmethod
+ def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: ...
+
+ @abstractmethod
+ def delete_root_dir_contents(self) -> None: ...
+
+ @abstractmethod
+ def delete_file(self, path: str) -> None: ...
+
+ @abstractmethod
+ def move(self, src: str, dest: str) -> None: ...
+
+ @abstractmethod
+ def copy_file(self, src: str, dest: str) -> None: ...
+
+ @abstractmethod
+ def open_input_stream(self, path: str) -> NativeFile: ...
+
+ @abstractmethod
+ def open_input_file(self, path: str) -> NativeFile: ...
+
+ @abstractmethod
+ def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: ...
+
+ @abstractmethod
+ def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: ...
+
+ @abstractmethod
+ def normalize_path(self, path: str) -> str: ...
+
+
+SupportedFileSystem: TypeAlias = AbstractFileSystem | FileSystem
+
+
+def _copy_files(
+ source_fs: FileSystem,
+ source_path: str,
+ destination_fs: SupportedFileSystem | None,
+ destination_path: str,
+ chunk_size: int = 1048576,
+ use_threads: bool = True,
+) -> None: ...
+
+
+def _copy_files_selector(
+ source_fs: FileSystem,
+ source_sel: FileSelector,
+ destination_fs: SupportedFileSystem | None,
+ destination_base_dir: str,
+ chunk_size: int = 1048576,
+ use_threads: bool = True,
+) -> None: ...
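Editor's note: a quick usage sketch for the filesystem annotations above; the local path is a placeholder.

    from pyarrow import fs

    filesystem, path = fs.FileSystem.from_uri("file:///tmp/example.bin")
    infos = filesystem.get_file_info([path])          # list[FileInfo] | FileInfo
    if isinstance(infos, list) and infos[0].type == fs.FileType.File:
        with filesystem.open_input_stream(path) as stream:   # NativeFile
            payload = stream.read()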
diff --git a/python/pyarrow-stubs/pyarrow/_gcsfs.pyi b/python/pyarrow-stubs/pyarrow/_gcsfs.pyi
new file mode 100644
index 00000000000..a0af3fa3871
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_gcsfs.pyi
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+
+from ._fs import FileSystem
+from .lib import KeyValueMetadata
+
+
+class GcsFileSystem(FileSystem):
+ def __init__(
+ self,
+ *,
+ anonymous: bool = False,
+ access_token: str | None = None,
+ target_service_account: str | None = None,
+ credential_token_expiration: dt.datetime | None = None,
+ default_bucket_location: str = "US",
+ scheme: str = "https",
+ endpoint_override: str | None = None,
+ default_metadata: dict | KeyValueMetadata | None = None,
+ retry_time_limit: dt.timedelta | None = None,
+ project_id: str | None = None,
+ ): ...
+ @property
+ def default_bucket_location(self) -> str: ...
+
+ @property
+ def project_id(self) -> str: ...
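Editor's note: for reference, an anonymous client built from the constructor annotated above; the bucket location shown is the documented default.

    from pyarrow import fs

    gcs = fs.GcsFileSystem(anonymous=True, default_bucket_location="US")
    print(gcs.default_bucket_location)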
diff --git a/python/pyarrow-stubs/pyarrow/_hdfs.pyi b/python/pyarrow-stubs/pyarrow/_hdfs.pyi
new file mode 100644
index 00000000000..370eaf70927
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_hdfs.pyi
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from _typeshed import StrPath
+
+from ._fs import FileSystem
+
+
+class HadoopFileSystem(FileSystem):
+ def __init__(
+ self,
+ host: str | None = None,
+ port: int = 8020,
+ *,
+ user: str | None = None,
+ replication: int = 3,
+ buffer_size: int = 0,
+ default_block_size: int | None = None,
+ kerb_ticket: StrPath | None = None,
+ extra_conf: dict | None = None,
+ ): ...
+ @staticmethod
+ def from_uri(uri: str) -> HadoopFileSystem: ... # type: ignore[override]
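Editor's note: typical construction matching the parameters annotated above, assuming a reachable HDFS cluster and libhdfs at runtime; the host name and user are placeholders.

    from pyarrow import fs

    hdfs = fs.HadoopFileSystem(host="namenode.example", port=8020, user="analytics")
    # Equivalent URI form handled by the from_uri override above:
    hdfs2 = fs.HadoopFileSystem.from_uri("hdfs://namenode.example:8020/?user=analytics")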
diff --git a/python/pyarrow-stubs/pyarrow/_ipc.pyi b/python/pyarrow-stubs/pyarrow/_ipc.pyi
new file mode 100644
index 00000000000..5a87f243904
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_ipc.pyi
@@ -0,0 +1,317 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+import sys
+
+from io import IOBase
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+from collections.abc import Iterable, Iterator, Mapping
+from typing import Any, Literal, NamedTuple
+
+import pandas as pd
+
+from pyarrow._stubs_typing import SupportPyBuffer
+from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable
+
+from .io import Buffer, Codec, NativeFile, BufferReader
+from ._types import DictionaryMemo, KeyValueMetadata
+
+
+class MetadataVersion(enum.IntEnum):
+ V1 = enum.auto()
+ V2 = enum.auto()
+ V3 = enum.auto()
+ V4 = enum.auto()
+ V5 = enum.auto()
+
+
+class Alignment(enum.IntEnum):
+ Any = enum.auto()
+ At64Byte = enum.auto()
+ DataTypeSpecific = enum.auto()
+
+
+class WriteStats(NamedTuple):
+ num_messages: int
+ num_record_batches: int
+ num_dictionary_batches: int
+ num_dictionary_deltas: int
+ num_replaced_dictionaries: int
+
+
+class ReadStats(NamedTuple):
+ num_messages: int
+ num_record_batches: int
+ num_dictionary_batches: int
+ num_dictionary_deltas: int
+ num_replaced_dictionaries: int
+
+
+class IpcReadOptions(_Weakrefable):
+ ensure_native_endian: bool
+ use_threads: bool
+ ensure_alignment: Alignment
+ included_fields: list[int] | None
+
+ def __init__(
+ self,
+ *,
+ ensure_native_endian: bool = True,
+ use_threads: bool = True,
+ ensure_alignment: Alignment = ...,
+ included_fields: list[int] | None = None,
+ ) -> None: ...
+
+
+class IpcWriteOptions(_Weakrefable):
+ metadata_version: Any
+ allow_64bit: bool
+ use_legacy_format: bool
+ compression: Any
+ use_threads: bool
+ emit_dictionary_deltas: bool
+ unify_dictionaries: bool
+
+ def __init__(
+ self,
+ *,
+ metadata_version: MetadataVersion = MetadataVersion.V5,
+ allow_64bit: bool = False,
+ use_legacy_format: bool = False,
+ compression: Codec | Literal["lz4", "zstd"] | None = None,
+ use_threads: bool = True,
+ emit_dictionary_deltas: bool = False,
+ unify_dictionaries: bool = False,
+ ) -> None: ...
+
+
+class Message(_Weakrefable):
+ @property
+ def type(self) -> str: ...
+ @property
+ def metadata(self) -> Buffer: ...
+ @property
+ def metadata_version(self) -> MetadataVersion: ...
+ @property
+ def body(self) -> Buffer | None: ...
+ def equals(self, other: Message) -> bool: ...
+
+ def serialize_to(self, sink: NativeFile, alignment: int = 8,
+ memory_pool: MemoryPool | None = None): ...
+
+ def serialize(self, alignment: int = 8, memory_pool: MemoryPool |
+ None = None) -> Buffer: ...
+
+
+class MessageReader(_Weakrefable):
+ @classmethod
+ def open_stream(cls, source: bytes | NativeFile |
+ IOBase | SupportPyBuffer) -> Self: ...
+
+ def __iter__(self) -> Self: ...
+ def read_next_message(self) -> Message: ...
+
+ __next__ = read_next_message
+
+# ----------------------------------------------------------------------
+# File and stream readers and writers
+
+
+class _CRecordBatchWriter(_Weakrefable):
+ def write(self, table_or_batch: Table | RecordBatch): ...
+
+ def write_batch(
+ self,
+ batch: RecordBatch,
+ custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None,
+ ): ...
+
+ def write_table(self, table: Table, max_chunksize: int | None = None) -> None: ...
+
+ def close(self) -> None: ...
+
+ def __enter__(self) -> Self: ...
+ def __exit__(self, exc_type, exc_val, exc_tb): ...
+ @property
+ def stats(self) -> WriteStats: ...
+
+
+class _RecordBatchStreamWriter(_CRecordBatchWriter):
+ @property
+ def _use_legacy_format(self) -> bool: ...
+ @property
+ def _metadata_version(self) -> MetadataVersion: ...
+
+ def _open(
+ self,
+ sink,
+ schema: Schema,
+ options: IpcWriteOptions = IpcWriteOptions(), # noqa: Y011
+ metadata: dict[bytes, bytes] | None = None,
+ ): ...
+
+
+class _ReadPandasMixin:
+ def read_pandas(self, **options) -> pd.DataFrame: ...
+
+
+class RecordBatchReader(_ReadPandasMixin, _Weakrefable):
+ def __iter__(self) -> Self: ...
+ def read_next_batch(self) -> RecordBatch: ...
+
+ __next__ = read_next_batch
+ @property
+ def schema(self) -> Schema: ...
+
+ def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: ...
+
+ def iter_batches_with_custom_metadata(
+ self,
+ ) -> Iterator[RecordBatchWithMetadata]: ...
+
+ def read_all(self) -> Table: ...
+
+ def close(self) -> None: ...
+
+ def __enter__(self) -> Self: ...
+ def __exit__(self, exc_type, exc_val, exc_tb): ...
+ def cast(self, target_schema: Schema) -> Self: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+ def __arrow_c_stream__(self, requested_schema=None): ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, stream) -> Self: ...
+
+ @classmethod
+ def from_stream(cls, data: Any,
+ schema: Any = None) -> Self: ...
+
+ @classmethod
+ def from_batches(cls, schema: Any, batches: Iterable[RecordBatch]) -> Self: ...
+
+
+class _RecordBatchStreamReader(RecordBatchReader):
+ @property
+ def stats(self) -> ReadStats: ...
+
+ def _open(
+ self,
+ source,
+ options: IpcReadOptions | None = None,
+ memory_pool: MemoryPool | None = None,
+ ) -> Self: ...
+
+
+class _RecordBatchFileWriter(_RecordBatchStreamWriter):
+ ...
+
+
+class RecordBatchWithMetadata(NamedTuple):
+ batch: RecordBatch
+ custom_metadata: KeyValueMetadata
+
+
+class _RecordBatchFileReader(_ReadPandasMixin, _Weakrefable):
+ @property
+ def num_record_batches(self) -> int: ...
+
+ def get_batch(self, i: int) -> RecordBatch: ...
+
+ get_record_batch = get_batch
+ def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: ...
+
+ def read_all(self) -> Table: ...
+
+ def __enter__(self) -> Self: ...
+ def __exit__(self, exc_type, exc_val, exc_tb): ...
+ @property
+ def schema(self) -> Schema: ...
+ @property
+ def stats(self) -> ReadStats: ...
+ @property
+ def metadata(self) -> KeyValueMetadata | None: ...
+
+ def _open(
+ self,
+ source,
+ footer_offset: int | None = None,
+ options: IpcReadOptions | None = None,
+ memory_pool: MemoryPool | None = None,
+ ) -> Self: ...
+
+
+def get_tensor_size(tensor: Tensor) -> int: ...
+
+
+def get_record_batch_size(batch: RecordBatch) -> int: ...
+
+
+def write_tensor(tensor: Tensor, dest: NativeFile) -> int: ...
+
+
+def read_tensor(source: NativeFile) -> Tensor: ...
+
+
+def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: ...
+
+
+def read_schema(obj: Buffer | Message | BufferReader, dictionary_memo: DictionaryMemo |
+ None = None) -> Schema: ...
+
+
+def read_record_batch(
+ obj: Message | SupportPyBuffer,
+ schema: Schema,
+ dictionary_memo: DictionaryMemo | None = None) -> RecordBatch: ...
+
+
+__all__ = [
+ "MetadataVersion",
+ "Alignment",
+ "WriteStats",
+ "ReadStats",
+ "IpcReadOptions",
+ "IpcWriteOptions",
+ "Message",
+ "MessageReader",
+ "_CRecordBatchWriter",
+ "_RecordBatchStreamWriter",
+ "_ReadPandasMixin",
+ "RecordBatchReader",
+ "_RecordBatchStreamReader",
+ "_RecordBatchFileWriter",
+ "RecordBatchWithMetadata",
+ "_RecordBatchFileReader",
+ "get_tensor_size",
+ "get_record_batch_size",
+ "write_tensor",
+ "read_tensor",
+ "read_message",
+ "read_schema",
+ "read_record_batch",
+]
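Editor's note: the classes above are surfaced through the public pyarrow.ipc helpers (new_stream/open_stream); a minimal round trip that should satisfy these annotations. The compression choice is illustrative.

    import pyarrow as pa
    import pyarrow.ipc as ipc

    table = pa.table({"x": [1, 2, 3]})
    sink = pa.BufferOutputStream()
    with ipc.new_stream(sink, table.schema,
                        options=ipc.IpcWriteOptions(compression="zstd")) as writer:
        writer.write_table(table)

    reader = ipc.open_stream(sink.getvalue())
    result = reader.read_all()
    print(result.num_rows, reader.stats.num_record_batches)  # ReadStats after reading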
diff --git a/python/pyarrow-stubs/pyarrow/_json.pyi b/python/pyarrow-stubs/pyarrow/_json.pyi
new file mode 100644
index 00000000000..bae2ff404f0
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_json.pyi
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import IO, Any, Literal
+
+from _typeshed import StrPath
+
+from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable
+
+
+class ReadOptions(_Weakrefable):
+ use_threads: bool
+ block_size: int
+
+ def __init__(self, use_threads: bool | None = None,
+ block_size: int | None = None): ...
+
+ def equals(self, other: ReadOptions) -> bool: ...
+
+
+class ParseOptions(_Weakrefable):
+ explicit_schema: Schema
+ newlines_in_values: bool
+ unexpected_field_behavior: Literal["ignore", "error", "infer"]
+
+ def __init__(
+ self,
+ explicit_schema: Schema | None = None,
+ newlines_in_values: bool | None = None,
+ unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer",
+ ): ...
+ def equals(self, other: ParseOptions) -> bool: ...
+
+
+class JSONStreamingReader(RecordBatchReader):
+ ...
+
+
+def read_json(
+ input_file: StrPath | IO[Any],
+ read_options: ReadOptions | None = None,
+ parse_options: ParseOptions | None = None,
+ memory_pool: MemoryPool | None = None,
+) -> Table: ...
+
+
+def open_json(
+ input_file: StrPath | IO[Any],
+ read_options: ReadOptions | None = None,
+ parse_options: ParseOptions | None = None,
+ memory_pool: MemoryPool | None = None,
+) -> JSONStreamingReader: ...
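Editor's note: a short sketch against the reader annotated above; the file name is a placeholder.

    from pyarrow import json

    opts = json.ParseOptions(unexpected_field_behavior="ignore")
    table = json.read_json("records.jsonl", parse_options=opts)
    print(table.schema)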
diff --git a/python/pyarrow-stubs/pyarrow/_orc.pyi b/python/pyarrow-stubs/pyarrow/_orc.pyi
new file mode 100644
index 00000000000..faa0f57c1fd
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_orc.pyi
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import IO, Literal, Any
+
+from .lib import (
+ Buffer,
+ KeyValueMetadata,
+ MemoryPool,
+ NativeFile,
+ RecordBatch,
+ Schema,
+ Table,
+ _Weakrefable,
+)
+
+
+class ORCReader(_Weakrefable):
+ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+ def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ...
+ def metadata(self) -> KeyValueMetadata: ...
+ def schema(self) -> Schema: ...
+ def nrows(self) -> int: ...
+ def nstripes(self) -> int: ...
+ def file_version(self) -> str: ...
+ def software_version(self) -> str: ...
+ def compression(self) -> Literal["UNCOMPRESSED",
+ "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ...
+
+ def compression_size(self) -> int: ...
+ def row_index_stride(self) -> int: ...
+ def writer(self) -> str: ...
+ def writer_version(self) -> str: ...
+ def nstripe_statistics(self) -> int: ...
+ def content_length(self) -> int: ...
+ def stripe_statistics_length(self) -> int: ...
+ def file_footer_length(self) -> int: ...
+ def file_postscript_length(self) -> int: ...
+ def file_length(self) -> int: ...
+ def serialized_file_tail(self) -> int: ...
+ def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ...
+ def read(self, columns: list[str] | None = None) -> Table: ...
+
+
+class ORCWriter(_Weakrefable):
+ def open(
+ self,
+ where: str | NativeFile | IO,
+ *,
+ file_version: str | None = None,
+ batch_size: int | None = None,
+ stripe_size: int | None = None,
+ compression: Any = 'UNCOMPRESSED',
+ compression_block_size: int | None = None,
+ compression_strategy: Any = 'SPEED',
+ row_index_stride: int | None = None,
+ padding_tolerance: float | None = None,
+ dictionary_key_size_threshold: float | None = None,
+ bloom_filter_columns: list[int] | None = None,
+ bloom_filter_fpp: float | None = None,
+ ) -> None: ...
+ def write(self, table: Table) -> None: ...
+ def close(self) -> None: ...
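Editor's note: ORCReader/ORCWriter back the public pyarrow.orc module; a user-level sketch, assuming pyarrow was built with ORC support. The file name and compression choice are illustrative.

    import pyarrow as pa
    import pyarrow.orc as orc

    table = pa.table({"x": [1, 2, 3]})
    orc.write_table(table, "data.orc", compression="ZSTD")
    print(orc.ORCFile("data.orc").schema)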
diff --git a/python/pyarrow-stubs/pyarrow/_parquet.pyi b/python/pyarrow-stubs/pyarrow/_parquet.pyi
new file mode 100644
index 00000000000..2521936ad5c
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_parquet.pyi
@@ -0,0 +1,524 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable, Iterator, Sequence
+from typing import IO, Any, Literal, TypeAlias, TypedDict
+
+from _typeshed import StrPath
+
+from ._stubs_typing import Order
+from .lib import (
+ Buffer,
+ ChunkedArray,
+ KeyValueMetadata,
+ MemoryPool,
+ NativeFile,
+ RecordBatch,
+ Schema,
+ Table,
+ _Weakrefable,
+ DataType,
+ ListType,
+ LargeListType
+)
+
+_PhysicalType: TypeAlias = Literal[
+ "BOOLEAN",
+ "INT32",
+ "INT64",
+ "INT96",
+ "FLOAT",
+ "DOUBLE",
+ "BYTE_ARRAY",
+ "FIXED_LEN_BYTE_ARRAY",
+ "UNKNOWN",
+]
+_LogicTypeName: TypeAlias = Literal[
+ "UNDEFINED",
+ "STRING",
+ "MAP",
+ "LIST",
+ "ENUM",
+ "DECIMAL",
+ "DATE",
+ "TIME",
+ "TIMESTAMP",
+ "INT",
+ "FLOAT16",
+ "JSON",
+ "BSON",
+ "UUID",
+ "NONE",
+ "UNKNOWN",
+]
+_ConvertedType: TypeAlias = Literal[
+ "NONE",
+ "UTF8",
+ "MAP",
+ "MAP_KEY_VALUE",
+ "LIST",
+ "ENUM",
+ "DECIMAL",
+ "DATE",
+ "TIME_MILLIS",
+ "TIME_MICROS",
+ "TIMESTAMP_MILLIS",
+ "TIMESTAMP_MICROS",
+ "UINT_8",
+ "UINT_16",
+ "UINT_32",
+ "UINT_64",
+ "INT_8",
+ "INT_16",
+ "INT_32",
+ "INT_64",
+ "JSON",
+ "BSON",
+ "INTERVAL",
+ "UNKNOWN",
+]
+_Encoding: TypeAlias = Literal[
+ "PLAIN",
+ "PLAIN_DICTIONARY",
+ "RLE",
+ "BIT_PACKED",
+ "DELTA_BINARY_PACKED",
+ "DELTA_LENGTH_BYTE_ARRAY",
+ "DELTA_BYTE_ARRAY",
+ "RLE_DICTIONARY",
+ "BYTE_STREAM_SPLIT",
+ "UNKNOWN",
+]
+_Compression: TypeAlias = Literal[
+ "UNCOMPRESSED",
+ "SNAPPY",
+ "GZIP",
+ "LZO",
+ "BROTLI",
+ "LZ4",
+ "ZSTD",
+ "UNKNOWN",
+]
+
+
+class _Statistics(TypedDict):
+ has_min_max: bool
+ min: Any | None
+ max: Any | None
+ null_count: int | None
+ distinct_count: int | None
+ num_values: int
+ physical_type: _PhysicalType
+
+
+class Statistics(_Weakrefable):
+ def to_dict(self) -> _Statistics: ...
+ def equals(self, other: Statistics) -> bool: ...
+ @property
+ def has_min_max(self) -> bool: ...
+ @property
+ def has_null_count(self) -> bool: ...
+ @property
+ def has_distinct_count(self) -> bool: ...
+ @property
+ def min_raw(self) -> Any | None: ...
+ @property
+ def max_raw(self) -> Any | None: ...
+ @property
+ def min(self) -> Any | None: ...
+ @property
+ def max(self) -> Any | None: ...
+ @property
+ def null_count(self) -> int | None: ...
+ @property
+ def distinct_count(self) -> int | None: ...
+ @property
+ def num_values(self) -> int: ...
+ @property
+ def physical_type(self) -> _PhysicalType: ...
+ @property
+ def logical_type(self) -> ParquetLogicalType: ...
+ @property
+ def converted_type(self) -> _ConvertedType | None: ...
+ @property
+ def is_min_exact(self) -> bool: ...
+ @property
+ def is_max_exact(self) -> bool: ...
+
+
+class ParquetLogicalType(_Weakrefable):
+ def to_json(self) -> str: ...
+ @property
+ def type(self) -> _LogicTypeName: ...
+
+
+class _ColumnChunkMetaData(TypedDict):
+ file_offset: int
+ file_path: str | None
+ physical_type: _PhysicalType
+ num_values: int
+ path_in_schema: str
+ is_stats_set: bool
+ statistics: Statistics | None
+ compression: _Compression
+ encodings: tuple[_Encoding, ...]
+ has_dictionary_page: bool
+ dictionary_page_offset: int | None
+ data_page_offset: int
+ total_compressed_size: int
+ total_uncompressed_size: int
+
+
+class ColumnChunkMetaData(_Weakrefable):
+ def to_dict(self) -> _ColumnChunkMetaData: ...
+ def equals(self, other: ColumnChunkMetaData) -> bool: ...
+ @property
+ def file_offset(self) -> int: ...
+ @property
+ def file_path(self) -> str | None: ...
+ @property
+ def physical_type(self) -> _PhysicalType: ...
+ @property
+ def num_values(self) -> int: ...
+ @property
+ def path_in_schema(self) -> str: ...
+ @property
+ def is_stats_set(self) -> bool: ...
+ @property
+ def statistics(self) -> Statistics | None: ...
+ @property
+ def compression(self) -> _Compression: ...
+ @property
+ def encodings(self) -> tuple[_Encoding, ...]: ...
+ @property
+ def has_dictionary_page(self) -> bool: ...
+ @property
+ def dictionary_page_offset(self) -> int | None: ...
+ @property
+ def data_page_offset(self) -> int: ...
+ @property
+ def has_index_page(self) -> bool: ...
+ @property
+ def index_page_offset(self) -> int: ...
+ @property
+ def total_compressed_size(self) -> int: ...
+ @property
+ def total_uncompressed_size(self) -> int: ...
+ @property
+ def has_offset_index(self) -> bool: ...
+ @property
+ def has_column_index(self) -> bool: ...
+ @property
+ def metadata(self) -> dict[bytes, bytes] | None: ...
+ @property
+ def name(self) -> str: ...
+ @property
+ def max_definition_level(self) -> int: ...
+ @property
+ def max_repetition_level(self) -> int: ...
+ @property
+ def converted_type(self) -> _ConvertedType: ...
+ @property
+ def logical_type(self) -> ParquetLogicalType: ...
+
+
+class _SortingColumn(TypedDict):
+ column_index: int
+ descending: bool
+ nulls_first: bool
+
+
+class SortingColumn:
+ def __init__(
+ self, column_index: int, descending: bool = False, nulls_first: bool = False
+ ) -> None: ...
+
+ @classmethod
+ def from_ordering(
+ cls,
+ schema: Schema,
+ sort_keys: Sequence[str]
+ | Sequence[tuple[str, Order]]
+ | Sequence[str | tuple[str, Order]],
+ null_placement: Literal["at_start", "at_end"] = "at_end",
+ ) -> tuple[SortingColumn, ...]: ...
+
+ @staticmethod
+ def to_ordering(
+ schema: Schema, sorting_columns: tuple[SortingColumn, ...] | list[SortingColumn]
+ ) -> tuple[Sequence[tuple[str, Order]], Literal["at_start", "at_end"]]: ...
+ def __hash__(self) -> int: ...
+ @property
+ def column_index(self) -> int: ...
+ @property
+ def descending(self) -> bool: ...
+ @property
+ def nulls_first(self) -> bool: ...
+ def to_dict(self) -> _SortingColumn: ...
+
+
+class _RowGroupMetaData(TypedDict):
+ num_columns: int
+ num_rows: int
+ total_byte_size: int
+ columns: list[ColumnChunkMetaData]
+ sorting_columns: list[SortingColumn]
+
+
+class RowGroupMetaData(_Weakrefable):
+ def __init__(self, parent: FileMetaData, index: int) -> None: ...
+ def equals(self, other: RowGroupMetaData) -> bool: ...
+ def column(self, i: int) -> ColumnChunkMetaData: ...
+ def to_dict(self) -> _RowGroupMetaData: ...
+ @property
+ def num_columns(self) -> int: ...
+ @property
+ def num_rows(self) -> int: ...
+ @property
+ def total_byte_size(self) -> int: ...
+ @property
+ def sorting_columns(self) -> list[SortingColumn]: ...
+
+
+class _FileMetaData(TypedDict):
+ created_by: str
+ num_columns: int
+ num_rows: int
+ num_row_groups: int
+ format_version: str
+ serialized_size: int
+ row_groups: list[Any] # List of row group metadata dictionaries
+
+
+class FileMetaData(_Weakrefable):
+ def __hash__(self) -> int: ...
+ def to_dict(self) -> _FileMetaData: ...
+ def equals(self, other: FileMetaData) -> bool: ...
+ @property
+ def schema(self) -> ParquetSchema: ...
+ @property
+ def serialized_size(self) -> int: ...
+ @property
+ def num_columns(self) -> int: ...
+ @property
+ def num_rows(self) -> int: ...
+ @property
+ def num_row_groups(self) -> int: ...
+ @property
+ def format_version(self) -> str: ...
+ @property
+ def created_by(self) -> str: ...
+ @property
+ def metadata(self) -> dict[bytes, bytes] | None: ...
+ def row_group(self, i: int) -> RowGroupMetaData: ...
+ def set_file_path(self, path: str) -> None: ...
+ def append_row_groups(self, other: FileMetaData) -> None: ...
+ def write_metadata_file(self, where: StrPath | Buffer |
+ NativeFile | IO) -> None: ...
+
+
+class ParquetSchema(_Weakrefable):
+ def __init__(self, container: FileMetaData) -> None: ...
+ def __getitem__(self, i: int) -> ColumnSchema: ...
+ def __hash__(self) -> int: ...
+ def __len__(self) -> int: ...
+ @property
+ def names(self) -> list[str]: ...
+ def to_arrow_schema(self) -> Schema: ...
+ def equals(self, other: ParquetSchema) -> bool: ...
+ def column(self, i: int) -> ColumnSchema: ...
+
+
+class ColumnSchema(_Weakrefable):
+ def __init__(self, schema: ParquetSchema, index: int) -> None: ...
+ def equals(self, other: ColumnSchema) -> bool: ...
+ @property
+ def name(self) -> str: ...
+ @property
+ def path(self) -> str: ...
+ @property
+ def max_definition_level(self) -> int: ...
+ @property
+ def max_repetition_level(self) -> int: ...
+ @property
+ def physical_type(self) -> _PhysicalType: ...
+ @property
+ def logical_type(self) -> ParquetLogicalType: ...
+ @property
+ def converted_type(self) -> _ConvertedType | None: ...
+ @property
+ def length(self) -> int | None: ...
+ @property
+ def precision(self) -> int | None: ...
+ @property
+ def scale(self) -> int | None: ...
+
+
+class ParquetReader(_Weakrefable):
+ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+
+ def open(
+ self,
+ source: StrPath | Buffer | NativeFile | IO,
+ *,
+ use_memory_map: bool = False,
+ read_dictionary: Iterable[int] | Iterable[str] | None = None,
+ metadata: FileMetaData | None = None,
+ binary_type: DataType | None = None,
+ list_type: ListType | LargeListType | None = None,
+ buffer_size: int = 0,
+ pre_buffer: bool = False,
+ coerce_int96_timestamp_unit: str | None = None,
+ decryption_properties: FileDecryptionProperties | None = None,
+ thrift_string_size_limit: int | None = None,
+ thrift_container_size_limit: int | None = None,
+ page_checksum_verification: bool = False,
+ arrow_extensions_enabled: bool | None = None,
+ ) -> None: ...
+
+ @property
+ def column_paths(self) -> list[str]: ...
+ @property
+ def metadata(self) -> FileMetaData: ...
+ @property
+ def schema_arrow(self) -> Schema: ...
+ @property
+ def num_row_groups(self) -> int: ...
+ def set_use_threads(self, use_threads: bool) -> None: ...
+ def set_batch_size(self, batch_size: int) -> None: ...
+
+ def iter_batches(
+ self,
+ batch_size: int = 65536,
+ row_groups: list[int] | range | None = None,
+ column_indices: list[str] | list[int] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Iterator[RecordBatch]: ...
+
+ def read_row_group(
+ self, i: int, column_indices: list[int] | None = None, use_threads: bool = True
+ ) -> Table: ...
+
+ def read_row_groups(
+ self,
+ row_groups: Sequence[int] | range,
+ column_indices: list[str] | list[int] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Table: ...
+
+ def read_all(
+ self, column_indices: list[int] | None = None, use_threads: bool = True
+ ) -> Table: ...
+
+ def scan_contents(
+ self, columns: Sequence[str] | Sequence[int] | None = None,
+ batch_size: int = 65536
+ ) -> int: ...
+
+ def column_name_idx(self, column_name: str) -> int: ...
+ def read_column(self, column_index: int) -> ChunkedArray: ...
+ def close(self) -> None: ...
+ @property
+ def closed(self) -> bool: ...
+
+
+class ParquetWriter(_Weakrefable):
+ def __init__(
+ self,
+ where: StrPath | NativeFile | IO,
+ schema: Schema,
+ use_dictionary: bool | list[str] | None = None,
+ compression: _Compression | dict[str, _Compression] | str | None = None,
+ version: str | None = None,
+ write_statistics: bool | list[str] | None = None,
+ memory_pool: MemoryPool | None = None,
+ use_deprecated_int96_timestamps: bool = False,
+ coerce_timestamps: Literal["ms", "us"] | None = None,
+ data_page_size: int | None = None,
+ allow_truncated_timestamps: bool = False,
+ compression_level: int | dict[str, int] | None = None,
+ use_byte_stream_split: bool | list[str] = False,
+ column_encoding: _Encoding | dict[str, _Encoding] | None = None,
+ writer_engine_version: str | None = None,
+ data_page_version: str | None = None,
+ use_compliant_nested_type: bool = True,
+ encryption_properties: FileEncryptionProperties | None = None,
+ write_batch_size: int | None = None,
+ dictionary_pagesize_limit: int | None = None,
+ store_schema: bool = True,
+ write_page_index: bool = False,
+ write_page_checksum: bool = False,
+ sorting_columns: tuple[SortingColumn, ...] | None = None,
+ store_decimal_as_integer: bool = False,
+ write_time_adjusted_to_utc: bool = False,
+ max_rows_per_page: int | None = None,
+ ): ...
+ def close(self) -> None: ...
+ def write_table(self, table: Table, row_group_size: int | None = None) -> None: ...
+ def add_key_value_metadata(self, key_value_metadata: KeyValueMetadata) -> None: ...
+ @property
+ def metadata(self) -> FileMetaData: ...
+ @property
+ def use_dictionary(self) -> bool | list[str] | None: ...
+ @property
+ def use_deprecated_int96_timestamps(self) -> bool: ...
+ @property
+ def use_byte_stream_split(self) -> bool | list[str]: ...
+ @property
+ def column_encoding(self) -> _Encoding | dict[str, _Encoding] | None: ...
+ @property
+ def coerce_timestamps(self) -> Literal["ms", "us"] | None: ...
+ @property
+ def allow_truncated_timestamps(self) -> bool: ...
+ @property
+ def compression(self) -> _Compression | dict[str, _Compression] | None: ...
+ @property
+ def compression_level(self) -> int | dict[str, int] | None: ...
+ @property
+ def data_page_version(self) -> str | None: ...
+ @property
+ def use_compliant_nested_type(self) -> bool: ...
+ @property
+ def version(self) -> str | None: ...
+ @property
+ def write_statistics(self) -> bool | list[str] | None: ...
+ @property
+ def writer_engine_version(self) -> str: ...
+ @property
+ def row_group_size(self) -> int: ...
+ @property
+ def data_page_size(self) -> int: ...
+ @property
+ def encryption_properties(self) -> FileEncryptionProperties: ...
+ @property
+ def write_batch_size(self) -> int: ...
+ @property
+ def dictionary_pagesize_limit(self) -> int: ...
+ @property
+ def store_schema(self) -> bool: ...
+ @property
+ def store_decimal_as_integer(self) -> bool: ...
+
+
+class FileEncryptionProperties:
+ ...
+
+
+class FileDecryptionProperties:
+ ...
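Editor's note: a minimal sketch touching the metadata and statistics classes annotated above; the file name is a placeholder.

    import pyarrow as pa
    import pyarrow.parquet as pq

    pq.write_table(pa.table({"x": [1, 2, 3]}), "data.parquet", compression="ZSTD")
    meta = pq.read_metadata("data.parquet")            # FileMetaData
    stats = meta.row_group(0).column(0).statistics     # Statistics | None
    if stats is not None and stats.has_min_max:
        print(stats.min, stats.max, stats.physical_type)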
diff --git a/python/pyarrow-stubs/pyarrow/_parquet_encryption.pyi b/python/pyarrow-stubs/pyarrow/_parquet_encryption.pyi
new file mode 100644
index 00000000000..74b50ce665d
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_parquet_encryption.pyi
@@ -0,0 +1,141 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+import pathlib
+
+from collections.abc import Callable
+
+from pyarrow._fs import FileSystem
+from ._parquet import FileDecryptionProperties, FileEncryptionProperties
+from .lib import _Weakrefable
+
+
+class EncryptionConfiguration(_Weakrefable):
+ footer_key: str
+ column_keys: dict[str, list[str]]
+ encryption_algorithm: str
+ plaintext_footer: bool
+ double_wrapping: bool
+ cache_lifetime: dt.timedelta
+ internal_key_material: bool
+ data_key_length_bits: int
+ uniform_encryption: bool
+
+ def __init__(
+ self,
+ footer_key: str,
+ *,
+ column_keys: dict[str, str | list[str]] | None = None,
+ encryption_algorithm: str | None = None,
+ plaintext_footer: bool | None = None,
+ double_wrapping: bool | None = None,
+ cache_lifetime: dt.timedelta | None = None,
+ internal_key_material: bool | None = None,
+ data_key_length_bits: int | None = None,
+ uniform_encryption: bool | None = None,
+ ) -> None: ...
+
+
+class DecryptionConfiguration(_Weakrefable):
+ cache_lifetime: dt.timedelta
+ def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ...
+
+
+class KmsConnectionConfig(_Weakrefable):
+ kms_instance_id: str
+ kms_instance_url: str
+ key_access_token: str
+ custom_kms_conf: dict[str, str]
+
+ def __init__(
+ self,
+ *,
+ kms_instance_id: str | None = None,
+ kms_instance_url: str | None = None,
+ key_access_token: str | None = None,
+ custom_kms_conf: dict[str, str] | None = None,
+ ) -> None: ...
+ def refresh_key_access_token(self, value: str) -> None: ...
+
+
+class KmsClient(_Weakrefable):
+ def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ...
+ def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> bytes: ...
+
+
+class CryptoFactory(_Weakrefable):
+ def __init__(self, kms_client_factory: Callable[[
+ KmsConnectionConfig], KmsClient]): ...
+
+ def file_encryption_properties(
+ self,
+ kms_connection_config: KmsConnectionConfig,
+ encryption_config: EncryptionConfiguration,
+ ) -> FileEncryptionProperties: ...
+
+ def file_decryption_properties(
+ self,
+ kms_connection_config: KmsConnectionConfig,
+ decryption_config: DecryptionConfiguration | None = None,
+ ) -> FileDecryptionProperties: ...
+ def remove_cache_entries_for_token(self, access_token: str) -> None: ...
+ def remove_cache_entries_for_all_tokens(self) -> None: ...
+ def rotate_master_keys(
+ self,
+ kms_connection_config: KmsConnectionConfig,
+ parquet_file_path: str | pathlib.Path,
+ filesystem: FileSystem | None = None,
+ double_wrapping: bool = True,
+ cache_lifetime_seconds: int | float = 600,
+ ) -> None: ...
+
+
+class KeyMaterial(_Weakrefable):
+ @property
+ def is_footer_key(self) -> bool: ...
+ @property
+ def is_double_wrapped(self) -> bool: ...
+ @property
+ def master_key_id(self) -> str: ...
+ @property
+ def wrapped_dek(self) -> str: ...
+ @property
+ def kek_id(self) -> str: ...
+ @property
+ def wrapped_kek(self) -> str: ...
+ @property
+ def kms_instance_id(self) -> str: ...
+ @property
+ def kms_instance_url(self) -> str: ...
+ @staticmethod
+ def wrap(key_material: KeyMaterial) -> KeyMaterial: ...
+ @staticmethod
+ def parse(key_material_string: str) -> KeyMaterial: ...
+
+
+class FileSystemKeyMaterialStore(_Weakrefable):
+ def get_key_material(self, key_id: str) -> KeyMaterial: ...
+ def get_key_id_set(self) -> list[str]: ...
+ @classmethod
+ def for_file(
+ cls,
+ parquet_file_path: str | pathlib.Path, /,
+ filesystem: FileSystem | None = None
+ ) -> FileSystemKeyMaterialStore:
+ ...
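Editor's note: a sketch of the KMS wiring these classes describe, via the public pyarrow.parquet.encryption module. The client class and key identifiers are placeholders and the wrap/unwrap bodies are elided, so the commented call would only work with a real KMS client.

    import pyarrow.parquet.encryption as pe

    class PlaceholderKmsClient(pe.KmsClient):
        def wrap_key(self, key_bytes, master_key_identifier): ...      # elided
        def unwrap_key(self, wrapped_key, master_key_identifier): ...  # elided

    factory = pe.CryptoFactory(lambda config: PlaceholderKmsClient())
    encryption_config = pe.EncryptionConfiguration(
        footer_key="footer_key_id",
        column_keys={"column_key_id": ["sensitive_column"]},
    )
    # With a real KMS client, encryption properties would be derived as:
    # props = factory.file_encryption_properties(pe.KmsConnectionConfig(), encryption_config)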
diff --git a/python/pyarrow-stubs/pyarrow/_s3fs.pyi b/python/pyarrow-stubs/pyarrow/_s3fs.pyi
new file mode 100644
index 00000000000..f82f34d2cae
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_s3fs.pyi
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+
+from typing import Literal, TypedDict
+from typing_extensions import Required, NotRequired
+
+from ._fs import FileSystem
+from .lib import KeyValueMetadata
+
+
+class _ProxyOptions(TypedDict):
+ scheme: Required[Literal["http", "https"]]
+ host: Required[str]
+ port: Required[int]
+ username: NotRequired[str]
+ password: NotRequired[str]
+
+
+class S3LogLevel(enum.IntEnum):
+ Off = enum.auto()
+ Fatal = enum.auto()
+ Error = enum.auto()
+ Warn = enum.auto()
+ Info = enum.auto()
+ Debug = enum.auto()
+ Trace = enum.auto()
+
+
+Off = S3LogLevel.Off
+Fatal = S3LogLevel.Fatal
+Error = S3LogLevel.Error
+Warn = S3LogLevel.Warn
+Info = S3LogLevel.Info
+Debug = S3LogLevel.Debug
+Trace = S3LogLevel.Trace
+
+
+def initialize_s3(
+ log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1
+) -> None: ...
+def ensure_s3_initialized() -> None: ...
+def finalize_s3() -> None: ...
+def ensure_s3_finalized() -> None: ...
+def resolve_s3_region(bucket: str) -> str: ...
+
+
+class S3RetryStrategy:
+ max_attempts: int
+ def __init__(self, max_attempts: int = 3) -> None: ...
+
+
+class AwsStandardS3RetryStrategy(S3RetryStrategy):
+ ...
+
+
+class AwsDefaultS3RetryStrategy(S3RetryStrategy):
+ ...
+
+
+class S3FileSystem(FileSystem):
+ def __init__(
+ self,
+ *,
+ access_key: str | None = None,
+ secret_key: str | None = None,
+ session_token: str | None = None,
+ anonymous: bool = False,
+ region: str | None = None,
+ request_timeout: float | None = None,
+ connect_timeout: float | None = None,
+ scheme: Literal["http", "https"] = "https",
+ endpoint_override: str | None = None,
+ background_writes: bool = True,
+ default_metadata: dict | list | KeyValueMetadata | None = None,
+ role_arn: str | None = None,
+ session_name: str | None = None,
+ external_id: str | None = None,
+ load_frequency: int = 900,
+ proxy_options: _ProxyOptions | dict | tuple | str | None = None,
+ allow_bucket_creation: bool = False,
+ allow_bucket_deletion: bool = False,
+ allow_delayed_open: bool = False,
+ check_directory_existence_before_creation: bool = False,
+ tls_ca_file_path: str | None = None,
+ retry_strategy: S3RetryStrategy =
+ AwsStandardS3RetryStrategy(max_attempts=3), # noqa: Y011
+ force_virtual_addressing: bool = False,
+ ): ...
+ @property
+ def region(self) -> str: ...
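Editor's note: typical construction matching the constructor annotated above; the region value is a placeholder.

    from pyarrow import fs

    s3 = fs.S3FileSystem(region="us-east-1", anonymous=True, request_timeout=10)
    print(s3.region)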
diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi
new file mode 100644
index 00000000000..0715012fddc
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt
+
+from collections.abc import Collection, Iterator, Sequence
+from decimal import Decimal
+from typing import Any, Literal, Protocol, TypeAlias, TypeVar
+
+import numpy as np
+
+from numpy.typing import NDArray
+
+from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray
+
+ArrayLike: TypeAlias = Any
+ScalarLike: TypeAlias = Any
+Order: TypeAlias = Literal["ascending", "descending"]
+JoinType: TypeAlias = Literal[
+ "left semi",
+ "right semi",
+ "left anti",
+ "right anti",
+ "inner",
+ "left outer",
+ "right outer",
+ "full outer",
+]
+Compression: TypeAlias = Literal[
+ "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy"
+]
+NullEncoding: TypeAlias = Literal["mask", "encode"]
+NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"]
+TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"]
+Mask: TypeAlias = (
+ Sequence[bool | None]
+ | NDArray[np.bool_]
+ | BooleanArray
+ | ChunkedArray[Any]
+)
+Indices: TypeAlias = (
+ Sequence[int | None]
+ | NDArray[np.integer[Any]]
+ | IntegerArray
+ | ChunkedArray[Any]
+)
+
+PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes |
+ dt.date | dt.datetime | dt.time | dt.timedelta)
+
+_T = TypeVar("_T")
+_V = TypeVar("_V", covariant=True)
+
+SingleOrList: TypeAlias = list[_T] | _T
+
+
+class SupportEq(Protocol):
+ def __eq__(self, other) -> bool: ...
+
+
+class SupportLt(Protocol):
+ def __lt__(self, other) -> bool: ...
+
+
+class SupportGt(Protocol):
+ def __gt__(self, other) -> bool: ...
+
+
+class SupportLe(Protocol):
+ def __le__(self, other) -> bool: ...
+
+
+class SupportGe(Protocol):
+ def __ge__(self, other) -> bool: ...
+
+
+FilterTuple: TypeAlias = (
+ tuple[str, Literal["=", "==", "!="], SupportEq]
+ | tuple[str, Literal["<"], SupportLt]
+ | tuple[str, Literal[">"], SupportGt]
+ | tuple[str, Literal["<="], SupportLe]
+ | tuple[str, Literal[">="], SupportGe]
+ | tuple[str, Literal["in", "not in"], Collection]
+ | tuple[str, str, Any] # Allow general str for operator to avoid type errors
+)
+
+
+class Buffer(Protocol):
+ ...
+
+
+class SupportPyBuffer(Protocol):
+ ...
+
+
+class SupportArrowStream(Protocol):
+ def __arrow_c_stream__(self, requested_schema=None) -> Any: ...
+
+
+class SupportPyArrowArray(Protocol):
+ def __arrow_array__(self, type=None) -> Any: ...
+
+
+class SupportArrowArray(Protocol):
+ def __arrow_c_array__(self, requested_schema=None) -> Any: ...
+
+
+class SupportArrowDeviceArray(Protocol):
+ def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ...
+
+
+class SupportArrowSchema(Protocol):
+ def __arrow_c_schema__(self) -> Any: ...
+
+
+class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse]
+ def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ...
+ def __len__(self) -> int: ...
+ def __contains__(self, item: Any, /) -> bool: ...
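Editor's note: these aliases are intended for annotating user code. An illustrative, non-pyarrow helper is sketched below; since _stubs_typing is assumed to be stub-only, the import is guarded with TYPE_CHECKING and the annotations are written as strings.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Stub-only module: import the aliases for type checkers only.
        from pyarrow._stubs_typing import FilterTuple, Order

    def sort_key(column: str, order: "Order" = "ascending") -> "tuple[str, Order]":
        # Pair a column name with its sort order, in the shape sorting APIs expect.
        return (column, order)

    predicate: "FilterTuple" = ("year", ">=", 2020)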
diff --git a/python/pyarrow-stubs/pyarrow/_substrait.pyi b/python/pyarrow-stubs/pyarrow/_substrait.pyi
new file mode 100644
index 00000000000..6818d9822ab
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_substrait.pyi
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Callable
+from typing import Any
+
+from ._compute import Expression
+from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable
+
+
+def run_query(
+ plan: Buffer | bytes,
+ *,
+ table_provider: Callable[[list[str], Schema], Table] | None = None,
+ use_threads: bool = True,
+) -> RecordBatchReader: ...
+def _parse_json_plan(plan: bytes) -> Buffer: ...
+
+
+class SubstraitSchema:
+ schema: bytes
+ expression: bytes
+ def __init__(self, schema: bytes, expression: bytes) -> None: ...
+ def to_pysubstrait(self) -> Any: ...
+
+
+def serialize_schema(schema: Schema) -> SubstraitSchema: ...
+def deserialize_schema(buf: Buffer | bytes | SubstraitSchema) -> Schema: ...
+
+
+def serialize_expressions(
+ exprs: list[Expression],
+ names: list[str],
+ schema: Schema,
+ *,
+ allow_arrow_extensions: bool = False,
+) -> Buffer: ...
+
+
+class BoundExpressions(_Weakrefable):
+ @property
+ def schema(self) -> Schema: ...
+ @property
+ def expressions(self) -> dict[str, Expression]: ...
+ @classmethod
+ def from_substrait(cls, message: Buffer | bytes | Any) -> BoundExpressions: ...
+
+
+def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ...
+def get_supported_functions() -> list[str]: ...
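Editor's note: a sketch of the expression serialization round trip annotated above; the column name and expression are illustrative.

    import pyarrow as pa
    import pyarrow.compute as pc
    import pyarrow.substrait as substrait

    schema = pa.schema([("x", pa.int64())])
    buf = substrait.serialize_expressions([pc.field("x") > 1], ["gt_one"], schema)
    bound = substrait.deserialize_expressions(buf)      # BoundExpressions
    print(bound.expressions["gt_one"])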
diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi
new file mode 100644
index 00000000000..6b7a58ccfe6
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/_types.pyi
@@ -0,0 +1,966 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime as dt # noqa: F401
+import sys
+
+from collections.abc import Mapping, Sequence, Iterable, Iterator
+from decimal import Decimal # noqa: F401
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+from typing import Any, Generic, Literal
+
+import numpy as np
+import pandas as pd
+
+from pyarrow._stubs_typing import SupportArrowSchema
+from pyarrow.lib import ( # noqa: F401
+ Array,
+ ChunkedArray,
+ ExtensionArray,
+ MemoryPool,
+ MonthDayNano,
+ Table,
+)
+from typing_extensions import TypeVar, deprecated
+
+from .io import Buffer
+from .scalar import ExtensionScalar
+from ._stubs_typing import TimeUnit
+
+class _Weakrefable:
+ ...
+
+
+class _Metadata(_Weakrefable):
+ ...
+
+
+class DataType(_Weakrefable):
+ def field(self, i: int) -> Field: ...
+
+ @property
+ def id(self) -> int: ...
+ @property
+ def bit_width(self) -> int: ...
+
+ @property
+ def byte_width(self) -> int: ...
+
+ @property
+ def num_fields(self) -> int: ...
+
+ @property
+ def num_buffers(self) -> int: ...
+
+ @property
+ def has_variadic_buffers(self) -> bool: ...
+
+ # Properties that exist on specific subtypes but accessed generically
+ @property
+ def list_size(self) -> int: ...
+
+ def __hash__(self) -> int: ...
+
+ def equals(self, other: DataType | str, *,
+ check_metadata: bool = False) -> bool: ...
+
+ def to_pandas_dtype(self) -> np.generic: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+ def __arrow_c_schema__(self) -> Any: ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, schema) -> Self: ...
+
+
+_AsPyType = TypeVar("_AsPyType")
+_DataTypeT = TypeVar("_DataTypeT", bound=DataType)
+
+
+class _BasicDataType(DataType, Generic[_AsPyType]):
+ ...
+
+
+class NullType(_BasicDataType[None]):
+ ...
+
+
+class BoolType(_BasicDataType[bool]):
+ ...
+
+
+class UInt8Type(_BasicDataType[int]):
+ ...
+
+
+class Int8Type(_BasicDataType[int]):
+ ...
+
+
+class UInt16Type(_BasicDataType[int]):
+ ...
+
+
+class Int16Type(_BasicDataType[int]):
+ ...
+
+
+class UInt32Type(_BasicDataType[int]):
+ ...
+
+
+class Int32Type(_BasicDataType[int]):
+ ...
+
+
+class UInt64Type(_BasicDataType[int]):
+ ...
+
+
+class Int64Type(_BasicDataType[int]):
+ ...
+
+
+class Float16Type(_BasicDataType[float]):
+ ...
+
+
+class Float32Type(_BasicDataType[float]):
+ ...
+
+
+class Float64Type(_BasicDataType[float]):
+ ...
+
+
+class Date32Type(_BasicDataType[dt.date]):
+ ...
+
+
+class Date64Type(_BasicDataType[dt.date]):
+ ...
+
+
+class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]):
+ ...
+
+
+class StringType(_BasicDataType[str]):
+ ...
+
+
+class LargeStringType(_BasicDataType[str]):
+ ...
+
+
+class StringViewType(_BasicDataType[str]):
+ ...
+
+
+class BinaryType(_BasicDataType[bytes]):
+ ...
+
+
+class LargeBinaryType(_BasicDataType[bytes]):
+ ...
+
+
+class BinaryViewType(_BasicDataType[bytes]):
+ ...
+
+
+_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"])
+_Tz = TypeVar("_Tz", str, None, default=None)
+
+
+class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]):
+
+ @property
+ def unit(self) -> _Unit: ...
+
+ @property
+ def tz(self) -> _Tz: ...
+
+
+_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"])
+
+
+class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]):
+ @property
+ def unit(self) -> _Time32Unit: ...
+
+
+_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"])
+
+
+class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]):
+ @property
+ def unit(self) -> _Time64Unit: ...
+
+
+class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]):
+ @property
+ def unit(self) -> _Unit: ...
+
+
+class FixedSizeBinaryType(_BasicDataType[Decimal]):
+ ...
+
+
+_Precision = TypeVar("_Precision", default=Any)
+_Scale = TypeVar("_Scale", default=Any)
+
+
+class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]):
+ @property
+ def precision(self) -> _Precision: ...
+
+ @property
+ def scale(self) -> _Scale: ...
+
+
+class ListType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class LargeListType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class ListViewType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class LargeListViewType(DataType, Generic[_DataTypeT]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+
+class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]):
+ @property
+ def value_field(self) -> Field[_DataTypeT]: ...
+
+ @property
+ def value_type(self) -> _DataTypeT: ...
+
+ @property
+ def list_size(self) -> int: ...
+
+
+class DictionaryMemo(_Weakrefable):
+ ...
+
+
+_IndexT = TypeVar(
+ "_IndexT",
+ UInt8Type,
+ Int8Type,
+ UInt16Type,
+ Int16Type,
+ UInt32Type,
+ Int32Type,
+ UInt64Type,
+ Int64Type,
+)
+_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType)
+_ValueT = TypeVar("_ValueT", bound=DataType)
+_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False])
+
+
+class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]):
+ @property
+ def ordered(self) -> _Ordered: ...
+
+ @property
+ def index_type(self) -> _IndexT: ...
+
+ @property
+ def value_type(self) -> _BasicValueT: ...
+
+
+_K = TypeVar("_K", bound=DataType)
+
+
+class MapType(DataType, Generic[_K, _ValueT, _Ordered]):
+ @property
+ def key_field(self) -> Field[_K]: ...
+
+ @property
+ def key_type(self) -> _K: ...
+
+ @property
+ def item_field(self) -> Field[_ValueT]: ...
+
+ @property
+ def item_type(self) -> _ValueT: ...
+
+ @property
+ def keys_sorted(self) -> _Ordered: ...
+
+
+_Size = TypeVar("_Size", default=int)
+
+
+class StructType(DataType):
+ def get_field_index(self, name: str) -> int: ...
+
+ def field(self, i: int | str) -> Field: ...
+
+ def get_all_field_indices(self, name: str) -> list[int]: ...
+
+ def __len__(self) -> int: ...
+
+ def __iter__(self) -> Iterator[Field]: ...
+
+ __getitem__ = field
+ @property
+ def names(self) -> list[str]: ...
+
+ @property
+ def fields(self) -> list[Field]: ...
+
+
+class UnionType(DataType):
+ @property
+ def mode(self) -> Literal["sparse", "dense"]: ...
+
+ @property
+ def type_codes(self) -> list[int]: ...
+
+ def __len__(self) -> int: ...
+
+ def __iter__(self) -> Iterator[Field]: ...
+
+ def field(self, i: int) -> Field: ...
+
+ __getitem__ = field
+
+
+class SparseUnionType(UnionType):
+ @property
+ def mode(self) -> Literal["sparse"]: ...
+
+
+class DenseUnionType(UnionType):
+ @property
+ def mode(self) -> Literal["dense"]: ...
+
+
+_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type)
+
+
+class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]):
+ @property
+ def run_end_type(self) -> _RunEndType: ...
+ @property
+ def value_type(self) -> _BasicValueT: ...
+
+
+_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray)
+
+
+class BaseExtensionType(DataType):
+ def __arrow_ext_class__(self) -> type[ExtensionArray]: ...
+
+ def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ...
+
+ @property
+ def extension_name(self) -> str: ...
+
+ @property
+ def storage_type(self) -> DataType: ...
+
+ def wrap_array(self, storage: _StorageT) -> _StorageT: ...
+
+
+class ExtensionType(BaseExtensionType):
+ def __init__(self, storage_type: DataType, extension_name: str) -> None: ...
+
+ def __arrow_ext_serialize__(self) -> bytes: ...
+
+ @classmethod
+ def __arrow_ext_deserialize__(
+ cls, storage_type: DataType, serialized: bytes) -> Self: ...
+
+
+class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]):
+ @property
+ def value_type(self) -> _ValueT: ...
+
+ @property
+ def shape(self) -> list[int]: ...
+
+ @property
+ def dim_names(self) -> list[str] | None: ...
+
+ @property
+ def permutation(self) -> list[int] | None: ...
+
+
+class Bool8Type(BaseExtensionType):
+ ...
+
+
+class UuidType(BaseExtensionType):
+ ...
+
+
+class JsonType(BaseExtensionType):
+ ...
+
+
+class OpaqueType(BaseExtensionType):
+ @property
+ def type_name(self) -> str: ...
+
+ @property
+ def vendor_name(self) -> str: ...
+
+
+class UnknownExtensionType(ExtensionType):
+ def __init__(self, storage_type: DataType, serialized: bytes) -> None: ...
+
+
+def register_extension_type(ext_type: ExtensionType) -> None: ...
+
+
+def unregister_extension_type(type_name: str) -> None: ...
+
+
+class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]):
+ def __init__(
+ self, __arg0__: Mapping[str | bytes, str | bytes]
+ | Iterable[tuple[str, str]]
+ | KeyValueMetadata
+ | None = None, **kwargs: str
+ ) -> None: ...
+
+ def equals(self, other: KeyValueMetadata) -> bool: ...
+
+ def __len__(self) -> int: ...
+
+ def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override]
+
+ def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override]
+
+ def __iter__(self) -> Iterator[bytes]: ...
+
+ def get_all(self, key: str) -> list[bytes]: ...
+
+ def to_dict(self) -> dict[bytes, bytes]: ...
+
+
+class Field(_Weakrefable, Generic[_DataTypeT]):
+ def equals(self, other: Field, check_metadata: bool = False) -> bool: ...
+
+ def __hash__(self) -> int: ...
+
+ @property
+ def nullable(self) -> bool: ...
+
+ @property
+ def name(self) -> str: ...
+
+ @property
+ def metadata(self) -> dict[bytes, bytes] | None: ...
+
+ @property
+ def type(self) -> _DataTypeT: ...
+ def with_metadata(self, metadata: dict[bytes | str, bytes | str] |
+ Mapping[bytes | str, bytes | str] | Any) -> Self: ...
+
+ def remove_metadata(self) -> Self: ...
+
+ def with_type(self, new_type: DataType) -> Field: ...
+
+ def with_name(self, name: str) -> Self: ...
+
+ def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ...
+
+ def flatten(self) -> list[Field]: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Self: ...
+
+ def __arrow_c_schema__(self) -> Any: ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, schema) -> Self: ...
+
+
+class Schema(_Weakrefable):
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, key: str | int) -> Field: ...
+
+ _field = __getitem__
+ def __iter__(self) -> Iterator[Field]: ...
+
+ def __hash__(self) -> int: ...
+
+ def __sizeof__(self) -> int: ...
+ @property
+ def pandas_metadata(self) -> dict: ...
+
+ @property
+ def names(self) -> list[str]: ...
+
+ @property
+ def types(self) -> list[DataType]: ...
+
+ @property
+ def metadata(self) -> dict[bytes, bytes]: ...
+
+ def empty_table(self) -> Table: ...
+
+ def equals(self, other: Schema, check_metadata: bool = False) -> bool: ...
+
+ @classmethod
+ def from_pandas(cls, df: pd.DataFrame, preserve_index: bool |
+ None = None) -> Schema: ...
+
+ def field(self, i: int | str | bytes) -> Field: ...
+
+ @deprecated("Use 'field' instead")
+ def field_by_name(self, name: str) -> Field: ...
+
+ def get_field_index(self, name: str) -> int: ...
+
+ def get_all_field_indices(self, name: str) -> list[int]: ...
+
+ def append(self, field: Field) -> Schema: ...
+
+ def insert(self, i: int, field: Field) -> Schema: ...
+
+ def remove(self, i: int) -> Schema: ...
+
+ def set(self, i: int, field: Field) -> Schema: ...
+
+ @deprecated("Use 'with_metadata' instead")
+ def add_metadata(self, metadata: dict) -> Schema: ...
+
+ def with_metadata(self, metadata: dict) -> Schema: ...
+
+ def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ...
+
+ def remove_metadata(self) -> Schema: ...
+
+ def to_string(
+ self,
+ truncate_metadata: bool = True,
+ show_field_metadata: bool = True,
+ show_schema_metadata: bool = True,
+ element_size_limit: int | None = None,
+ ) -> str: ...
+
+ def _export_to_c(self, out_ptr: int) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int) -> Schema: ...
+
+ def __arrow_c_schema__(self) -> Any: ...
+
+ @staticmethod
+ def _import_from_c_capsule(schema: Any) -> Schema: ...
+
+
+def unify_schemas(
+ schemas: Sequence[Schema],
+ *,
+ promote_options: Literal["default", "permissive"] = "default"
+) -> Schema: ...
+
+
+def field(
+ name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None,
+ nullable: bool = ...,
+ metadata: dict[Any, Any] | None = None
+) -> Field[_DataTypeT] | Field[Any]: ...
+
+
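+# Editor's sketch (an assumption, not verified with a type checker): `field("x", int64())`
+# is meant to carry the concrete type through, e.g. `Field[Int64Type]`, whereas the
+# string form `field("x", "int64")` can only be typed as `Field[Any]`.
+
+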
+def null() -> NullType: ...
+
+
+def bool_() -> BoolType: ...
+
+
+def uint8() -> UInt8Type: ...
+
+
+def int8() -> Int8Type: ...
+
+
+def uint16() -> UInt16Type: ...
+
+
+def int16() -> Int16Type: ...
+
+
+def uint32() -> UInt32Type: ...
+
+
+def int32() -> Int32Type: ...
+
+
+def int64() -> Int64Type: ...
+
+
+def uint64() -> UInt64Type: ...
+
+
+def timestamp(
+ unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ...
+
+
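+# Editor's illustration (an assumption, not verified with a type checker): with the
+# TypeVar defaults declared above, `timestamp("s", tz="UTC")` is intended to resolve to
+# `TimestampType[Literal["s"], str]`, while a bare `timestamp("us")` falls back to the
+# defaults, i.e. `TimestampType[Literal["us"], None]`.
+
+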
+def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ...
+
+
+def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ...
+
+
+def duration(unit: _Unit | str) -> DurationType[_Unit]: ...
+
+
+def month_day_nano_interval() -> MonthDayNanoIntervalType: ...
+
+
+def date32() -> Date32Type: ...
+
+
+def date64() -> Date64Type: ...
+
+
+def float16() -> Float16Type: ...
+
+
+def float32() -> Float32Type: ...
+
+
+def float64() -> Float64Type: ...
+
+
+def decimal32(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal64(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal128(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def decimal256(precision: _Precision, scale: _Scale |
+ None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ...
+
+
+def string() -> StringType: ...
+
+
+utf8 = string
+
+
+def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ...
+
+
+def large_binary() -> LargeBinaryType: ...
+
+
+def large_string() -> LargeStringType: ...
+
+
+large_utf8 = large_string
+
+
+def binary_view() -> BinaryViewType: ...
+
+
+def string_view() -> StringViewType: ...
+
+
+def list_(
+ value_type: _DataTypeT | Field[_DataTypeT] | None = None,
+ list_size: Literal[-1] | _Size | None = None
+) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ...
+
+
+def large_list(value_type: _DataTypeT |
+ Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ...
+
+
+def list_view(value_type: _DataTypeT |
+ Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ...
+
+
+def large_list_view(
+ value_type: _DataTypeT | Field[_DataTypeT] | None = None
+) -> LargeListViewType[_DataTypeT]: ...
+
+
+def map_(
+ key_type: _K | Field | str | None = None,
+ item_type: _ValueT | Field | str | None = None,
+ keys_sorted: bool | None = None
+) -> MapType[_K, _ValueT, Literal[False]]: ...
+
+
+def dictionary(
+ index_type: _IndexT | str,
+ value_type: _BasicValueT | str,
+ ordered: _Ordered | None = None
+) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ...
+
+
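+# Editor's sketch (assumption): `dictionary(int32(), string())` is intended to be
+# inferred as `DictionaryType[Int32Type, StringType, Literal[False]]`, because
+# `_Ordered` defaults to `Literal[False]` when `ordered` is not passed.
+
+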
+def struct(
+ fields: Iterable[
+ Field[Any]
+ | tuple[str, Field[Any] | None]
+ | tuple[str, DataType | None]
+ ] | Mapping[str, Field[Any] | DataType | None],
+) -> StructType: ...
+
+
+def sparse_union(
+ child_fields: list[Field[Any]], type_codes: list[int] | None = None
+) -> SparseUnionType: ...
+
+
+def dense_union(
+ child_fields: list[Field[Any]], type_codes: list[int] | None = None
+) -> DenseUnionType: ...
+
+
+def union(
+ child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str,
+ type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ...
+
+
+def run_end_encoded(
+ run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None
+) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ...
+
+
+def json_(storage_type: DataType = ...) -> JsonType: ...
+
+
+def uuid() -> UuidType: ...
+
+
+def fixed_shape_tensor(
+ value_type: _ValueT,
+ shape: Sequence[int],
+ dim_names: Sequence[str] | None = None,
+ permutation: Sequence[int] | None = None,
+) -> FixedShapeTensorType[_ValueT]: ...
+
+
+def bool8() -> Bool8Type: ...
+
+
+def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ...
+
+
+def type_for_alias(name: Any) -> DataType: ...
+
+
+def schema(
+ fields: (
+ Iterable[Field[Any]]
+ | Iterable[tuple[str, DataType | str | None]]
+ | Mapping[Any, DataType | str | None]
+ ),
+ metadata: Mapping[bytes, bytes]
+ | Mapping[str, str]
+ | Mapping[bytes, str]
+ | Mapping[str, bytes] | None = None,
+) -> Schema: ...
+
+
+def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ...
+
+
+__all__ = [
+ "_Weakrefable",
+ "_Metadata",
+ "DataType",
+ "_BasicDataType",
+ "NullType",
+ "BoolType",
+ "UInt8Type",
+ "Int8Type",
+ "UInt16Type",
+ "Int16Type",
+ "UInt32Type",
+ "Int32Type",
+ "UInt64Type",
+ "Int64Type",
+ "Float16Type",
+ "Float32Type",
+ "Float64Type",
+ "Date32Type",
+ "Date64Type",
+ "MonthDayNanoIntervalType",
+ "StringType",
+ "LargeStringType",
+ "StringViewType",
+ "BinaryType",
+ "LargeBinaryType",
+ "BinaryViewType",
+ "TimestampType",
+ "Time32Type",
+ "Time64Type",
+ "DurationType",
+ "FixedSizeBinaryType",
+ "Decimal32Type",
+ "Decimal64Type",
+ "Decimal128Type",
+ "Decimal256Type",
+ "ListType",
+ "LargeListType",
+ "ListViewType",
+ "LargeListViewType",
+ "FixedSizeListType",
+ "DictionaryMemo",
+ "DictionaryType",
+ "MapType",
+ "StructType",
+ "UnionType",
+ "SparseUnionType",
+ "DenseUnionType",
+ "RunEndEncodedType",
+ "BaseExtensionType",
+ "ExtensionType",
+ "FixedShapeTensorType",
+ "Bool8Type",
+ "UuidType",
+ "JsonType",
+ "OpaqueType",
+ "UnknownExtensionType",
+ "register_extension_type",
+ "unregister_extension_type",
+ "KeyValueMetadata",
+ "Field",
+ "Schema",
+ "unify_schemas",
+ "field",
+ "null",
+ "bool_",
+ "uint8",
+ "int8",
+ "uint16",
+ "int16",
+ "uint32",
+ "int32",
+ "int64",
+ "uint64",
+ "timestamp",
+ "time32",
+ "time64",
+ "duration",
+ "month_day_nano_interval",
+ "date32",
+ "date64",
+ "float16",
+ "float32",
+ "float64",
+ "decimal32",
+ "decimal64",
+ "decimal128",
+ "decimal256",
+ "string",
+ "utf8",
+ "binary",
+ "large_binary",
+ "large_string",
+ "large_utf8",
+ "binary_view",
+ "string_view",
+ "list_",
+ "large_list",
+ "list_view",
+ "large_list_view",
+ "map_",
+ "dictionary",
+ "struct",
+ "sparse_union",
+ "dense_union",
+ "union",
+ "run_end_encoded",
+ "json_",
+ "uuid",
+ "fixed_shape_tensor",
+ "bool8",
+ "opaque",
+ "type_for_alias",
+ "schema",
+ "from_numpy_dtype",
+ "_Unit",
+ "_Tz",
+ "_Time32Unit",
+ "_Time64Unit",
+ "_DataTypeT",
+]
diff --git a/python/pyarrow-stubs/pyarrow/array.pyi b/python/pyarrow-stubs/pyarrow/array.pyi
new file mode 100644
index 00000000000..547e9c949d5
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/array.pyi
@@ -0,0 +1,894 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+from collections.abc import Iterable, Iterator, Sequence
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+from typing import (
+ Any,
+ Generic,
+ Literal,
+ TypeVar,
+)
+
+import numpy as np
+import pandas as pd
+
+from pyarrow._compute import CastOptions
+from pyarrow._stubs_typing import (
+ ArrayLike,
+ Indices,
+ Mask,
+ Order,
+ SupportArrowArray,
+ SupportArrowDeviceArray,
+ SupportPyArrowArray,
+)
+from pyarrow.lib import (
+ Buffer,
+ Device,
+ MemoryManager,
+ MemoryPool,
+ Tensor,
+ _Weakrefable,
+)
+from typing_extensions import deprecated
+import builtins
+
+from .scalar import ( # noqa: F401
+ BinaryScalar,
+ BinaryViewScalar,
+ BooleanScalar,
+ Date32Scalar,
+ Date64Scalar,
+ DictionaryScalar,
+ DoubleScalar,
+ DurationScalar,
+ ExtensionScalar,
+ FixedSizeBinaryScalar,
+ FixedSizeListScalar,
+ FloatScalar,
+ HalfFloatScalar,
+ Int16Scalar,
+ Int32Scalar,
+ Int64Scalar,
+ Int8Scalar,
+ LargeBinaryScalar,
+ LargeListScalar,
+ LargeListViewScalar,
+ LargeStringScalar,
+ ListScalar,
+ ListViewScalar,
+ MapScalar,
+ MonthDayNanoIntervalScalar,
+ NullScalar,
+ RunEndEncodedScalar,
+ Scalar,
+ StringScalar,
+ StringViewScalar,
+ StructScalar,
+ Time32Scalar,
+ Time64Scalar,
+ TimestampScalar,
+ UInt16Scalar,
+ UInt32Scalar,
+ UInt64Scalar,
+ UInt8Scalar,
+ UnionScalar,
+)
+from .device import DeviceAllocationType
+from ._types import ( # noqa: F401
+ BaseExtensionType,
+ BinaryType,
+ DataType,
+ Field,
+ Float64Type,
+ Int64Type,
+ MapType,
+ StringType,
+ StructType,
+ _AsPyType,
+ _BasicDataType,
+ _BasicValueT,
+ _DataTypeT,
+ _IndexT,
+ _RunEndType,
+ _Size,
+ _Time32Unit,
+ _Time64Unit,
+ _Tz,
+ _Unit,
+)
+from ._stubs_typing import NullableCollection
+
+
+def array(
+ values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray
+ | SupportArrowDeviceArray | SupportPyArrowArray,
+ type: Any | None = None,
+ mask: Mask | pd.Series[bool] | None = None,
+ size: int | None = None,
+ from_pandas: bool | None = None,
+ safe: bool = True,
+ memory_pool: MemoryPool | None = None,
+) -> ArrayLike: ...
+
+
+def asarray(
+ values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray
+ | SupportArrowDeviceArray,
+ type: _DataTypeT | Any | None = None,
+) -> Array[Scalar[_DataTypeT]] | ArrayLike: ...
+
+
+def nulls(
+ size: int,
+ type: Any | None = None,
+ memory_pool: MemoryPool | None = None,
+) -> ArrayLike: ...
+
+
+def repeat(
+ value: Any,
+ size: int,
+ memory_pool: MemoryPool | None = None,
+) -> ArrayLike: ...
+
+
+def infer_type(values: Iterable[Any], mask: Mask | None = None,
+ from_pandas: bool = False) -> DataType: ...
+
+
+class ArrayStatistics(_Weakrefable):
+ @property
+ def null_count(self) -> int | None: ...
+
+ @property
+ def distinct_count(self) -> int | None: ...
+
+ @property
+ def is_null_count_exact(self) -> bool | None: ...
+
+ @property
+ def is_distinct_count_exact(self) -> bool | None: ...
+
+ @property
+ def min(self) -> Any | None: ...
+
+ @property
+ def is_min_exact(self) -> bool | None: ...
+
+ @property
+ def max(self) -> Any | None: ...
+
+ @property
+ def is_max_exact(self) -> bool | None: ...
+
+
+_ConvertAs = TypeVar("_ConvertAs", pd.DataFrame, pd.Series)
+
+
+class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]):
+ def to_pandas(
+ self,
+ memory_pool: MemoryPool | None = None,
+ categories: list | tuple | None = None,
+ strings_to_categorical: bool = False,
+ zero_copy_only: bool = False,
+ integer_object_nulls: bool = False,
+ date_as_object: bool = True,
+ timestamp_as_object: bool = False,
+ use_threads: bool = True,
+ deduplicate_objects: bool = True,
+ ignore_metadata: bool = False,
+ safe: bool = True,
+ split_blocks: bool = False,
+ self_destruct: bool = False,
+ maps_as_pydicts: Literal["None", "lossy", "strict"] | None = None,
+ types_mapper: Any = None, # Callable[[DataType], ExtensionDtype | None] | None
+ coerce_temporal_nanoseconds: bool = False,
+ ) -> _ConvertAs: ...
+
+
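+# Editor's note: `_ConvertAs` ties the `to_pandas` return type to the container;
+# `Array` below parameterizes `_PandasConvertible[pd.Series]`, so `Array.to_pandas()`
+# is typed as returning a `pd.Series`.
+
+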
+_CastAs = TypeVar("_CastAs", bound=DataType)
+_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True)
+_ScalarT = TypeVar("_ScalarT", bound=Scalar)
+
+
+class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]):
+ def as_py(self) -> list[Any]: ...
+
+ def diff(self, other: Self) -> str: ...
+
+ # Private attribute used internally (e.g., for column names in batches)
+ _name: str | None
+
+ def cast(
+ self,
+ target_type: _CastAs | str,
+ safe: bool = True,
+ options: CastOptions | None = None,
+ memory_pool: MemoryPool | None = None,
+ ) -> Array[Scalar[_CastAs]]: ...
+
+ def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: ...
+
+ def sum(self, **kwargs) -> _Scalar_co: ...
+
+ @property
+ def type(self: Array[Scalar[_DataTypeT]]) -> _DataTypeT: ...
+ def unique(self) -> Self: ...
+
+ def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray: ...
+
+ def value_counts(self) -> StructArray: ...
+
+ @staticmethod
+ def from_pandas(
+ obj: pd.Series | np.ndarray | ArrayLike,
+ *,
+ mask: Mask | None = None,
+ type: _DataTypeT | None = None,
+ safe: bool = True,
+ memory_pool: MemoryPool | None = None,
+ ) -> Array[Scalar[_DataTypeT]] | Array[Scalar]: ...
+
+ @staticmethod
+ def from_buffers(
+ type: _DataTypeT,
+ length: int,
+ buffers: Sequence[Buffer | None],
+ null_count: int = -1,
+ offset: int = 0,
+ children: NullableCollection[Array[Scalar[_DataTypeT]]] | None = None,
+ ) -> Array[Scalar[_DataTypeT]]: ...
+
+ @property
+ def null_count(self) -> int: ...
+ @property
+ def nbytes(self) -> int: ...
+
+ def get_total_buffer_size(self) -> int: ...
+
+ def __sizeof__(self) -> int: ...
+ def __iter__(self) -> Iterator[_Scalar_co]: ...
+
+ def to_string(
+ self,
+ *,
+ indent: int = 2,
+ top_level_indent: int = 0,
+ window: int = 10,
+ container_window: int = 2,
+ skip_new_lines: bool = False,
+ ) -> str: ...
+
+ format = to_string
+ def equals(self, other: Array | Any) -> bool: ...
+
+ def __len__(self) -> int: ...
+
+ def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: ...
+
+ def is_nan(self) -> BooleanArray: ...
+
+ def is_valid(self) -> BooleanArray: ...
+
+ def fill_null(
+ self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType
+ ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: ...
+
+ def __getitem__(self, key: int | builtins.slice) -> _Scalar_co | Self: ...
+
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+ def take(self, indices: Indices) -> Self: ...
+
+ def drop_null(self) -> Self: ...
+
+ def filter(
+ self,
+ mask: Mask,
+ *,
+ null_selection_behavior: Literal["drop", "emit_null"] = "drop",
+ ) -> Self: ...
+
+ def index(
+ self: Array[_ScalarT] | Array[Scalar[_BasicDataType[_AsPyType]]],
+ value: _ScalarT | _AsPyType,
+ start: int | None = None,
+ end: int | None = None,
+ *,
+ memory_pool: MemoryPool | None = None,
+ ) -> Int64Scalar: ...
+
+ def sort(self, order: Order = "ascending", **kwargs) -> Self: ...
+
+ def __array__(self, dtype: np.dtype | None = None,
+ copy: bool | None = None) -> np.ndarray: ...
+
+ def to_numpy(self, zero_copy_only: bool = True,
+ writable: bool = False) -> np.ndarray: ...
+
+ def to_pylist(
+ self,
+ *,
+ maps_as_pydicts: Literal["lossy", "strict"] | None = None,
+ ) -> list[Any]: ...
+
+ tolist = to_pylist
+ def validate(self, *, full: bool = False) -> None: ...
+
+ @property
+ def offset(self) -> int: ...
+
+ def buffers(self) -> list[Buffer | None]: ...
+
+ def copy_to(self, destination: MemoryManager | Device) -> Self: ...
+
+ def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self: ...
+
+ def __arrow_c_array__(self, requested_schema=None) -> Any: ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+ def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ...
+
+ @classmethod
+ def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: ...
+
+ def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ...
+
+ @classmethod
+ def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+ def __dlpack__(self, stream: int | None = None) -> Any: ...
+
+ def __dlpack_device__(self) -> tuple[int, int]: ...
+
+ @property
+ def device_type(self) -> DeviceAllocationType: ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+ @property
+ def statistics(self) -> ArrayStatistics | None: ...
+
+
+class NullArray(Array[NullScalar]):
+ ...
+
+
+class BooleanArray(Array[BooleanScalar]):
+ @property
+ def false_count(self) -> int: ...
+ @property
+ def true_count(self) -> int: ...
+
+
+class NumericArray(Array[_ScalarT]):
+ ...
+
+
+class IntegerArray(NumericArray[_ScalarT]):
+ ...
+
+
+class FloatingPointArray(NumericArray[_ScalarT]):
+ ...
+
+
+class Int8Array(IntegerArray[Int8Scalar]):
+ ...
+
+
+class UInt8Array(IntegerArray[UInt8Scalar]):
+ ...
+
+
+class Int16Array(IntegerArray[Int16Scalar]):
+ ...
+
+
+class UInt16Array(IntegerArray[UInt16Scalar]):
+ ...
+
+
+class Int32Array(IntegerArray[Int32Scalar]):
+ ...
+
+
+class UInt32Array(IntegerArray[UInt32Scalar]):
+ ...
+
+
+class Int64Array(IntegerArray[Int64Scalar]):
+ ...
+
+
+class UInt64Array(IntegerArray[UInt64Scalar]):
+ ...
+
+
+class Date32Array(NumericArray[Date32Scalar]):
+ ...
+
+
+class Date64Array(NumericArray[Date64Scalar]):
+ ...
+
+
+class TimestampArray(NumericArray[TimestampScalar[_Unit, _Tz]]):
+ ...
+
+
+class Time32Array(NumericArray[Time32Scalar[_Time32Unit]]):
+ ...
+
+
+class Time64Array(NumericArray[Time64Scalar[_Time64Unit]]):
+ ...
+
+
+class DurationArray(NumericArray[DurationScalar[_Unit]]):
+ ...
+
+
+class MonthDayNanoIntervalArray(Array[MonthDayNanoIntervalScalar]):
+ ...
+
+
+class HalfFloatArray(FloatingPointArray[HalfFloatScalar]):
+ ...
+
+
+class FloatArray(FloatingPointArray[FloatScalar]):
+ ...
+
+
+class DoubleArray(FloatingPointArray[DoubleScalar]):
+ ...
+
+
+class FixedSizeBinaryArray(Array[FixedSizeBinaryScalar]):
+ ...
+
+
+class Decimal32Array(FixedSizeBinaryArray):
+ ...
+
+
+class Decimal64Array(FixedSizeBinaryArray):
+ ...
+
+
+class Decimal128Array(FixedSizeBinaryArray):
+ ...
+
+
+class Decimal256Array(FixedSizeBinaryArray):
+ ...
+
+
+class BaseListArray(Array[_ScalarT]):
+ def flatten(self, recursive: bool = False) -> Array: ...
+
+ def value_parent_indices(self) -> Int64Array: ...
+
+ def value_lengths(self) -> Int32Array: ...
+
+
+class ListArray(BaseListArray[_ScalarT]):
+ @classmethod
+ def from_arrays(
+ cls,
+ offsets: Int32Array | list[int] | list[int | None],
+ values: Array[Scalar[_DataTypeT]] | list[int] | list[float] | list[str]
+ | list[bytes] | list,
+ *,
+ type: _DataTypeT | None = None,
+ pool: MemoryPool | None = None,
+ mask: Mask | None = None,
+ ) -> (ListArray[ListScalar[
+ _DataTypeT | Int64Type | Float64Type | StringType | BinaryType
+ ]] | ListArray): ...
+
+ @property
+ def values(self) -> Array: ...
+
+ @property
+ def offsets(self) -> Int32Array: ...
+
+
+class LargeListArray(BaseListArray[LargeListScalar[_DataTypeT]]):
+ @classmethod
+ def from_arrays(
+ cls,
+ offsets: Int64Array | list[int] | list[int | None],
+ values: Array[Scalar[_DataTypeT]] | Array,
+ *,
+ type: _DataTypeT | None = None,
+ pool: MemoryPool | None = None,
+ mask: Mask | None = None,
+ ) -> LargeListArray[_DataTypeT]: ...
+
+ @property
+ def values(self) -> Array: ...
+
+ @property
+ def offsets(self) -> Int64Array: ...
+
+
+class ListViewArray(BaseListArray[ListViewScalar[_DataTypeT]]):
+ @classmethod
+ def from_arrays(
+ cls,
+ offsets: Int32Array,
+ values: Array[Scalar[_DataTypeT]] | Array,
+ *,
+ type: _DataTypeT | None = None,
+ pool: MemoryPool | None = None,
+ mask: Mask | None = None,
+ ) -> ListViewArray[_DataTypeT]: ...
+
+ @property
+ def values(self) -> Array: ...
+
+ @property
+ def offsets(self) -> Int32Array: ...
+
+ @property
+ def sizes(self) -> Int32Array: ...
+
+
+class LargeListViewArray(BaseListArray[LargeListViewScalar[_DataTypeT]]):
+ @classmethod
+ def from_arrays(
+ cls,
+ offsets: Int64Array,
+ values: Array[Scalar[_DataTypeT]] | Array,
+ *,
+ type: _DataTypeT | None = None,
+ pool: MemoryPool | None = None,
+ mask: Mask | None = None,
+ ) -> LargeListViewArray[_DataTypeT]: ...
+
+ @property
+ def values(self) -> Array: ...
+
+ @property
+ def offsets(self) -> Int64Array: ...
+
+ @property
+ def sizes(self) -> Int64Array: ...
+
+
+class FixedSizeListArray(BaseListArray[FixedSizeListScalar[_DataTypeT, _Size]]):
+ @classmethod
+ def from_arrays(
+ cls,
+ values: Array[Scalar[_DataTypeT]],
+ list_size: _Size | None = None,
+ *,
+ type: DataType | None = None,
+ mask: Mask | None = None,
+ ) -> FixedSizeListArray[_DataTypeT, _Size | None]: ...
+
+ @property
+ def values(self) -> BaseListArray[ListScalar[_DataTypeT]]: ...
+
+
+_MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType)
+_MapItemT = TypeVar("_MapItemT", bound=_BasicDataType)
+
+
+class MapArray(BaseListArray[MapScalar[_MapKeyT, _MapItemT]]):
+ @classmethod
+ def from_arrays(
+ cls,
+ offsets: Int64Array | list[int] | None,
+ keys: Array[Scalar[_MapKeyT]] | np.ndarray | list | None = None,
+ items: Array[Scalar[_MapItemT]] | np.ndarray | list | None = None,
+ values: Array | DataType | None = None,
+ *,
+ type: DataType | None = None,
+ pool: MemoryPool | None = None,
+ mask: Mask | None = None,
+ ) -> MapArray[_MapKeyT, _MapItemT]: ...
+
+ @property
+ def keys(self) -> Array: ...
+
+ @property
+ def items(self) -> Array: ...
+
+
+class UnionArray(Array[UnionScalar]):
+ @deprecated("Use fields() instead")
+ def child(self, pos: int) -> Field: ...
+
+ def field(self, pos: int) -> Array: ...
+
+ @property
+ def type_codes(self) -> Int8Array: ...
+
+ @property
+ def offsets(self) -> Int32Array: ...
+
+ @staticmethod
+ def from_dense(
+ types: Int8Array,
+ value_offsets: Int32Array,
+ children: NullableCollection[Array],
+ field_names: list[str] | None = None,
+ type_codes: Int8Array | list[int] | None = None,
+ ) -> UnionArray: ...
+
+ @staticmethod
+ def from_sparse(
+ types: Int8Array,
+ children: NullableCollection[Array],
+ field_names: list[str] | None = None,
+ type_codes: Int8Array | list[int] | None = None,
+ ) -> UnionArray: ...
+
+
+class StringArray(Array[StringScalar]):
+ @staticmethod
+ def from_buffers( # type: ignore[override]
+ length: int,
+ value_offsets: Buffer,
+ data: Buffer,
+ null_bitmap: Buffer | None = None,
+ null_count: int | None = -1,
+ offset: int | None = 0,
+ ) -> StringArray: ...
+
+
+class LargeStringArray(Array[LargeStringScalar]):
+ @staticmethod
+ def from_buffers( # type: ignore[override]
+ length: int,
+ value_offsets: Buffer,
+ data: Buffer,
+ null_bitmap: Buffer | None = None,
+ null_count: int | None = -1,
+ offset: int | None = 0,
+ ) -> LargeStringArray: ...
+
+
+class StringViewArray(Array[StringViewScalar]):
+ ...
+
+
+class BinaryArray(Array[BinaryScalar]):
+ @property
+ def total_values_length(self) -> int: ...
+
+
+class LargeBinaryArray(Array[LargeBinaryScalar]):
+ @property
+ def total_values_length(self) -> int: ...
+
+
+class BinaryViewArray(Array[BinaryViewScalar]):
+ ...
+
+
+class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]):
+ def dictionary_encode(self) -> Self: ... # type: ignore[override]
+ def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: ...
+
+ @property
+ def indices(self) -> Array[Scalar[_IndexT]]: ...
+ @property
+ def dictionary(self) -> Array[Scalar[_BasicValueT]]: ...
+
+ @staticmethod
+ def from_buffers( # type: ignore[override]
+ type: _BasicValueT,
+ length: int,
+ buffers: list[Buffer],
+ dictionary: Array | np.ndarray | pd.Series,
+ null_count: int = -1,
+ offset: int = 0,
+ ) -> DictionaryArray[Any, _BasicValueT]: ...
+
+ @staticmethod
+ def from_arrays(
+ indices: Indices | Sequence[int | None],
+ dictionary: Array | np.ndarray | pd.Series | list[Any],
+ mask: np.ndarray | pd.Series | BooleanArray | None = None,
+ ordered: bool = False,
+ from_pandas: bool = False,
+ safe: bool = True,
+ memory_pool: MemoryPool | None = None,
+ ) -> DictionaryArray: ...
+
+
+class StructArray(Array[StructScalar]):
+ def field(self, index: int | str) -> Array: ...
+
+ def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ...
+
+ @staticmethod
+ def from_arrays(
+ arrays: Iterable[Array | np.ndarray | list],
+ names: Sequence[str] | list[Field] | None = None,
+ fields: list[Field] | None = None,
+ mask=None,
+ memory_pool: MemoryPool | None = None,
+ type: StructType | None = None,
+ ) -> StructArray: ...
+
+ def sort(self, order: Order = "ascending", by: str |
+ None = None, **kwargs) -> StructArray: ...
+
+
+class RunEndEncodedArray(Array[RunEndEncodedScalar[_RunEndType, _BasicValueT]]):
+ @staticmethod
+ def from_arrays(
+ run_ends: Int16Array | Int32Array | Int64Array | list[int],
+ values: Array | list[Any], type: DataType | None = None,
+ ) -> RunEndEncodedArray[Any, _BasicValueT]: ...
+
+ @staticmethod
+ def from_buffers( # type: ignore[override]
+ type: DataType,
+ length: int,
+ buffers: list[Buffer] | list[None],
+ null_count: int = -1,
+ offset: int = 0,
+ children: tuple[Array, Array] | list[list[int]] | None = None,
+ ) -> RunEndEncodedArray[Any, _BasicValueT]: ...
+
+ @property
+ def run_ends(self) -> Array[Scalar[_RunEndType]]: ...
+
+ @property
+ def values(self) -> Array[Scalar[_BasicValueT]]: ...
+
+ def find_physical_offset(self) -> int: ...
+
+ def find_physical_length(self) -> int: ...
+
+
+_ArrayT = TypeVar("_ArrayT", bound=Array)
+
+
+class ExtensionArray(Array[ExtensionScalar], Generic[_ArrayT]):
+ @property
+ def storage(self) -> Any: ...
+
+ @staticmethod
+ def from_storage(typ: BaseExtensionType,
+ storage: _ArrayT) -> ExtensionArray[_ArrayT]: ...
+
+
+class JsonArray(ExtensionArray[_ArrayT]):
+ ...
+
+
+class UuidArray(ExtensionArray[_ArrayT]):
+ ...
+
+
+class FixedShapeTensorArray(ExtensionArray[_ArrayT]):
+ def to_numpy_ndarray(self) -> np.ndarray: ...
+
+ def to_tensor(self) -> Tensor: ...
+
+ @classmethod
+ def from_numpy_ndarray(
+ cls, obj: np.ndarray,
+ dim_names: list[str] | tuple[str, ...] | None = None
+ ) -> Self: ...
+
+
+class OpaqueArray(ExtensionArray[_ArrayT]):
+ ...
+
+
+class Bool8Array(ExtensionArray):
+ def to_numpy(self, zero_copy_only: bool = ...,
+ writable: bool = ...) -> np.ndarray: ...
+
+ @classmethod
+ def from_storage(cls, storage: Int8Array) -> Self: ... # type: ignore[override]
+
+ @classmethod
+ def from_numpy(cls, obj: np.ndarray) -> Self: ...
+
+
+def concat_arrays(arrays: Iterable[_ArrayT],
+ memory_pool: MemoryPool | None = None) -> _ArrayT: ...
+
+
+def _empty_array(type: _DataTypeT) -> Array[Scalar[_DataTypeT]]: ...
+
+
+__all__ = [
+ "array",
+ "asarray",
+ "nulls",
+ "repeat",
+ "infer_type",
+ "_PandasConvertible",
+ "Array",
+ "NullArray",
+ "BooleanArray",
+ "NumericArray",
+ "IntegerArray",
+ "FloatingPointArray",
+ "Int8Array",
+ "UInt8Array",
+ "Int16Array",
+ "UInt16Array",
+ "Int32Array",
+ "UInt32Array",
+ "Int64Array",
+ "UInt64Array",
+ "Date32Array",
+ "Date64Array",
+ "TimestampArray",
+ "Time32Array",
+ "Time64Array",
+ "DurationArray",
+ "MonthDayNanoIntervalArray",
+ "HalfFloatArray",
+ "FloatArray",
+ "DoubleArray",
+ "FixedSizeBinaryArray",
+ "Decimal32Array",
+ "Decimal64Array",
+ "Decimal128Array",
+ "Decimal256Array",
+ "BaseListArray",
+ "ListArray",
+ "LargeListArray",
+ "ListViewArray",
+ "LargeListViewArray",
+ "FixedSizeListArray",
+ "MapArray",
+ "UnionArray",
+ "StringArray",
+ "LargeStringArray",
+ "StringViewArray",
+ "BinaryArray",
+ "LargeBinaryArray",
+ "BinaryViewArray",
+ "DictionaryArray",
+ "StructArray",
+ "RunEndEncodedArray",
+ "ExtensionArray",
+ "Bool8Array",
+ "UuidArray",
+ "JsonArray",
+ "OpaqueArray",
+ "FixedShapeTensorArray",
+ "concat_arrays",
+ "_empty_array",
+ "_CastAs",
+]
diff --git a/python/pyarrow-stubs/pyarrow/builder.pyi b/python/pyarrow-stubs/pyarrow/builder.pyi
new file mode 100644
index 00000000000..9001d9835b6
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/builder.pyi
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable
+
+from pyarrow.lib import MemoryPool, _Weakrefable
+
+from .array import StringArray, StringViewArray
+
+
+class StringBuilder(_Weakrefable):
+ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+ def append(self, value: str | bytes | float | None) -> None: ...
+
+ def append_values(self, values: Iterable[str | bytes | float | None]) -> None: ...
+
+ def finish(self) -> StringArray: ...
+
+ @property
+ def null_count(self) -> int: ...
+ def __len__(self) -> int: ...
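+
+
+# Usage sketch (editor's illustration, based only on the annotations above):
+# builder = StringBuilder()
+# builder.append("a")
+# builder.append_values(["b", None])
+# arr = builder.finish() # expected to type-check as StringArray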
+
+
+class StringViewBuilder(_Weakrefable):
+ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+ def append(self, value: str | bytes | float | None) -> None: ...
+
+ def append_values(self, values: Iterable[str | bytes | float | None]) -> None: ...
+
+ def finish(self) -> StringViewArray: ...
+
+ @property
+ def null_count(self) -> int: ...
+ def __len__(self) -> int: ...
+
+
+__all__ = ["StringBuilder", "StringViewBuilder"]
diff --git a/python/pyarrow-stubs/pyarrow/cffi.pyi b/python/pyarrow-stubs/pyarrow/cffi.pyi
new file mode 100644
index 00000000000..e4f077d7155
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/cffi.pyi
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import cffi
+
+c_source: str
+ffi: cffi.FFI
diff --git a/python/pyarrow-stubs/pyarrow/compat.pyi b/python/pyarrow-stubs/pyarrow/compat.pyi
new file mode 100644
index 00000000000..30e3ec13e0d
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/compat.pyi
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+def encode_file_path(path: str | bytes) -> bytes: ...
+def tobytes(o: str | bytes) -> bytes: ...
+def frombytes(o: bytes, *, safe: bool = False) -> str: ...
+
+
+__all__ = ["encode_file_path", "tobytes", "frombytes"]
diff --git a/python/pyarrow-stubs/pyarrow/compute.pyi b/python/pyarrow-stubs/pyarrow/compute.pyi
new file mode 100644
index 00000000000..809bccd1b92
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/compute.pyi
@@ -0,0 +1,1834 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Callable, Hashable, Iterable, Sequence, Mapping
+from typing import Literal, TypeAlias, TypeVar, Any, ParamSpec
+
+import numpy as np
+
+# Option classes
+from pyarrow._compute import ArraySortOptions as ArraySortOptions
+from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions
+from pyarrow._compute import CastOptions as CastOptions
+from pyarrow._compute import CountOptions as CountOptions
+from pyarrow._compute import CumulativeOptions as CumulativeOptions # noqa: F401
+from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions
+from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions
+from pyarrow._compute import ( # noqa: F401
+ DictionaryEncodeOptions as DictionaryEncodeOptions)
+from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions
+
+# Expressions
+from pyarrow._compute import Expression as Expression
+from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions
+from pyarrow._compute import ( # noqa: F401
+ ExtractRegexSpanOptions as ExtractRegexSpanOptions)
+from pyarrow._compute import FilterOptions as FilterOptions
+from pyarrow._compute import FunctionOptions as FunctionOptions # noqa: F401
+from pyarrow._compute import IndexOptions as IndexOptions # noqa: F401
+from pyarrow._compute import JoinOptions as JoinOptions # noqa: F401
+from pyarrow._compute import ListFlattenOptions as ListFlattenOptions
+from pyarrow._compute import ListSliceOptions as ListSliceOptions
+from pyarrow._compute import MakeStructOptions as MakeStructOptions
+from pyarrow._compute import MapLookupOptions as MapLookupOptions
+from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions
+from pyarrow._compute import ModeOptions as ModeOptions
+from pyarrow._compute import NullOptions as NullOptions
+from pyarrow._compute import PadOptions as PadOptions
+from pyarrow._compute import PairwiseOptions as PairwiseOptions
+from pyarrow._compute import PartitionNthOptions as PartitionNthOptions
+from pyarrow._compute import PivotWiderOptions as PivotWiderOptions
+from pyarrow._compute import QuantileOptions as QuantileOptions
+from pyarrow._compute import RandomOptions as RandomOptions
+from pyarrow._compute import RankOptions as RankOptions
+from pyarrow._compute import RankQuantileOptions as RankQuantileOptions
+from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions
+from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions
+from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions
+from pyarrow._compute import RoundOptions as RoundOptions
+from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions
+from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions
+from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions
+from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions
+from pyarrow._compute import SelectKOptions as SelectKOptions
+from pyarrow._compute import SetLookupOptions as SetLookupOptions
+from pyarrow._compute import SkewOptions as SkewOptions
+from pyarrow._compute import SliceOptions as SliceOptions
+from pyarrow._compute import SortOptions as SortOptions
+from pyarrow._compute import SplitOptions as SplitOptions
+from pyarrow._compute import SplitPatternOptions as SplitPatternOptions # noqa: F401
+from pyarrow._compute import StrftimeOptions as StrftimeOptions
+from pyarrow._compute import StrptimeOptions as StrptimeOptions
+from pyarrow._compute import StructFieldOptions as StructFieldOptions
+from pyarrow._compute import TakeOptions as TakeOptions
+from pyarrow._compute import TDigestOptions as TDigestOptions
+from pyarrow._compute import TrimOptions as TrimOptions
+from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions
+from pyarrow._compute import VarianceOptions as VarianceOptions
+from pyarrow._compute import WeekOptions as WeekOptions
+from pyarrow._compute import WinsorizeOptions as WinsorizeOptions
+from pyarrow._compute import ZeroFillOptions as ZeroFillOptions
+
+# Functions
+from pyarrow._compute import call_function as call_function # noqa: F401
+from pyarrow._compute import ( # noqa: F401
+ call_tabular_function as call_tabular_function)
+from pyarrow._compute import get_function as get_function # noqa: F401
+from pyarrow._compute import list_functions as list_functions # noqa: F401
+from pyarrow._compute import ( # noqa: F401
+ register_scalar_function as register_scalar_function)
+from pyarrow._compute import ( # noqa: F401
+ register_aggregate_function as register_aggregate_function)
+from pyarrow._compute import ( # noqa: F401
+ register_vector_function as register_vector_function)
+from pyarrow._compute import ( # noqa: F401
+ register_tabular_function as register_tabular_function)
+
+# Function and Kernel classes
+from pyarrow._compute import Function as Function # noqa: F401
+from pyarrow._compute import Kernel as Kernel # noqa: F401
+from pyarrow._compute import ScalarFunction as ScalarFunction # noqa: F401
+from pyarrow._compute import ScalarKernel as ScalarKernel # noqa: F401
+from pyarrow._compute import VectorFunction as VectorFunction # noqa: F401
+from pyarrow._compute import VectorKernel as VectorKernel # noqa: F401
+from pyarrow._compute import ( # noqa: F401
+ ScalarAggregateFunction as ScalarAggregateFunction)
+from pyarrow._compute import ( # noqa: F401
+ ScalarAggregateKernel as ScalarAggregateKernel)
+from pyarrow._compute import ( # noqa: F401
+ HashAggregateFunction as HashAggregateFunction)
+from pyarrow._compute import HashAggregateKernel as HashAggregateKernel # noqa: F401
+
+# Udf
+
+from pyarrow._compute import _Order, _Placement
+from pyarrow._stubs_typing import ArrayLike, ScalarLike, PyScalar, TimeUnit
+from pyarrow._types import _RunEndType
+from . import lib
+
+_P = ParamSpec("_P")
+_R = TypeVar("_R")
+
+
+class _ExprComparable(Expression):
+ def __ge__(self, other: Any) -> Expression: ...
+ def __le__(self, other: Any) -> Expression: ...
+ def __gt__(self, other: Any) -> Expression: ...
+ def __lt__(self, other: Any) -> Expression: ...
+
+
+def field(*name_or_index: str | bytes | tuple[str | int, ...] | int) -> Expression: ...
+
+
+def scalar(value: PyScalar | lib.Scalar[Any] | Mapping) -> Expression: ...
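+
+
+# Editor's illustration (assumed usage, consistent with pyarrow's documented Expression
+# API): `field("cost") > scalar(10.0)` builds an `Expression`, which can then be used,
+# for example, as a dataset filter.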
+
+
+def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ...
+
+
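+# Editor's note: `_clone_signature` is a stub-only helper; assignments such as
+# `any = _clone_signature(all)` below reuse the typed signature of an existing
+# function for an alias without restating it.
+
+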
+# ============= compute functions =============
+_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType)
+_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True)
+_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar)
+_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray)
+_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array |
+ lib.Scalar | lib.ChunkedArray)
+ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]
+ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT
+
+SignedIntegerScalar: TypeAlias = (
+ lib.Scalar[lib.Int8Type]
+ | lib.Scalar[lib.Int16Type]
+ | lib.Scalar[lib.Int32Type]
+ | lib.Scalar[lib.Int64Type]
+)
+UnsignedIntegerScalar: TypeAlias = (
+ lib.Scalar[lib.UInt8Type]
+ | lib.Scalar[lib.UInt16Type]
+ | lib.Scalar[lib.UInt32Type]
+ | lib.Scalar[lib.UInt64Type]
+)
+IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar
+FloatScalar: TypeAlias = (lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type]
+ | lib.Scalar[lib.Float64Type])
+DecimalScalar: TypeAlias = (
+ lib.Scalar[lib.Decimal32Type]
+ | lib.Scalar[lib.Decimal64Type]
+ | lib.Scalar[lib.Decimal128Type]
+ | lib.Scalar[lib.Decimal256Type]
+)
+NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar
+NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar
+BinaryScalar: TypeAlias = (
+ lib.Scalar[lib.BinaryType]
+ | lib.Scalar[lib.LargeBinaryType]
+ | lib.Scalar[lib.FixedSizeBinaryType]
+)
+StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType]
+StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar
+_ListScalar: TypeAlias = (
+ lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any]
+)
+_LargeListScalar: TypeAlias = (
+ lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT]
+)
+ListScalar: TypeAlias = (
+ lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT]
+)
+TemporalScalar: TypeAlias = (
+ lib.Date32Scalar
+ | lib.Date64Scalar
+ | lib.Time32Scalar[Any]
+ | lib.Time64Scalar[Any]
+ | lib.TimestampScalar[Any]
+ | lib.DurationScalar[Any]
+ | lib.MonthDayNanoIntervalScalar
+)
+NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar
+NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar
+
+_NumericOrTemporalScalarT = TypeVar(
+ "_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar)
+_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar)
+NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT]
+_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray)
+_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar)
+NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar]
+_NumericOrDurationArrayT = TypeVar(
+ "_NumericOrDurationArrayT", bound=NumericOrDurationArray)
+NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT]
+_NumericOrTemporalArrayT = TypeVar(
+ "_NumericOrTemporalArrayT", bound=NumericOrTemporalArray)
+BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar]
+_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray)
+IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar]
+_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar)
+FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar]
+_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray)
+_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar)
+StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar]
+_StringArrayT = TypeVar("_StringArrayT", bound=StringArray)
+_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar)
+BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar]
+_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray)
+_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar)
+StringOrBinaryArray: TypeAlias = StringArray | BinaryArray
+_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray)
+_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar)
+TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar]
+_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray)
+_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]]
+_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]]
+ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]]
+
+# =============================== 1. Aggregation ===============================
+
+
+def array_take(
+ array: _ArrayT | lib.Scalar | lib.Table | Expression,
+ indices: list[int]
+ | list[int | None]
+ | lib.Int16Array
+ | lib.Int32Array
+ | lib.Int64Array
+ | lib.UInt64Array
+ | lib.ChunkedArray[lib.Int16Scalar]
+ | lib.ChunkedArray[lib.Int32Scalar]
+ | lib.ChunkedArray[lib.Int64Scalar]
+ | np.ndarray
+ | Expression,
+ /,
+ *,
+ boundscheck: bool | None = None,
+ options: TakeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ArrayT | Expression: ...
+
+
+# ========================= 1.1 functions =========================
+
+
+def all(
+ array: lib.BooleanScalar | BooleanArray,
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.BooleanScalar: ...
+
+
+any = _clone_signature(all)
+
+
+def approximate_median(
+ array: NumericScalar | NumericArray,
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleScalar: ...
+
+
+def count(
+ array: lib.Array | lib.ChunkedArray,
+ /,
+ mode: Literal["only_valid", "only_null", "all"] = "only_valid",
+ *,
+ options: CountOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+
+
+def count_distinct(
+ array: lib.Array | lib.ChunkedArray,
+ /,
+ mode: Literal["only_valid", "only_null", "all"] = "only_valid",
+ *,
+ options: CountOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+
+
+def first(
+ array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT],
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarT: ...
+
+
+def first_last(
+ array: lib.Array[Any] | lib.ChunkedArray[Any] | list[Any],
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | Mapping[Any, Any] | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar: ...
+
+
+def index(
+ data: lib.Array[Any] | lib.ChunkedArray[Any],
+ value: ScalarLike,
+ start: int | None = None,
+ end: int | None = None,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+
+
+last = _clone_signature(first)
+max = _clone_signature(first)
+min = _clone_signature(first)
+min_max = _clone_signature(first_last)
+
+
+def mean(
+ array: FloatScalar | FloatArray
+ | lib.NumericArray[lib.Scalar[Any]]
+ | lib.ChunkedArray[lib.Scalar[Any]]
+ | lib.Scalar[Any],
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Scalar[Any]: ...
+
+
+def mode(
+ array: NumericScalar | NumericArray,
+ /,
+ n: int = 1,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ options: ModeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructArray: ...
+
+
+def product(
+ array: _ScalarT | lib.NumericArray[_ScalarT],
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarT: ...
+
+
+def quantile(
+ array: NumericScalar | NumericArray,
+ /,
+ q: float | Sequence[float] = 0.5,
+ *,
+ interpolation: Literal["linear", "lower",
+ "higher", "nearest", "midpoint"] = "linear",
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ options: QuantileOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleArray: ...
+
+
+def stddev(
+ array: NumericScalar | NumericArray,
+ /,
+ *,
+ ddof: int = 0,
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ options: VarianceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleScalar: ...
+
+
+def sum(
+ array: _NumericScalarT | NumericArray[_NumericScalarT] | lib.Expression,
+ /,
+ *,
+ skip_nulls: bool = True,
+ min_count: int = 1,
+ options: ScalarAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | lib.Expression: ...
+
+
+def tdigest(
+ array: NumericScalar | NumericArray,
+ /,
+ q: float | Sequence[float] = 0.5,
+ *,
+ delta: int = 100,
+ buffer_size: int = 500,
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ options: TDigestOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleArray: ...
+
+
+def variance(
+ array: NumericScalar | NumericArray | ArrayLike,
+ /,
+ *,
+ ddof: int = 0,
+ skip_nulls: bool = True,
+ min_count: int = 0,
+ options: VarianceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleScalar: ...
+
+
+def winsorize(
+ array: _NumericArrayT,
+ /,
+ lower_limit: float = 0.0,
+ upper_limit: float = 1.0,
+ *,
+ options: WinsorizeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericArrayT: ...
+
+
+def skew(
+ array: NumericScalar | NumericArray | ArrayLike,
+ /,
+ *,
+ skip_nulls: bool = True,
+ biased: bool = True,
+ min_count: int = 0,
+ options: SkewOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleScalar: ...
+
+
+def kurtosis(
+ array: NumericScalar | NumericArray | ArrayLike,
+ /,
+ *,
+ skip_nulls: bool = True,
+ biased: bool = True,
+ min_count: int = 0,
+ options: SkewOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleScalar: ...
+
+
+def top_k_unstable(
+ values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table,
+ k: int,
+ sort_keys: list | None = None,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Array: ...
+
+
+def bottom_k_unstable(
+ values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table,
+ k: int,
+ sort_keys: list | None = None,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Array: ...
+
+
+# ========================= 2. Element-wise (“scalar”) functions =========
+
+# ========================= 2.1 Arithmetic =========================
+def abs(x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None) -> (
+ _NumericOrDurationT | _NumericOrDurationArrayT | Expression): ...
+
+
+abs_checked = _clone_signature(abs)
+
+
+def add(
+ x: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
+ | ArrayLike | int | Expression),
+ y: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
+ | ArrayLike | int | Expression),
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ...
+
+
+add_checked = _clone_signature(add)
+
+
+def divide(
+ x: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
+ | Expression),
+ y: (_NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT
+ | Expression),
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ...
+
+
+divide_checked = _clone_signature(divide)
+
+
+def exp(
+ exponent: _FloatArrayT | ArrayOrChunkedArray[NonFloatNumericScalar] | _FloatScalarT
+ | NonFloatNumericScalar | lib.DoubleScalar | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> (
+ _FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression): ...
+
+
+expm1 = _clone_signature(exp)
+multiply = _clone_signature(add)
+multiply_checked = _clone_signature(add)
+
+
+def negate(
+ x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None) -> (
+ _NumericOrDurationT | _NumericOrDurationArrayT | Expression): ...
+
+
+negate_checked = _clone_signature(negate)
+
+
+def power(
+ base: _NumericScalarT | Expression | _NumericArrayT | NumericScalar,
+ exponent: _NumericScalarT | Expression | _NumericArrayT | NumericScalar,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+power_checked = _clone_signature(power)
+
+
+def sign(
+ x: NumericOrDurationArray | NumericOrDurationScalar | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> (
+ lib.NumericArray[lib.Int8Scalar]
+ | lib.NumericArray[lib.FloatScalar]
+ | lib.NumericArray[lib.DoubleScalar]
+ | lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar | Expression
+): ...
+
+
+def sqrt(
+ x: NumericArray | NumericScalar | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None) -> (
+ FloatArray | FloatScalar | Expression): ...
+
+
+sqrt_checked = _clone_signature(sqrt)
+
+subtract = _clone_signature(add)
+subtract_checked = _clone_signature(add)
+
+# ========================= 2.1 Bit-wise functions =========================
+
+
+def bit_wise_and(
+ x: _NumericScalarT | _NumericArrayT | NumericScalar | Expression
+ | ArrayOrChunkedArray[NumericScalar],
+ y: _NumericScalarT | _NumericArrayT | NumericScalar | Expression
+ | ArrayOrChunkedArray[NumericScalar],
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+def bit_wise_not(
+ x: _NumericScalarT | _NumericArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+bit_wise_or = _clone_signature(bit_wise_and)
+bit_wise_xor = _clone_signature(bit_wise_and)
+shift_left = _clone_signature(bit_wise_and)
+shift_left_checked = _clone_signature(bit_wise_and)
+shift_right = _clone_signature(bit_wise_and)
+shift_right_checked = _clone_signature(bit_wise_and)
+
+# ========================= 2.2 Rounding functions =========================
+
+
+def ceil(
+ x: _FloatScalarT | _FloatArrayT | Expression, /, *, memory_pool: lib.MemoryPool |
+ None = None) -> _FloatScalarT | _FloatArrayT | Expression: ...
+
+
+floor = _clone_signature(ceil)
+
+
+def round(
+ x: _NumericScalarT | _NumericArrayT | Expression | list,
+ /,
+ ndigits: int = 0,
+ round_mode: Literal[
+ "down",
+ "up",
+ "towards_zero",
+ "towards_infinity",
+ "half_down",
+ "half_up",
+ "half_towards_zero",
+ "half_towards_infinity",
+ "half_to_even",
+ "half_to_odd",
+ ] = "half_to_even",
+ *,
+ options: RoundOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+def round_to_multiple(
+ x: _NumericScalarT | _NumericArrayT | list | Expression,
+ /,
+ multiple: int | float | NumericScalar = 1.0,
+ round_mode: Literal[
+ "down",
+ "up",
+ "towards_zero",
+ "towards_infinity",
+ "half_down",
+ "half_up",
+ "half_towards_zero",
+ "half_towards_infinity",
+ "half_to_even",
+ "half_to_odd",
+ ] = "half_to_even",
+ *,
+ options: RoundToMultipleOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericScalarT | _NumericArrayT | Expression: ...
+
+
+def round_binary(
+ x: _NumericScalarT | _NumericArrayT | float | list | Expression,
+ s: lib.Int8Scalar
+ | lib.Int16Scalar
+ | lib.Int32Scalar
+ | lib.Int64Scalar
+ | lib.Scalar
+ | Iterable
+ | float
+ | Expression,
+ /,
+ round_mode: Literal[
+ "down",
+ "up",
+ "towards_zero",
+ "towards_infinity",
+ "half_down",
+ "half_up",
+ "half_towards_zero",
+ "half_towards_infinity",
+ "half_to_even",
+ "half_to_odd",
+ ] = "half_to_even",
+ *,
+ options: RoundBinaryOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ _NumericScalarT | lib.NumericArray[_NumericScalarT] | _NumericArrayT
+ | Expression): ...
+
+
+trunc = _clone_signature(ceil)
+
+# ========================= 2.3 Logarithmic functions =========================
+
+
+def ln(
+ x: FloatScalar | FloatArray | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> (
+ lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar]
+ | lib.NumericArray[lib.DoubleScalar] | Expression): ...
+
+
+ln_checked = _clone_signature(ln)
+log10 = _clone_signature(ln)
+log10_checked = _clone_signature(ln)
+log1p = _clone_signature(ln)
+log1p_checked = _clone_signature(ln)
+log2 = _clone_signature(ln)
+log2_checked = _clone_signature(ln)
+
+
+def logb(
+ x: FloatScalar | FloatArray | Expression | Any,
+ b: FloatScalar | FloatArray | Expression | Any,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> (
+ lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar]
+ | lib.NumericArray[lib.DoubleScalar] | Expression | Any): ...
+
+
+logb_checked = _clone_signature(logb)
+
+# ========================= 2.4 Trigonometric functions =========================
+acos = _clone_signature(ln)
+acos_checked = _clone_signature(ln)
+acosh = _clone_signature(ln)
+acosh_checked = _clone_signature(ln)
+asin = _clone_signature(ln)
+asin_checked = _clone_signature(ln)
+asinh = _clone_signature(ln)
+atan = _clone_signature(ln)
+atanh_checked = _clone_signature(ln)
+atanh = _clone_signature(ln)
+cos = _clone_signature(ln)
+cos_checked = _clone_signature(ln)
+cosh = _clone_signature(ln)
+sin = _clone_signature(ln)
+sin_checked = _clone_signature(ln)
+sinh = _clone_signature(ln)
+tan = _clone_signature(ln)
+tan_checked = _clone_signature(ln)
+tanh = _clone_signature(ln)
+
+
+def atan2(
+ y: FloatScalar | FloatArray | Expression | Any,
+ x: FloatScalar | FloatArray | Expression | Any,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> (
+ lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar]
+ | lib.NumericArray[lib.DoubleScalar] | Expression): ...
+
+
+# ========================= 2.5 Comparison functions =========================
+def equal(
+ x: lib.Scalar | lib.Array | lib.ChunkedArray | list | Expression | Any,
+ y: lib.Scalar | lib.Array | lib.ChunkedArray | list | Expression | Any,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+greater = _clone_signature(equal)
+greater_equal = _clone_signature(equal)
+less = _clone_signature(equal)
+less_equal = _clone_signature(equal)
+not_equal = _clone_signature(equal)
+
+
+def max_element_wise(
+ *args: ScalarOrArray[_Scalar_CoT] | Expression | ScalarLike | ArrayLike,
+ skip_nulls: bool = True,
+ options: ElementWiseAggregateOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _Scalar_CoT | Expression | lib.Scalar | lib.Array: ...
+
+
+min_element_wise = _clone_signature(max_element_wise)
+
+# ========================= 2.6 Logical functions =========================
+
+
+def and_(
+ x: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar],
+ y: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar],
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> (
+ lib.BooleanScalar | lib.BooleanArray | Expression
+ | ScalarOrArray[lib.BooleanScalar]): ...
+
+
+and_kleene = _clone_signature(and_)
+and_not = _clone_signature(and_)
+and_not_kleene = _clone_signature(and_)
+or_ = _clone_signature(and_)
+or_kleene = _clone_signature(and_)
+xor = _clone_signature(and_)
+
+
+def invert(
+ x: lib.BooleanScalar | _BooleanArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> lib.BooleanScalar | _BooleanArrayT | Expression: ...
+
+
+# ========================= 2.10 String predicates =========================
+def ascii_is_alnum(
+ strings: StringScalar | StringArray | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+ascii_is_alpha = _clone_signature(ascii_is_alnum)
+ascii_is_decimal = _clone_signature(ascii_is_alnum)
+ascii_is_lower = _clone_signature(ascii_is_alnum)
+ascii_is_printable = _clone_signature(ascii_is_alnum)
+ascii_is_space = _clone_signature(ascii_is_alnum)
+ascii_is_upper = _clone_signature(ascii_is_alnum)
+utf8_is_alnum = _clone_signature(ascii_is_alnum)
+utf8_is_alpha = _clone_signature(ascii_is_alnum)
+utf8_is_decimal = _clone_signature(ascii_is_alnum)
+utf8_is_digit = _clone_signature(ascii_is_alnum)
+utf8_is_lower = _clone_signature(ascii_is_alnum)
+utf8_is_numeric = _clone_signature(ascii_is_alnum)
+utf8_is_printable = _clone_signature(ascii_is_alnum)
+utf8_is_space = _clone_signature(ascii_is_alnum)
+utf8_is_upper = _clone_signature(ascii_is_alnum)
+ascii_is_title = _clone_signature(ascii_is_alnum)
+utf8_is_title = _clone_signature(ascii_is_alnum)
+string_is_ascii = _clone_signature(ascii_is_alnum)
+
+# ========================= 2.11 String transforms =========================
+
+
+def ascii_capitalize(
+ strings: _StringScalarT | _StringArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+ascii_lower = _clone_signature(ascii_capitalize)
+ascii_reverse = _clone_signature(ascii_capitalize)
+ascii_swapcase = _clone_signature(ascii_capitalize)
+ascii_title = _clone_signature(ascii_capitalize)
+ascii_upper = _clone_signature(ascii_capitalize)
+
+
+def binary_length(
+ strings: ScalarOrArray[StringOrBinaryScalar] | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array
+ | Expression
+): ...
+
+
+def binary_repeat(
+ strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression,
+ num_repeats: int | list[int] | list[int | None],
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ _StringOrBinaryScalarT | lib.Array[_StringOrBinaryScalarT] | _StringOrBinaryArrayT
+ | Expression): ...
+
+
+def binary_replace_slice(
+ strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression,
+ /,
+ start: int,
+ stop: int,
+ replacement: str | bytes,
+ *,
+ options: ReplaceSliceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ...
+
+
+def binary_reverse(
+ strings: _BinaryScalarT | _BinaryArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> _BinaryScalarT | _BinaryArrayT | Expression: ...
+
+
+def replace_substring(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ pattern: str | bytes,
+ replacement: str | bytes,
+ *,
+ max_replacements: int | None = None,
+ options: ReplaceSubstringOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+replace_substring_regex = _clone_signature(replace_substring)
+
+
+def utf8_capitalize(
+ strings: _StringScalarT | _StringArrayT | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+def utf8_length(
+ strings: lib.StringScalar | lib.LargeStringScalar | lib.StringArray
+ | lib.ChunkedArray[lib.StringScalar] | lib.LargeStringArray
+ | lib.ChunkedArray[lib.LargeStringScalar] | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> (
+ lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array
+ | Expression): ...
+
+
+utf8_lower = _clone_signature(utf8_capitalize)
+
+
+def utf8_replace_slice(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ start: int,
+ stop: int,
+ replacement: str | bytes,
+ *,
+ options: ReplaceSliceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+utf8_reverse = _clone_signature(utf8_capitalize)
+utf8_swapcase = _clone_signature(utf8_capitalize)
+utf8_title = _clone_signature(utf8_capitalize)
+utf8_upper = _clone_signature(utf8_capitalize)
+
+# ========================= 2.12 String padding =========================
+
+
+def ascii_center(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ width: int | None = None,
+ padding: str = " ",
+ lean_left_on_odd_padding: bool = True,
+ *,
+ options: PadOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+ascii_lpad = _clone_signature(ascii_center)
+ascii_rpad = _clone_signature(ascii_center)
+utf8_center = _clone_signature(ascii_center)
+utf8_lpad = _clone_signature(ascii_center)
+utf8_rpad = _clone_signature(ascii_center)
+
+
+def utf8_zero_fill(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ width: int | None = None,
+ padding: str = "0",
+ *,
+ options: ZeroFillOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+utf8_zfill = utf8_zero_fill
+
+# ========================= 2.13 String trimming =========================
+
+
+def ascii_ltrim(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ characters: str,
+ *,
+ options: TrimOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+ascii_rtrim = _clone_signature(ascii_ltrim)
+ascii_trim = _clone_signature(ascii_ltrim)
+utf8_ltrim = _clone_signature(ascii_ltrim)
+utf8_rtrim = _clone_signature(ascii_ltrim)
+utf8_trim = _clone_signature(ascii_ltrim)
+
+
+def ascii_ltrim_whitespace(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ *,
+ options: TrimOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace)
+ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace)
+utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace)
+utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace)
+utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace)
+
+# ========================= 2.14 String splitting =========================
+
+
+def ascii_split_whitespace(
+ strings: _StringScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression,
+ /,
+ *,
+ max_splits: int | None = None,
+ reverse: bool = False,
+ options: SplitOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ lib.ListArray[_StringScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]]
+ | Expression): ...
+
+
+def split_pattern(
+ strings: _StringOrBinaryScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression,
+ /,
+ pattern: str,
+ *,
+ max_splits: int | None = None,
+ reverse: bool = False,
+ options: SplitOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ lib.ListArray[_StringOrBinaryScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]]
+ | Expression): ...
+
+
+split_pattern_regex = _clone_signature(split_pattern)
+utf8_split_whitespace = _clone_signature(ascii_split_whitespace)
+
+# ========================= 2.15 String component extraction =========================
+
+
+def extract_regex(
+ strings: StringOrBinaryScalar | StringOrBinaryArray | Expression,
+ /,
+ pattern: str,
+ *,
+ options: ExtractRegexOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar | lib.StructArray | Expression: ...
+
+
+extract_regex_span = _clone_signature(extract_regex)
+
+
+# ========================= 2.16 String join =========================
+def binary_join(
+ strings, separator, /, *, memory_pool: lib.MemoryPool | None = None
+) -> StringScalar | StringArray: ...
+
+
+def binary_join_element_wise(
+ *strings: str
+ | bytes
+ | _StringOrBinaryScalarT
+ | _StringOrBinaryArrayT
+ | Expression
+ | list,
+ null_handling: Literal["emit_null", "skip", "replace"] = "emit_null",
+ null_replacement: str = "",
+ options: JoinOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ...
+
+
+# ========================= 2.17 String Slicing =========================
+def binary_slice(
+ strings: _BinaryScalarT | _BinaryArrayT | Expression | lib.Scalar,
+ /,
+ start: int,
+ stop: int | None = None,
+ step: int = 1,
+ *,
+ options: SliceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _BinaryScalarT | _BinaryArrayT | Expression: ...
+
+
+def utf8_slice_codeunits(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ start: int,
+ stop: int | None = None,
+ step: int = 1,
+ *,
+ options: SliceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+def utf8_normalize(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ form: Literal["NFC", "NFKC", "NFD", "NFKD"] = "NFC",
+ *,
+ options: Utf8NormalizeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression: ...
+
+
+# ========================= 2.18 Containment tests =========================
+def count_substring(
+ strings: lib.StringScalar | lib.BinaryScalar | lib.LargeStringScalar
+ | lib.LargeBinaryScalar | lib.StringArray | lib.BinaryArray
+ | lib.ChunkedArray[lib.StringScalar] | lib.ChunkedArray[lib.BinaryScalar]
+ | lib.LargeStringArray | lib.LargeBinaryArray
+ | lib.ChunkedArray[lib.LargeStringScalar] | lib.ChunkedArray[lib.LargeBinaryScalar]
+ | Expression,
+ /,
+ pattern: str,
+ *,
+ ignore_case: bool = False,
+ options: MatchSubstringOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array
+ | Expression): ...
+
+
+count_substring_regex = _clone_signature(count_substring)
+
+
+def ends_with(
+ strings: StringScalar | BinaryScalar | StringArray | BinaryArray | Expression,
+ /,
+ pattern: str,
+ *,
+ ignore_case: bool = False,
+ options: MatchSubstringOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+find_substring = _clone_signature(count_substring)
+find_substring_regex = _clone_signature(count_substring)
+
+
+def index_in(
+ values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
+ /,
+ value_set: lib.Array | lib.ChunkedArray | Expression,
+ *,
+ skip_nulls: bool = False,
+ options: SetLookupOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
+
+
+def index_in_meta_binary(
+ values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
+ value_set: lib.Array | lib.ChunkedArray | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
+
+
+def is_in(
+ values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
+ /,
+ value_set: lib.Array | lib.ChunkedArray | Expression,
+ *,
+ skip_nulls: bool = False,
+ options: SetLookupOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+is_in_meta_binary = _clone_signature(index_in_meta_binary)
+match_like = _clone_signature(ends_with)
+match_substring = _clone_signature(ends_with)
+match_substring_regex = _clone_signature(ends_with)
+starts_with = _clone_signature(ends_with)
+
+# ========================= 2.19 Categorizations =========================
+
+
+def is_finite(
+ values: NumericScalar | lib.NullScalar | NumericArray | lib.NullArray | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+is_inf = _clone_signature(is_finite)
+is_nan = _clone_signature(is_finite)
+
+
+def is_null(
+ values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
+ /,
+ *,
+ nan_is_null: bool = False,
+ options: NullOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+def is_valid(
+ values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression | ArrayLike,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+true_unless_null = _clone_signature(is_valid)
+
+# ========================= 2.20 Selecting / multiplexing =========================
+
+
+def case_when(
+ cond: lib.StructScalar
+ | lib.StructArray
+ | lib.ChunkedArray[lib.StructScalar]
+ | Expression,
+ /,
+ *cases: _ScalarOrArrayT | ArrayLike, memory_pool: lib.MemoryPool | None = None
+) -> _ScalarOrArrayT | lib.Array | Expression: ...
+
+
+def choose(
+ indices: ArrayLike | ScalarLike,
+ /,
+ *values: ArrayLike | ScalarLike,
+ memory_pool: lib.MemoryPool | None = None,
+) -> ArrayLike | ScalarLike: ...
+
+
+def coalesce(
+ *values: _ScalarOrArrayT | Expression, memory_pool: lib.MemoryPool | None = None
+) -> _ScalarOrArrayT | Expression: ...
+
+
+def fill_null(
+ values: _ScalarOrArrayT | ScalarLike, fill_value: ArrayLike | ScalarLike
+) -> _ScalarOrArrayT | ScalarLike: ...
+
+
+def if_else(
+ cond: ArrayLike | ScalarLike,
+ left: ArrayLike | ScalarLike,
+ right: ArrayLike | ScalarLike,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> ArrayLike | ScalarLike: ...
+
+
+# ========================= 2.21 Structural transforms =========================
+
+def list_value_length(
+ lists: _ListArray[Any] | _LargeListArray[Any] | ListArray[Any] | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int32Array | lib.Int64Array | Expression: ...
+
+
+def make_struct(
+ *args: lib.Scalar | lib.Array | lib.ChunkedArray | Expression | ArrayLike,
+ field_names: list[str] | tuple[str, ...] = (),
+ field_nullability: bool | None = None,
+ field_metadata: list[lib.KeyValueMetadata] | None = None,
+ options: MakeStructOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar | lib.StructArray | Expression: ...
+
+
+# ========================= 2.22 Conversions =========================
+def ceil_temporal(
+ timestamps: _TemporalScalarT | _TemporalArrayT | Expression,
+ /,
+ multiple: int = 1,
+ unit: Literal[
+ "year",
+ "quarter",
+ "month",
+ "week",
+ "day",
+ "hour",
+ "minute",
+ "second",
+ "millisecond",
+ "microsecond",
+ "nanosecond",
+ ] = "day",
+ *,
+ week_starts_monday: bool = True,
+ ceil_is_strictly_greater: bool = False,
+ calendar_based_origin: bool = False,
+ options: RoundTemporalOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _TemporalScalarT | _TemporalArrayT | Expression: ...
+
+
+floor_temporal = _clone_signature(ceil_temporal)
+round_temporal = _clone_signature(ceil_temporal)
+
+
+def cast(
+ arr: lib.Scalar | lib.Array | lib.ChunkedArray | lib.Table,
+ target_type: _DataTypeT | str | None = None,
+ safe: bool | None = None,
+ options: CastOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ lib.Scalar[_DataTypeT] | lib.Scalar[Any] | lib.Array[lib.Scalar[_DataTypeT]]
+ | lib.Array[lib.Scalar[Any]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]]
+ | lib.ChunkedArray[lib.Scalar[Any]] | lib.Table
+): ...
+
+
+def strftime(
+ timestamps: TemporalScalar | TemporalArray | Expression,
+ /,
+ format: str = "%Y-%m-%dT%H:%M:%S",
+ locale: str = "C",
+ *,
+ options: StrftimeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.StringScalar | lib.StringArray | Expression: ...
+
+
+def strptime(
+ strings: StringScalar | StringArray | Expression,
+ /,
+ format: str,
+ unit: TimeUnit,
+ error_is_null: bool = False,
+ *,
+ options: StrptimeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.TimestampScalar | lib.TimestampArray | Expression: ...
+
+
+# ========================= 2.23 Temporal component extraction =========================
+def day(
+ values: TemporalScalar | TemporalArray | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None) -> (
+ lib.Int64Scalar | lib.Int64Array | Expression
+): ...
+
+
+def day_of_week(
+ values: TemporalScalar | TemporalArray | Expression,
+ /,
+ *,
+ count_from_zero: bool = True,
+ week_start: int = 1,
+ options: DayOfWeekOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar | lib.Int64Array | Expression: ...
+
+
+day_of_year = _clone_signature(day)
+
+
+def hour(
+ values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any]
+ | lib.TimestampArray[Any] | lib.Time32Array[Any] | lib.Time64Array[Any]
+ | lib.ChunkedArray[lib.TimestampScalar[Any]]
+ | lib.ChunkedArray[lib.Time32Scalar[Any]]
+ | lib.ChunkedArray[lib.Time64Scalar[Any]] | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar | lib.Int64Array | Expression: ...
+
+
+def is_dst(
+ values: lib.TimestampScalar | lib.TimestampArray[Any]
+ | lib.ChunkedArray[lib.TimestampScalar] | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+def iso_week(
+ values: lib.TimestampScalar | lib.TimestampArray[Any]
+ | lib.ChunkedArray[lib.TimestampScalar[Any]] | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Scalar | lib.Int64Array | Expression: ...
+
+
+iso_year = _clone_signature(iso_week)
+
+
+def is_leap_year(
+ values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar
+ | lib.TimestampArray
+ | lib.Date32Array
+ | lib.Date64Array
+ | lib.ChunkedArray[lib.TimestampScalar]
+ | lib.ChunkedArray[lib.Date32Scalar]
+ | lib.ChunkedArray[lib.Date64Scalar] | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
+
+
+microsecond = _clone_signature(iso_week)
+millisecond = _clone_signature(iso_week)
+minute = _clone_signature(iso_week)
+month = _clone_signature(day_of_week)
+nanosecond = _clone_signature(hour)
+quarter = _clone_signature(day_of_week)
+second = _clone_signature(hour)
+subsecond = _clone_signature(hour)
+us_week = _clone_signature(iso_week)
+us_year = _clone_signature(iso_week)
+year = _clone_signature(iso_week)
+
+
+def week(
+ values: lib.TimestampScalar | lib.TimestampArray
+ | lib.ChunkedArray[lib.TimestampScalar] | Expression,
+ /,
+ *,
+ week_starts_monday: bool = True,
+ count_from_zero: bool = False,
+ first_week_is_fully_in_year: bool = False,
+ options: WeekOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar | lib.Int64Array | Expression: ...
+
+
+def year_month_day(
+ values: TemporalScalar | TemporalArray | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> lib.StructScalar | lib.StructArray | Expression: ...
+
+
+iso_calendar = _clone_signature(year_month_day)
+
+
+# ========================= 2.24 Temporal difference =========================
+def day_time_interval_between(start, end, /, *,
+ memory_pool: lib.MemoryPool | None = None): ...
+
+
+def days_between(
+ start, end, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Scalar | lib.Int64Array: ...
+
+
+hours_between = _clone_signature(days_between)
+microseconds_between = _clone_signature(days_between)
+milliseconds_between = _clone_signature(days_between)
+minutes_between = _clone_signature(days_between)
+
+
+def month_day_nano_interval_between(
+ start, end, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: ...
+
+
+def month_interval_between(start, end, /, *,
+ memory_pool: lib.MemoryPool | None = None): ...
+
+
+nanoseconds_between = _clone_signature(days_between)
+quarters_between = _clone_signature(days_between)
+seconds_between = _clone_signature(days_between)
+
+
+def weeks_between(
+ start,
+ end,
+ /,
+ *,
+ count_from_zero: bool = True,
+ week_start: int = 1,
+ options: DayOfWeekOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar | lib.Int64Array: ...
+
+
+years_between = _clone_signature(days_between)
+
+# ========================= 2.25 Timezone handling =========================
+
+
+def assume_timezone(
+ timestamps: lib.TimestampScalar | lib.Scalar[lib.TimestampType] | lib.TimestampArray
+ | lib.ChunkedArray[lib.TimestampScalar] | Expression,
+ /,
+ timezone: str | None = None,
+ *,
+ ambiguous: Literal["raise", "earliest", "latest"] = "raise",
+ nonexistent: Literal["raise", "earliest", "latest"] = "raise",
+ options: AssumeTimezoneOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> (
+ lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar]
+ | Expression
+): ...
+
+
+def local_timestamp(
+ timestamps: lib.TimestampScalar | lib.TimestampArray
+ | lib.ChunkedArray[lib.TimestampScalar] | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.TimestampScalar | lib.TimestampArray | Expression: ...
+
+
+# ========================= 2.26 Random number generation =========================
+def random(
+ n: int,
+ *,
+ initializer: Hashable = "system",
+ options: RandomOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleArray: ...
+
+
+# ========================= 3. Array-wise (“vector”) functions =========================
+
+# ========================= 3.1 Cumulative Functions =========================
+def cumulative_sum(
+ values: _NumericArrayT | ArrayLike | Expression,
+ /,
+ start: int | float | lib.Scalar | None = None,
+ *,
+ skip_nulls: bool = False,
+ options: CumulativeSumOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericArrayT | Expression | lib.Array: ...
+
+
+cumulative_sum_checked = _clone_signature(cumulative_sum)
+cumulative_prod = _clone_signature(cumulative_sum)
+cumulative_prod_checked = _clone_signature(cumulative_sum)
+cumulative_max = _clone_signature(cumulative_sum)
+cumulative_min = _clone_signature(cumulative_sum)
+cumulative_mean = _clone_signature(cumulative_sum)
+# ========================= 3.2 Associative transforms =========================
+
+
+def dictionary_encode(
+ array: _ScalarOrArrayT | Expression,
+ /,
+ null_encoding: Literal["mask", "encode"] = "mask",
+ *,
+ options=None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT | Expression: ...
+
+
+def dictionary_decode(
+ array: _ScalarOrArrayT | Expression,
+ /,
+ *,
+ options=None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT | Expression: ...
+
+
+def unique(array: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool |
+ None = None) -> _ArrayT | Expression: ...
+
+
+def value_counts(
+ array: lib.Array | lib.ChunkedArray | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> lib.StructArray | Expression: ...
+
+# ========================= 3.3 Selections =========================
+
+
+def array_filter(
+ array: _ArrayT | Expression,
+ selection_filter: list[bool] | list[bool | None] | BooleanArray,
+ /,
+ null_selection_behavior: Literal["drop", "emit_null"] = "drop",
+ *,
+ options: FilterOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ArrayT | Expression: ...
+
+
+def drop_null(input: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool |
+ None = None) -> _ArrayT | Expression: ...
+
+
+filter = array_filter
+take = array_take
+
+# ========================= 3.4 Containment tests =========================
+
+
+def indices_nonzero(
+ values: lib.BooleanArray
+ | lib.NullArray
+ | NumericArray
+ | lib.Decimal128Array
+ | lib.Decimal256Array | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression: ...
+
+
+# ========================= 3.5 Sorts and partitions =========================
+def array_sort_indices(
+ array: lib.Array | lib.ChunkedArray | Expression,
+ /,
+ order: _Order = "ascending",
+ *,
+ null_placement: _Placement = "at_end",
+ options: ArraySortOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression: ...
+
+
+def partition_nth_indices(
+ array: lib.Array | lib.ChunkedArray | Expression | Iterable,
+ /,
+ pivot: int,
+ *,
+ null_placement: _Placement = "at_end",
+ options: PartitionNthOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression: ...
+
+
+def pivot_wider(
+ keys: lib.Array | lib.ChunkedArray | Sequence[str],
+ values: lib.Array | lib.ChunkedArray | Sequence[Any],
+ /,
+ key_names: Sequence[str] | None = None,
+ *,
+ unexpected_key_behavior: Literal["ignore", "raise"] = "ignore",
+ options: PivotWiderOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar: ...
+
+
+def rank(
+ input: lib.Array | lib.ChunkedArray,
+ /,
+ sort_keys: _Order = "ascending",
+ *,
+ null_placement: _Placement = "at_end",
+ tiebreaker: Literal["min", "max", "first", "dense"] = "first",
+ options: RankOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array: ...
+
+
+def rank_quantile(
+ input: lib.Array | lib.ChunkedArray,
+ /,
+ sort_keys: _Order = "ascending",
+ *,
+ null_placement: _Placement = "at_end",
+ options: RankQuantileOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleArray: ...
+
+
+def rank_normal(
+ input: lib.Array | lib.ChunkedArray,
+ /,
+ sort_keys: _Order = "ascending",
+ *,
+ null_placement: _Placement = "at_end",
+ options: RankQuantileOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.DoubleArray: ...
+
+
+def select_k_unstable(
+ input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression,
+ /,
+ k: int | None = None,
+ sort_keys: Sequence[tuple[str | Expression, str]] | None = None,
+ *,
+ options: SelectKOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression: ...
+
+
+def sort_indices(
+ input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression,
+ /,
+ sort_keys: Sequence[tuple[str | Expression, _Order]] | None = None,
+ *,
+ null_placement: _Placement = "at_end",
+ options: SortOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression: ...
+
+
+# ========================= 3.6 Structural transforms =========================
+def list_element(
+ lists: lib.Array[ListScalar[_DataTypeT]] | lib.ChunkedArray[ListScalar[_DataTypeT]]
+ | ListScalar[_DataTypeT] | Expression,
+ index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None
+) -> (lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]]
+      | lib.Scalar[_DataTypeT] | Expression): ...
+
+
+def list_flatten(
+ lists: ArrayOrChunkedArray[ListScalar[Any]] | Expression,
+ /,
+ recursive: bool = False,
+ *,
+ options: ListFlattenOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any] | Expression: ...
+
+
+def list_parent_indices(
+ lists: ArrayOrChunkedArray[Any] | Expression, /, *,
+ memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Array | Expression: ...
+
+
+def list_slice(
+ lists: ArrayOrChunkedArray[Any] | Expression,
+ /,
+ start: int,
+ stop: int | None = None,
+ step: int = 1,
+ return_fixed_size_list: bool | None = None,
+ *,
+ options: ListSliceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any] | Expression: ...
+
+
+def map_lookup(
+ container,
+ /,
+ query_key,
+ occurrence: str,
+ *,
+ options: MapLookupOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+): ...
+
+
+def struct_field(
+ values,
+ /,
+ indices,
+ *,
+ options: StructFieldOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+): ...
+
+
+def fill_null_backward(
+ values: _ScalarOrArrayT | ScalarLike | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT | ScalarLike | Expression: ...
+
+
+def fill_null_forward(
+ values: _ScalarOrArrayT | ScalarLike | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT | ScalarLike | Expression: ...
+
+
+def replace_with_mask(
+ values: _ScalarOrArrayT | Expression,
+ mask: list[bool] | list[bool | None] | BooleanArray,
+ replacements,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT | Expression: ...
+
+
+# ========================= 3.7 Pairwise functions =========================
+def pairwise_diff(
+ input: _NumericOrTemporalArrayT | Expression,
+ /,
+ period: int = 1,
+ *,
+ options: PairwiseOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _NumericOrTemporalArrayT | Expression: ...
+
+
+def run_end_encode(
+ input: _NumericOrTemporalArrayT | Expression,
+ /,
+ *,
+ run_end_type: _RunEndType | None = None,
+ options: RunEndEncodeOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None
+) -> _NumericOrTemporalArrayT | Expression: ...
+
+
+def run_end_decode(
+ input: _NumericOrTemporalArrayT | Expression,
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None
+) -> _NumericOrTemporalArrayT | Expression: ...
+
+
+pairwise_diff_checked = _clone_signature(pairwise_diff)
diff --git a/python/pyarrow-stubs/pyarrow/config.pyi b/python/pyarrow-stubs/pyarrow/config.pyi
new file mode 100644
index 00000000000..069b70e553a
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/config.pyi
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import NamedTuple
+
+
+class VersionInfo(NamedTuple):
+ major: int
+ minor: int
+ patch: int
+
+
+class CppBuildInfo(NamedTuple):
+ version: str
+ version_info: VersionInfo
+ so_version: str
+ full_so_version: str
+ compiler_id: str
+ compiler_version: str
+ compiler_flags: str
+ git_id: str
+ git_description: str
+ package_kind: str
+ build_type: str
+
+
+class BuildInfo(NamedTuple):
+ build_type: str
+ cpp_build_info: CppBuildInfo
+
+
+class RuntimeInfo(NamedTuple):
+ simd_level: str
+ detected_simd_level: str
+
+
+build_info: BuildInfo
+cpp_build_info: CppBuildInfo
+cpp_version: str
+cpp_version_info: VersionInfo
+
+
+def runtime_info() -> RuntimeInfo: ...
+def set_timezone_db_path(path: str) -> None: ...
+
+
+__all__ = [
+ "VersionInfo",
+ "BuildInfo",
+ "CppBuildInfo",
+ "RuntimeInfo",
+ "build_info",
+ "cpp_build_info",
+ "cpp_version",
+ "cpp_version_info",
+ "runtime_info",
+ "set_timezone_db_path",
+]
diff --git a/python/pyarrow-stubs/pyarrow/csv.pyi b/python/pyarrow-stubs/pyarrow/csv.pyi
new file mode 100644
index 00000000000..a7abd413aab
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/csv.pyi
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._csv import (
+ ISO8601,
+ ConvertOptions,
+ CSVStreamingReader,
+ CSVWriter,
+ InvalidRow,
+ ParseOptions,
+ ReadOptions,
+ WriteOptions,
+ open_csv,
+ read_csv,
+ write_csv,
+)
+
+__all__ = [
+ "ISO8601",
+ "ConvertOptions",
+ "CSVStreamingReader",
+ "CSVWriter",
+ "InvalidRow",
+ "ParseOptions",
+ "ReadOptions",
+ "WriteOptions",
+ "open_csv",
+ "read_csv",
+ "write_csv",
+]
diff --git a/python/pyarrow-stubs/pyarrow/cuda.pyi b/python/pyarrow-stubs/pyarrow/cuda.pyi
new file mode 100644
index 00000000000..0394965bb73
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/cuda.pyi
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._cuda import (
+ BufferReader,
+ BufferWriter,
+ Context,
+ CudaBuffer,
+ HostBuffer,
+ IpcMemHandle,
+ new_host_buffer,
+ read_message,
+ read_record_batch,
+ serialize_record_batch,
+)
+
+__all__ = [
+ "BufferReader",
+ "BufferWriter",
+ "Context",
+ "CudaBuffer",
+ "HostBuffer",
+ "IpcMemHandle",
+ "new_host_buffer",
+ "read_message",
+ "read_record_batch",
+ "serialize_record_batch",
+]
diff --git a/python/pyarrow-stubs/pyarrow/dataset.pyi b/python/pyarrow-stubs/pyarrow/dataset.pyi
new file mode 100644
index 00000000000..66d86b14a25
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/dataset.pyi
@@ -0,0 +1,199 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Callable, Iterable, Sequence
+from typing import Literal, TypeAlias, Any
+
+from _typeshed import StrPath
+from pyarrow._dataset import (
+ CsvFileFormat,
+ CsvFragmentScanOptions,
+ Dataset,
+ DatasetFactory,
+ DirectoryPartitioning,
+ FeatherFileFormat,
+ FileFormat,
+ FileFragment,
+ FilenamePartitioning,
+ FileSystemDataset,
+ FileSystemDatasetFactory,
+ FileSystemFactoryOptions,
+ FileWriteOptions,
+ Fragment,
+ FragmentScanOptions,
+ HivePartitioning,
+ InMemoryDataset,
+ IpcFileFormat,
+ IpcFileWriteOptions,
+ JsonFileFormat,
+ JsonFragmentScanOptions,
+ Partitioning,
+ PartitioningFactory,
+ Scanner,
+ TaggedRecordBatch,
+ UnionDataset,
+ UnionDatasetFactory,
+ WrittenFile,
+ get_partition_keys,
+)
+from pyarrow._dataset_orc import OrcFileFormat
+from pyarrow._dataset_parquet import (
+ ParquetDatasetFactory,
+ ParquetFactoryOptions,
+ ParquetFileFormat,
+ ParquetFileFragment,
+ ParquetFileWriteOptions,
+ ParquetFragmentScanOptions,
+ ParquetReadOptions,
+ RowGroupInfo,
+)
+from pyarrow._dataset_parquet_encryption import (
+ ParquetDecryptionConfig,
+ ParquetEncryptionConfig,
+)
+from pyarrow.compute import Expression, field, scalar
+from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table
+
+from ._fs import SupportedFileSystem
+
+_orc_available: bool
+_parquet_available: bool
+
+__all__ = [
+ "CsvFileFormat",
+ "CsvFragmentScanOptions",
+ "Dataset",
+ "DatasetFactory",
+ "DirectoryPartitioning",
+ "FeatherFileFormat",
+ "FileFormat",
+ "FileFragment",
+ "FilenamePartitioning",
+ "FileSystemDataset",
+ "FileSystemDatasetFactory",
+ "FileSystemFactoryOptions",
+ "FileWriteOptions",
+ "Fragment",
+ "FragmentScanOptions",
+ "HivePartitioning",
+ "InMemoryDataset",
+ "IpcFileFormat",
+ "IpcFileWriteOptions",
+ "JsonFileFormat",
+ "JsonFragmentScanOptions",
+ "Partitioning",
+ "PartitioningFactory",
+ "Scanner",
+ "TaggedRecordBatch",
+ "UnionDataset",
+ "UnionDatasetFactory",
+ "WrittenFile",
+ "get_partition_keys",
+ # Orc
+ "OrcFileFormat",
+ # Parquet
+ "ParquetDatasetFactory",
+ "ParquetFactoryOptions",
+ "ParquetFileFormat",
+ "ParquetFileFragment",
+ "ParquetFileWriteOptions",
+ "ParquetFragmentScanOptions",
+ "ParquetReadOptions",
+ "RowGroupInfo",
+ # Parquet Encryption
+ "ParquetDecryptionConfig",
+ "ParquetEncryptionConfig",
+ # Compute
+ "Expression",
+ "field",
+ "scalar",
+ # Dataset
+ "partitioning",
+ "parquet_dataset",
+ "write_dataset",
+]
+
+_DatasetFormat: TypeAlias = (
+    Literal["parquet", "ipc", "arrow", "feather", "csv", "json", "orc"] | str
+)
+
+
+def partitioning(
+    schema: Schema | None = None,
+    *,
+    field_names: list[str] | None = None,
+    flavor: Literal["hive"] | None = None,
+ dictionaries: dict[str, Array] | Literal["infer"] | None = None,
+) -> Partitioning | PartitioningFactory: ...
+
+
+def parquet_dataset(
+ metadata_path: StrPath,
+ schema: Schema | None = None,
+ filesystem: SupportedFileSystem | None = None,
+ format: ParquetFileFormat | None = None,
+ partitioning: Partitioning | PartitioningFactory | str | None = None,
+ partition_base_dir: str | None = None,
+) -> FileSystemDataset: ...
+
+
+def dataset(
+ source: StrPath
+ | Sequence[Dataset]
+ | Sequence[StrPath]
+ | Iterable[RecordBatch]
+ | Iterable[Table]
+ | RecordBatchReader
+ | RecordBatch
+ | Table,
+ schema: Schema | None = None,
+ format: FileFormat | _DatasetFormat | None = None,
+ filesystem: SupportedFileSystem | str | None = None,
+ partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None,
+ partition_base_dir: str | None = None,
+ exclude_invalid_files: bool | None = None,
+ ignore_prefixes: list[str] | None = None,
+) -> FileSystemDataset | UnionDataset | InMemoryDataset | Dataset: ...
+
+
+def write_dataset(
+ data: Any | Dataset | Table | RecordBatch | RecordBatchReader | list[Table]
+ | Iterable[RecordBatch] | Scanner,
+ base_dir: StrPath,
+ *,
+ basename_template: str | None = None,
+ format: FileFormat | _DatasetFormat | None = None,
+ partitioning: Partitioning | PartitioningFactory | list[str] | None = None,
+ partitioning_flavor: str | None = None,
+ schema: Schema | None = None,
+ filesystem: SupportedFileSystem | str | None = None,
+ file_options: FileWriteOptions | None = None,
+ use_threads: bool | None = True,
+ max_partitions: int = 1024,
+ max_open_files: int = 1024,
+ max_rows_per_file: int = 0,
+ min_rows_per_group: int = 0,
+ max_rows_per_group: int = 1024 * 1024, # noqa: Y011
+ file_visitor: Callable[[str], None] | None = None,
+ existing_data_behavior:
+ Literal["error", "overwrite_or_ignore", "delete_matching"] = "error",
+ create_dir: bool = True,
+ preserve_order: bool | None = None,
+): ...
+
+
+def _get_partition_keys(partition_expression: Expression) -> dict[str, Any]: ...
diff --git a/python/pyarrow-stubs/pyarrow/device.pyi b/python/pyarrow-stubs/pyarrow/device.pyi
new file mode 100644
index 00000000000..7787ac44deb
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/device.pyi
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+
+from pyarrow.lib import _Weakrefable
+
+
+class DeviceAllocationType(enum.Enum):
+ CPU = enum.auto()
+ CUDA = enum.auto()
+ CUDA_HOST = enum.auto()
+ OPENCL = enum.auto()
+ VULKAN = enum.auto()
+ METAL = enum.auto()
+ VPI = enum.auto()
+ ROCM = enum.auto()
+ ROCM_HOST = enum.auto()
+ EXT_DEV = enum.auto()
+ CUDA_MANAGED = enum.auto()
+ ONEAPI = enum.auto()
+ WEBGPU = enum.auto()
+ HEXAGON = enum.auto()
+
+
+class Device(_Weakrefable):
+ @property
+ def type_name(self) -> str: ...
+
+ @property
+ def device_id(self) -> int: ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+ @property
+ def device_type(self) -> DeviceAllocationType: ...
+
+
+class MemoryManager(_Weakrefable):
+ @property
+ def device(self) -> Device: ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+
+def default_cpu_memory_manager() -> MemoryManager: ...
+
+
+__all__ = ["DeviceAllocationType", "Device",
+ "MemoryManager", "default_cpu_memory_manager"]
diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi
new file mode 100644
index 00000000000..eac936afcb5
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/error.pyi
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+
+class ArrowException(Exception):
+ ...
+
+
+class ArrowInvalid(ValueError, ArrowException):
+ ...
+
+
+class ArrowMemoryError(MemoryError, ArrowException):
+ ...
+
+
+class ArrowKeyError(KeyError, ArrowException):
+ ...
+
+
+class ArrowTypeError(TypeError, ArrowException):
+ ...
+
+
+class ArrowNotImplementedError(NotImplementedError, ArrowException):
+ ...
+
+
+class ArrowCapacityError(ArrowException):
+ ...
+
+
+class ArrowIndexError(IndexError, ArrowException):
+ ...
+
+
+class ArrowSerializationError(ArrowException):
+ ...
+
+
+class ArrowCancelled(ArrowException):
+ signum: int | None
+ def __init__(self, message: str, signum: int | None = None) -> None: ...
+
+
+ArrowIOError = IOError
+
+
+class StopToken:
+ ...
+
+
+def enable_signal_handlers(enable: bool) -> None: ...
+
+
+have_signal_refcycle: bool
+
+
+class SignalStopHandler:
+ def __enter__(self) -> Self: ...
+ def __exit__(self, exc_type, exc_value, exc_tb) -> None: ...
+ def __dealloc__(self) -> None: ...
+ @property
+ def stop_token(self) -> StopToken: ...
+
+
+__all__ = [
+ "ArrowException",
+ "ArrowInvalid",
+ "ArrowMemoryError",
+ "ArrowKeyError",
+ "ArrowTypeError",
+ "ArrowNotImplementedError",
+ "ArrowCapacityError",
+ "ArrowIndexError",
+ "ArrowSerializationError",
+ "ArrowCancelled",
+ "ArrowIOError",
+ "StopToken",
+ "enable_signal_handlers",
+ "have_signal_refcycle",
+ "SignalStopHandler",
+]
diff --git a/python/pyarrow-stubs/pyarrow/feather.pyi b/python/pyarrow-stubs/pyarrow/feather.pyi
new file mode 100644
index 00000000000..cf9d3402091
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/feather.pyi
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable
+from typing import IO, Literal
+
+import pandas as pd
+
+from pyarrow import lib
+from pyarrow.lib import Table
+from pyarrow._typing import StrPath
+from ._feather import FeatherError
+
+
+class FeatherDataset:
+ path_or_paths: str | list[str]
+ validate_schema: bool
+
+ def __init__(self, path_or_paths: str |
+ list[str], validate_schema: bool = True) -> None: ...
+
+ def read_table(self, columns: list[str] | None = None) -> Table: ...
+ def validate_schemas(self, piece, table: Table) -> None: ...
+
+ def read_pandas(
+ self, columns: list[str] | None = None, use_threads: bool = True
+ ) -> pd.DataFrame: ...
+
+
+def check_chunked_overflow(name: str, col) -> None: ...
+
+
+def write_feather(
+ df: pd.DataFrame | Table | lib.ChunkedArray,
+ dest: StrPath | IO,
+    compression: Literal["zstd", "lz4", "uncompressed"] | None = None,
+ compression_level: int | None = None,
+ chunksize: int | None = None,
+ version: Literal[1, 2] = 2,
+) -> None: ...
+
+
+def read_feather(
+ source: StrPath | IO | lib.NativeFile,
+ columns: list[str] | None = None,
+ use_threads: bool = True,
+ memory_map: bool = False,
+ **kwargs,
+) -> pd.DataFrame: ...
+
+
+def read_table(
+ source: StrPath | IO | lib.NativeFile,
+ columns: list[str | int] | Iterable[str | int] | None = None,
+ memory_map: bool = False,
+ use_threads: bool = True,
+) -> Table: ...
+
+
+__all__ = [
+ "FeatherError",
+ "FeatherDataset",
+ "check_chunked_overflow",
+ "write_feather",
+ "read_feather",
+ "read_table",
+]
diff --git a/python/pyarrow-stubs/pyarrow/flight.pyi b/python/pyarrow-stubs/pyarrow/flight.pyi
new file mode 100644
index 00000000000..dcc6ee2244b
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/flight.pyi
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._flight import (
+ Action,
+ ActionType,
+ BasicAuth,
+ CallInfo,
+ CertKeyPair,
+ ClientAuthHandler,
+ ClientMiddleware,
+ ClientMiddlewareFactory,
+ DescriptorType,
+ FlightCallOptions,
+ FlightCancelledError,
+ FlightClient,
+ FlightDataStream,
+ FlightDescriptor,
+ FlightEndpoint,
+ FlightError,
+ FlightInfo,
+ FlightInternalError,
+ FlightMetadataReader,
+ FlightMetadataWriter,
+ FlightMethod,
+ FlightServerBase,
+ FlightServerError,
+ FlightStreamChunk,
+ FlightStreamReader,
+ FlightStreamWriter,
+ FlightTimedOutError,
+ FlightUnauthenticatedError,
+ FlightUnauthorizedError,
+ FlightUnavailableError,
+ FlightWriteSizeExceededError,
+ GeneratorStream,
+ Location,
+ MetadataRecordBatchReader,
+ MetadataRecordBatchWriter,
+ RecordBatchStream,
+ Result,
+ SchemaResult,
+ ServerAuthHandler,
+ ServerCallContext,
+ ServerMiddleware,
+ ServerMiddlewareFactory,
+ Ticket,
+ TracingServerMiddlewareFactory,
+ connect,
+)
+
+__all__ = [
+ "Action",
+ "ActionType",
+ "BasicAuth",
+ "CallInfo",
+ "CertKeyPair",
+ "ClientAuthHandler",
+ "ClientMiddleware",
+ "ClientMiddlewareFactory",
+ "DescriptorType",
+ "FlightCallOptions",
+ "FlightCancelledError",
+ "FlightClient",
+ "FlightDataStream",
+ "FlightDescriptor",
+ "FlightEndpoint",
+ "FlightError",
+ "FlightInfo",
+ "FlightInternalError",
+ "FlightMetadataReader",
+ "FlightMetadataWriter",
+ "FlightMethod",
+ "FlightServerBase",
+ "FlightServerError",
+ "FlightStreamChunk",
+ "FlightStreamReader",
+ "FlightStreamWriter",
+ "FlightTimedOutError",
+ "FlightUnauthenticatedError",
+ "FlightUnauthorizedError",
+ "FlightUnavailableError",
+ "FlightWriteSizeExceededError",
+ "GeneratorStream",
+ "Location",
+ "MetadataRecordBatchReader",
+ "MetadataRecordBatchWriter",
+ "RecordBatchStream",
+ "Result",
+ "SchemaResult",
+ "ServerAuthHandler",
+ "ServerCallContext",
+ "ServerMiddleware",
+ "ServerMiddlewareFactory",
+ "Ticket",
+ "TracingServerMiddlewareFactory",
+ "connect",
+]
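+
+# Illustrative only (not part of the stub contract; the endpoint is a
+# hypothetical placeholder): the re-exported names keep common Flight client
+# code type-checkable, e.g.
+#
+#   from pyarrow import flight
+#   client = flight.connect("grpc://localhost:8815")
+#   info = client.get_flight_info(flight.FlightDescriptor.for_path("example"))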
diff --git a/python/pyarrow-stubs/pyarrow/fs.pyi b/python/pyarrow-stubs/pyarrow/fs.pyi
new file mode 100644
index 00000000000..77bf9193900
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/fs.pyi
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._fs import (
+ FileSelector,
+ FileType,
+ FileInfo,
+ FileSystem,
+ LocalFileSystem,
+ SubTreeFileSystem,
+ _MockFileSystem,
+ FileSystemHandler,
+ PyFileSystem,
+ SupportedFileSystem,
+)
+from pyarrow._azurefs import AzureFileSystem
+from pyarrow._hdfs import HadoopFileSystem
+from pyarrow._gcsfs import GcsFileSystem
+from pyarrow._s3fs import (
+ AwsDefaultS3RetryStrategy,
+ AwsStandardS3RetryStrategy,
+ S3FileSystem,
+ S3LogLevel,
+ S3RetryStrategy,
+ ensure_s3_initialized,
+ finalize_s3,
+ ensure_s3_finalized,
+ initialize_s3,
+ resolve_s3_region,
+)
+
+FileStats = FileInfo
+
+
+def copy_files(
+ source: str,
+ destination: str,
+ source_filesystem: SupportedFileSystem | None = None,
+ destination_filesystem: SupportedFileSystem | None = None,
+ *,
+ chunk_size: int = 1024 * 1024, # noqa: Y011
+ use_threads: bool = True,
+) -> None: ...
+
+
+def _ensure_filesystem(
+ filesystem: FileSystem | str | object,
+ *,
+ use_mmap: bool = False
+) -> FileSystem: ...
+
+
+def _resolve_filesystem_and_path(
+ path: str | object,
+ filesystem: FileSystem | str | object | None = None,
+ *,
+ memory_map: bool = False
+) -> tuple[FileSystem, str]: ...
+
+
+class FSSpecHandler(FileSystemHandler): # type: ignore[misc] # All abstract methods implemented via fsspec delegation # noqa: E501
+ fs: SupportedFileSystem
+ def __init__(self, fs: SupportedFileSystem) -> None: ...
+
+
+__all__ = [
+ # _fs
+ "FileSelector",
+ "FileType",
+ "FileInfo",
+ "FileSystem",
+ "LocalFileSystem",
+ "SubTreeFileSystem",
+ "_MockFileSystem",
+ "FileSystemHandler",
+ "PyFileSystem",
+ # _azurefs
+ "AzureFileSystem",
+ # _hdfs
+ "HadoopFileSystem",
+ # _gcsfs
+ "GcsFileSystem",
+ # _s3fs
+ "AwsDefaultS3RetryStrategy",
+ "AwsStandardS3RetryStrategy",
+ "S3FileSystem",
+ "S3LogLevel",
+ "S3RetryStrategy",
+ "ensure_s3_initialized",
+ "finalize_s3",
+ "ensure_s3_finalized",
+ "initialize_s3",
+ "resolve_s3_region",
+ # fs
+ "FileStats",
+ "copy_files",
+ "FSSpecHandler",
+]
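+
+# Illustrative only (not part of the stub contract; "/tmp" is a placeholder):
+# typical filesystem usage covered by these annotations, e.g.
+#
+#   from pyarrow import fs
+#   local = fs.LocalFileSystem()
+#   infos = local.get_file_info(fs.FileSelector("/tmp", recursive=True))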
diff --git a/python/pyarrow-stubs/pyarrow/gandiva.pyi b/python/pyarrow-stubs/pyarrow/gandiva.pyi
new file mode 100644
index 00000000000..7e129d3ed1d
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/gandiva.pyi
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Iterable
+from typing import Literal
+
+from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable
+
+
+class Node(_Weakrefable):
+ def return_type(self) -> DataType: ...
+
+
+class Expression(_Weakrefable):
+ def root(self) -> Node: ...
+ def result(self) -> Field: ...
+
+
+class Condition(_Weakrefable):
+ def root(self) -> Node: ...
+ def result(self) -> Field: ...
+
+
+class SelectionVector(_Weakrefable):
+ def to_array(self) -> Array: ...
+
+
+class Projector(_Weakrefable):
+ @property
+ def llvm_ir(self): ...
+
+ def evaluate(
+ self, batch: RecordBatch, selection: SelectionVector | None = None
+ ) -> list[Array]: ...
+
+
+class Filter(_Weakrefable):
+ @property
+ def llvm_ir(self): ...
+
+ def evaluate(
+ self, batch: RecordBatch, pool: MemoryPool, dtype: DataType | str = "int32"
+ ) -> SelectionVector: ...
+
+
+class TreeExprBuilder(_Weakrefable):
+ def make_literal(self, value: float | str | bytes |
+ bool, dtype: DataType | str | None) -> Node: ...
+
+ def make_expression(
+ self, root_node: Node | None, return_field: Field) -> Expression: ...
+
+ def make_function(
+ self, name: str, children: list[Node | None],
+ return_type: DataType) -> Node: ...
+
+ def make_field(self, field: Field | None) -> Node: ...
+
+ def make_if(
+ self, condition: Node, this_node: Node | None,
+ else_node: Node | None, return_type: DataType | None
+ ) -> Node: ...
+ def make_and(self, children: list[Node | None]) -> Node: ...
+ def make_or(self, children: list[Node | None]) -> Node: ...
+ def make_in_expression(self, node: Node | None, values: Iterable,
+ dtype: DataType) -> Node: ...
+
+ def make_condition(self, condition: Node | None) -> Condition: ...
+
+
+class Configuration(_Weakrefable):
+ def __init__(self, optimize: bool = True, dump_ir: bool = False) -> None: ...
+
+
+def make_projector(
+ schema: Schema,
+ children: list[Expression | None],
+ pool: MemoryPool | None = None,
+ selection_mode: Literal["NONE", "UINT16", "UINT32", "UINT64"] = "NONE",
+ configuration: Configuration | None = None,
+) -> Projector: ...
+
+
+def make_filter(
+ schema: Schema, condition: Condition | None,
+ configuration: Configuration | None = None
+) -> Filter: ...
+
+
+class FunctionSignature(_Weakrefable):
+ def return_type(self) -> DataType: ...
+ def param_types(self) -> list[DataType]: ...
+ def name(self) -> str: ...
+
+
+def get_registered_function_signatures() -> list[FunctionSignature]: ...
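+
+
+# Illustrative only (not part of the stub contract; `field`, `out_field`,
+# `schema` and `pool` are placeholders): the builder/projector flow described
+# by the declarations above, e.g.
+#
+#   builder = TreeExprBuilder()
+#   expr = builder.make_expression(builder.make_field(field), out_field)
+#   projector = make_projector(schema, [expr], pool)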
diff --git a/python/pyarrow-stubs/pyarrow/interchange/__init__.pyi b/python/pyarrow-stubs/pyarrow/interchange/__init__.pyi
new file mode 100644
index 00000000000..fd5ae83c569
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/interchange/__init__.pyi
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .from_dataframe import from_dataframe as from_dataframe
+
+__all__ = ["from_dataframe"]
diff --git a/python/pyarrow-stubs/pyarrow/interchange/buffer.pyi b/python/pyarrow-stubs/pyarrow/interchange/buffer.pyi
new file mode 100644
index 00000000000..e1d8ae949c9
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/interchange/buffer.pyi
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+
+from pyarrow.lib import Buffer
+
+
+class DlpackDeviceType(enum.IntEnum):
+ CPU = 1
+ CUDA = 2
+ CPU_PINNED = 3
+ OPENCL = 4
+ VULKAN = 7
+ METAL = 8
+ VPI = 9
+ ROCM = 10
+
+
+class _PyArrowBuffer:
+ def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ...
+ @property
+ def bufsize(self) -> int: ...
+ @property
+ def ptr(self) -> int: ...
+ def __dlpack__(self): ...
+ def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: ...
diff --git a/python/pyarrow-stubs/pyarrow/interchange/column.pyi b/python/pyarrow-stubs/pyarrow/interchange/column.pyi
new file mode 100644
index 00000000000..67508ac0689
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/interchange/column.pyi
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import enum
+
+from collections.abc import Iterable
+from typing import Any, TypeAlias, TypedDict
+
+from pyarrow.lib import Array, ChunkedArray
+
+from .buffer import _PyArrowBuffer
+
+
+class DtypeKind(enum.IntEnum):
+ INT = 0
+ UINT = 1
+ FLOAT = 2
+ BOOL = 20
+ STRING = 21 # UTF-8
+ DATETIME = 22
+ CATEGORICAL = 23
+
+
+Dtype: TypeAlias = tuple[DtypeKind, int, str, str]
+
+
+class ColumnNullType(enum.IntEnum):
+ NON_NULLABLE = 0
+ USE_NAN = 1
+ USE_SENTINEL = 2
+ USE_BITMASK = 3
+ USE_BYTEMASK = 4
+
+
+class ColumnBuffers(TypedDict):
+ data: tuple[_PyArrowBuffer, Dtype]
+ validity: tuple[_PyArrowBuffer, Dtype] | None
+ offsets: tuple[_PyArrowBuffer, Dtype] | None
+
+
+class CategoricalDescription(TypedDict):
+ is_ordered: bool
+ is_dictionary: bool
+ categories: _PyArrowColumn | None
+
+
+class Endianness(enum.Enum):
+ LITTLE = "<"
+ BIG = ">"
+ NATIVE = "="
+ NA = "|"
+
+
+class NoBufferPresent(Exception):
+ ...
+
+
+class _PyArrowColumn:
+ _col: Array | ChunkedArray
+
+ def __init__(self, column: Array | ChunkedArray,
+ allow_copy: bool = True) -> None: ...
+
+ def size(self) -> int: ...
+ @property
+ def offset(self) -> int: ...
+ @property
+ def dtype(self) -> tuple[DtypeKind, int, str, str]: ...
+ @property
+ def describe_categorical(self) -> CategoricalDescription: ...
+ @property
+ def describe_null(self) -> tuple[ColumnNullType, Any]: ...
+ @property
+ def null_count(self) -> int: ...
+ @property
+ def metadata(self) -> dict[str, Any]: ...
+ def num_chunks(self) -> int: ...
+ def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: ...
+ def get_buffers(self) -> ColumnBuffers: ...
diff --git a/python/pyarrow-stubs/pyarrow/interchange/dataframe.pyi b/python/pyarrow-stubs/pyarrow/interchange/dataframe.pyi
new file mode 100644
index 00000000000..419b3e2cdb3
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/interchange/dataframe.pyi
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+from collections.abc import Iterable, Sequence
+from typing import Any
+
+from pyarrow.interchange.column import _PyArrowColumn
+from pyarrow.lib import RecordBatch, Table
+
+
+class _PyArrowDataFrame:
+ def __init__(
+ self,
+ df: Table | RecordBatch,
+ nan_as_null: bool = False,
+ allow_copy: bool = True) -> None: ...
+
+ def __dataframe__(
+ self, nan_as_null: bool = False, allow_copy: bool = True
+ ) -> _PyArrowDataFrame: ...
+ @property
+ def metadata(self) -> dict[str, Any]: ...
+ def num_columns(self) -> int: ...
+ def num_rows(self) -> int: ...
+ def num_chunks(self) -> int: ...
+ def column_names(self) -> Iterable[str]: ...
+ def get_column(self, i: int) -> _PyArrowColumn: ...
+ def get_column_by_name(self, name: str) -> _PyArrowColumn: ...
+ def get_columns(self) -> Iterable[_PyArrowColumn]: ...
+ def select_columns(self, indices: Sequence[int]) -> Self: ...
+ def select_columns_by_name(self, names: Sequence[str]) -> Self: ...
+ def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: ...
diff --git a/python/pyarrow-stubs/pyarrow/interchange/from_dataframe.pyi b/python/pyarrow-stubs/pyarrow/interchange/from_dataframe.pyi
new file mode 100644
index 00000000000..d6ad272dfc6
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/interchange/from_dataframe.pyi
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Protocol, TypeAlias
+
+from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table
+
+from .column import (
+ ColumnBuffers,
+ ColumnNullType,
+ Dtype,
+ DtypeKind,
+)
+
+
+class DataFrameObject(Protocol):
+ def __dataframe__(self, nan_as_null: bool = False,
+ allow_copy: bool = True) -> Any: ...
+
+
+ColumnObject: TypeAlias = Any
+
+
+def from_dataframe(df: DataFrameObject, allow_copy: bool = True) -> Table: ...
+
+
+def _from_dataframe(df: DataFrameObject, allow_copy: bool = True) -> Table: ...
+
+
+def protocol_df_chunk_to_pyarrow(
+ df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: ...
+
+
+def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ...
+
+
+def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ...
+
+
+def categorical_column_to_dictionary(
+ col: ColumnObject, allow_copy: bool = True
+) -> DictionaryArray: ...
+
+
+def parse_datetime_format_str(format_str: str) -> tuple[str, str]: ...
+
+
+def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: ...
+
+
+def buffers_to_array(
+ buffers: ColumnBuffers,
+ data_type: tuple[DtypeKind, int, str, str],
+ length: int,
+ describe_null: ColumnNullType,
+ offset: int = 0,
+ allow_copy: bool = True,
+) -> Array: ...
+
+
+def validity_buffer_from_mask(
+ validity_buff: Buffer,
+ validity_dtype: Dtype,
+ describe_null: ColumnNullType,
+ length: int,
+ offset: int = 0,
+ allow_copy: bool = True,
+) -> Buffer: ...
+
+
+def validity_buffer_nan_sentinel(
+ data_pa_buffer: Buffer,
+ data_type: Dtype,
+ describe_null: ColumnNullType,
+ length: int,
+ offset: int = 0,
+ allow_copy: bool = True,
+) -> Buffer: ...
diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi
new file mode 100644
index 00000000000..be6a07d5418
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/io.pyi
@@ -0,0 +1,430 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+from collections.abc import Callable
+from io import IOBase
+
+from _typeshed import StrPath
+
+import numpy as np
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+else:
+ from typing_extensions import TypeAlias
+
+from typing import Any, Literal, SupportsIndex
+import builtins
+
+from pyarrow._stubs_typing import Compression, SupportPyBuffer
+from pyarrow.lib import MemoryPool, _Weakrefable
+
+from .device import Device, DeviceAllocationType, MemoryManager
+from ._types import KeyValueMetadata
+
+
+def have_libhdfs() -> bool: ...
+
+
+def io_thread_count() -> int: ...
+
+
+def set_io_thread_count(count: int) -> None: ...
+
+
+Mode: TypeAlias = Literal["rb", "wb", "rb+", "ab"]
+
+
+class NativeFile(_Weakrefable):
+ _default_chunk_size: int
+
+ def __enter__(self) -> Self: ...
+ def __exit__(self, *args) -> None: ...
+ @property
+ def mode(self) -> Mode: ...
+
+ def readable(self) -> bool: ...
+ def seekable(self) -> bool: ...
+ def isatty(self) -> bool: ...
+ def fileno(self) -> int: ...
+
+ @property
+ def closed(self) -> bool: ...
+ def close(self) -> None: ...
+ def size(self) -> int: ...
+
+ def metadata(self) -> KeyValueMetadata: ...
+
+ def tell(self) -> int: ...
+
+ def seek(self, position: int, whence: int = 0) -> int: ...
+
+ def flush(self) -> None: ...
+
+ def write(self, data: bytes | SupportPyBuffer) -> int: ...
+
+ def read(self, nbytes: int | None = None) -> bytes: ...
+
+ def get_stream(self, file_offset: int, nbytes: int) -> Self: ...
+
+ def read_at(self, nbytes: int, offset: int) -> bytes: ...
+
+ def read1(self, nbytes: int | None = None) -> bytes: ...
+
+ def readall(self) -> bytes: ...
+ def readinto(self, b: SupportPyBuffer) -> int: ...
+
+ def readline(self, size: int | None = None) -> bytes: ...
+
+ def readlines(self, hint: int | None = None) -> list[bytes]: ...
+
+ def __iter__(self) -> Self: ...
+
+ def __next__(self) -> bytes: ...
+ def read_buffer(self, nbytes: int | None = None) -> Buffer: ...
+
+ def truncate(self, pos: int | None = None) -> int: ...
+
+ def writelines(self, lines: list[bytes]) -> None: ...
+
+ def download(self, stream_or_path: StrPath | IOBase,
+ buffer_size: int | None = None) -> None: ...
+
+ def upload(self, stream: IOBase, buffer_size: int | None = None) -> None: ...
+
+ def writable(self) -> bool: ...
+
+# ----------------------------------------------------------------------
+# Python file-like objects
+
+
+class PythonFile(NativeFile):
+ def __init__(self, handle: IOBase,
+ mode: Literal["r", "w"] | None = None) -> None: ...
+
+
+class MemoryMappedFile(NativeFile):
+ @classmethod
+ def create(cls, path: str, size: float) -> Self: ...
+
+ def _open(self, path: str,
+ mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"): ...
+
+ def resize(self, new_size: int) -> None: ...
+
+
+def memory_map(
+ path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"
+) -> MemoryMappedFile: ...
+
+
+create_memory_map = MemoryMappedFile.create
+
+
+class OSFile(NativeFile):
+ name: str
+
+ def __init__(
+ self,
+ path: str,
+ mode: Literal["r", "rb", "w", "wb", "a", "ab"] = "r",
+ memory_pool: MemoryPool | None = None,
+ ) -> None: ...
+
+
+class FixedSizeBufferWriter(NativeFile):
+ def __init__(self, buffer: Buffer) -> None: ...
+ def set_memcopy_threads(self, num_threads: int) -> None: ...
+
+ def set_memcopy_blocksize(self, blocksize: int) -> None: ...
+
+ def set_memcopy_threshold(self, threshold: int) -> None: ...
+
+
+# ----------------------------------------------------------------------
+# Arrow buffers
+
+class Buffer(_Weakrefable):
+ def __len__(self) -> int: ...
+
+ def _assert_cpu(self) -> None: ...
+ @property
+ def size(self) -> int: ...
+
+ @property
+ def address(self) -> int: ...
+
+ def hex(self) -> bytes: ...
+
+ @property
+ def is_mutable(self) -> bool: ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+ @property
+ def device(self) -> Device: ...
+
+ @property
+ def memory_manager(self) -> MemoryManager: ...
+
+ @property
+ def device_type(self) -> DeviceAllocationType: ...
+
+ @property
+ def parent(self) -> Buffer | None: ...
+
+ def __getitem__(self, key: int | builtins.slice) -> int | Self: ...
+
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+ def equals(self, other: Self) -> bool: ...
+
+ def __buffer__(self, flags: int) -> memoryview: ...
+
+ def __reduce_ex__(self, protocol: SupportsIndex) -> str | tuple[Any, ...]: ...
+ def to_pybytes(self) -> bytes: ...
+
+
+class ResizableBuffer(Buffer):
+ def resize(self, new_size: int, shrink_to_fit: bool = False) -> None: ...
+
+
+def allocate_buffer(
+ size: int,
+ memory_pool: MemoryPool | None = None,
+ resizable: Literal[False] | Literal[True] | None = None # noqa: Y030
+) -> Buffer | ResizableBuffer: ...
+
+
+# ----------------------------------------------------------------------
+# Arrow Stream
+class BufferOutputStream(NativeFile):
+ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+ def getvalue(self) -> Buffer: ...
+
+
+class MockOutputStream(NativeFile):
+ ...
+
+
+class BufferReader(NativeFile):
+ def __init__(self, obj) -> None: ...
+
+
+class CompressedInputStream(NativeFile):
+ def __init__(
+ self,
+ stream: StrPath | NativeFile | IOBase,
+ compression: str | None,
+ ) -> None: ...
+
+
+class CompressedOutputStream(NativeFile):
+ def __init__(
+ self,
+ stream: StrPath | NativeFile | IOBase,
+ compression: str,
+ ) -> None: ...
+
+
+class BufferedInputStream(NativeFile):
+ def __init__(self, stream: NativeFile, buffer_size: int,
+ memory_pool: MemoryPool | None = None) -> None: ...
+
+ def detach(self) -> NativeFile: ...
+
+
+class BufferedOutputStream(NativeFile):
+ def __init__(self, stream: NativeFile, buffer_size: int,
+ memory_pool: MemoryPool | None = None) -> None: ...
+
+ def detach(self) -> NativeFile: ...
+
+
+class TransformInputStream(NativeFile):
+ def __init__(self, stream: NativeFile,
+ transform_func: Callable[[Buffer], Any]) -> None: ...
+
+
+class Transcoder:
+ def __init__(self, decoder, encoder) -> None: ...
+ def __call__(self, buf: Buffer): ...
+
+
+def transcoding_input_stream(
+ stream: NativeFile, src_encoding: str, dest_encoding: str
+) -> TransformInputStream: ...
+
+
+def py_buffer(obj: SupportPyBuffer | np.ndarray) -> Buffer: ...
+
+
+def foreign_buffer(address: int, size: int, base: Any | None = None) -> Buffer: ...
+
+
+def as_buffer(o: Buffer | SupportPyBuffer) -> Buffer: ...
+
+# ---------------------------------------------------------------------
+
+
+class CacheOptions(_Weakrefable):
+ hole_size_limit: int
+ range_size_limit: int
+ lazy: bool
+ prefetch_limit: int
+
+ def __init__(
+ self,
+ *,
+ hole_size_limit: int | None = None,
+ range_size_limit: int | None = None,
+ lazy: bool = True,
+ prefetch_limit: int = 0,
+ ) -> None: ...
+
+ @classmethod
+ def from_network_metrics(
+ cls,
+ time_to_first_byte_millis: int,
+ transfer_bandwidth_mib_per_sec: int,
+ ideal_bandwidth_utilization_frac: float = 0.9,
+ max_ideal_request_size_mib: int = 64,
+ ) -> Self: ...
+
+
+class Codec(_Weakrefable):
+ def __init__(self, compression: Compression | str | None,
+ compression_level: int | None = None) -> None: ...
+
+ @classmethod
+ def detect(cls, path: StrPath) -> Self: ...
+
+ @staticmethod
+ def is_available(compression: Compression | str) -> bool: ...
+
+ @staticmethod
+ def supports_compression_level(compression: Compression) -> int: ...
+
+ @staticmethod
+ def default_compression_level(compression: Compression) -> int: ...
+
+ @staticmethod
+ def minimum_compression_level(compression: Compression) -> int: ...
+
+ @staticmethod
+ def maximum_compression_level(compression: Compression) -> int: ...
+
+ @property
+ def name(self) -> Compression: ...
+
+ @property
+ def compression_level(self) -> int: ...
+
+ def compress(
+ self,
+ buf: Buffer | bytes | SupportPyBuffer,
+ *,
+ asbytes: Literal[False] | Literal[True] | None = None, # noqa: Y030
+ memory_pool: MemoryPool | None = None,
+ ) -> Buffer | bytes: ...
+
+ def decompress(
+ self,
+ buf: Buffer | bytes | SupportPyBuffer,
+ decompressed_size: int | None = None,
+ *,
+ asbytes: Literal[False] | Literal[True] | None = None, # noqa: Y030
+ memory_pool: MemoryPool | None = None,
+ ) -> Buffer | bytes: ...
+
+
+def compress(
+ buf: Buffer | bytes | SupportPyBuffer,
+ codec: Compression = "lz4",
+ *,
+ asbytes: Literal[False] | Literal[True] | None = None, # noqa: Y030
+ memory_pool: MemoryPool | None = None,
+) -> Buffer | bytes: ...
+
+
+def decompress(
+ buf: Buffer | bytes | SupportPyBuffer,
+ decompressed_size: int | None = None,
+ codec: Compression = "lz4",
+ *,
+ asbytes: Literal[False] | Literal[True] | None = None, # noqa: Y030
+ memory_pool: MemoryPool | None = None,
+) -> Buffer | bytes: ...
+
+
+def input_stream(
+ source: StrPath | Buffer | NativeFile | IOBase | SupportPyBuffer,
+ compression:
+ Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] | None = "detect",
+ buffer_size: int | str | None = None,
+) -> BufferReader: ...
+
+
+def output_stream(
+ source: StrPath | Buffer | NativeFile | IOBase | SupportPyBuffer,
+ compression:
+ Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] | None = "detect",
+ buffer_size: int | None = None,
+) -> NativeFile: ...
+
+
+__all__ = [
+ "have_libhdfs",
+ "io_thread_count",
+ "set_io_thread_count",
+ "NativeFile",
+ "PythonFile",
+ "MemoryMappedFile",
+ "memory_map",
+ "create_memory_map",
+ "OSFile",
+ "FixedSizeBufferWriter",
+ "Buffer",
+ "ResizableBuffer",
+ "allocate_buffer",
+ "BufferOutputStream",
+ "MockOutputStream",
+ "BufferReader",
+ "CompressedInputStream",
+ "CompressedOutputStream",
+ "BufferedInputStream",
+ "BufferedOutputStream",
+ "TransformInputStream",
+ "Transcoder",
+ "transcoding_input_stream",
+ "py_buffer",
+ "foreign_buffer",
+ "as_buffer",
+ "CacheOptions",
+ "Codec",
+ "compress",
+ "decompress",
+ "input_stream",
+ "output_stream",
+]
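+
+# Illustrative only (not part of the stub contract): common buffer / stream
+# usage covered by these annotations, e.g.
+#
+#   import pyarrow as pa
+#   sink = pa.BufferOutputStream()
+#   sink.write(b"hello")
+#   buf = sink.getvalue()          # -> Buffer
+#   reader = pa.input_stream(buf)  # readable stream over the buffer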
diff --git a/python/pyarrow-stubs/pyarrow/ipc.pyi b/python/pyarrow-stubs/pyarrow/ipc.pyi
new file mode 100644
index 00000000000..d153ab0f46a
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/ipc.pyi
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from io import IOBase
+from typing import Any
+
+from _typeshed import StrPath
+import pandas as pd
+import pyarrow.lib as lib
+
+from pyarrow.lib import (
+ Alignment,
+ IpcReadOptions,
+ IpcWriteOptions,
+ Message,
+ MessageReader,
+ MetadataVersion,
+ ReadStats,
+ RecordBatchReader,
+ WriteStats,
+ _ReadPandasMixin,
+ get_record_batch_size,
+ get_tensor_size,
+ read_message,
+ read_record_batch,
+ read_schema,
+ read_tensor,
+ write_tensor,
+)
+
+
+class RecordBatchStreamReader(lib._RecordBatchStreamReader):
+ def __init__(
+ self,
+ source: bytes | lib.Buffer | lib.NativeFile | IOBase,
+ *,
+ options: IpcReadOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> None: ...
+
+
+class RecordBatchStreamWriter(lib._RecordBatchStreamWriter):
+ def __init__(
+ self,
+ sink: str | lib.NativeFile | IOBase,
+ schema: lib.Schema,
+ *,
+ use_legacy_format: bool | None = None,
+ options: IpcWriteOptions | None = None,
+ ) -> None: ...
+
+
+class RecordBatchFileReader(lib._RecordBatchFileReader):
+ def __init__(
+ self,
+ source: bytes | lib.Buffer | lib.NativeFile | IOBase,
+ footer_offset: int | None = None,
+ *,
+ options: IpcReadOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+ ) -> None: ...
+
+
+class RecordBatchFileWriter(lib._RecordBatchFileWriter):
+ def __init__(
+ self,
+ sink: str | lib.NativeFile | IOBase,
+ schema: lib.Schema,
+ *,
+ use_legacy_format: bool | None = None,
+ options: IpcWriteOptions | None = None,
+ ) -> None: ...
+
+
+def new_stream(
+ sink: str | lib.NativeFile | IOBase,
+ schema: lib.Schema,
+ *,
+ use_legacy_format: bool | None = None,
+ options: IpcWriteOptions | None = None,
+) -> RecordBatchStreamWriter: ...
+
+
+def open_stream(
+ source: bytes | int | lib.Buffer | lib.NativeFile | IOBase,
+ *,
+ options: Any = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> RecordBatchStreamReader: ...
+
+
+def new_file(
+ sink: str | lib.NativeFile | IOBase,
+ schema: lib.Schema,
+ *,
+ use_legacy_format: bool | None = None,
+ options: IpcWriteOptions | None = None,
+ metadata: lib.KeyValueMetadata | dict[bytes, bytes] | None = None,
+) -> RecordBatchFileWriter: ...
+
+
+def open_file(
+ source: StrPath | bytes | lib.Buffer | lib.NativeFile | IOBase,
+ footer_offset: int | None = None,
+ *,
+ options: Any = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> RecordBatchFileReader: ...
+
+
+def serialize_pandas(
+ df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None
+) -> lib.Buffer: ...
+
+
+def deserialize_pandas(
+ buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ...
+
+
+__all__ = [
+ "Alignment",
+ "IpcReadOptions",
+ "IpcWriteOptions",
+ "Message",
+ "MessageReader",
+ "MetadataVersion",
+ "ReadStats",
+ "RecordBatchReader",
+ "WriteStats",
+ "_ReadPandasMixin",
+ "get_record_batch_size",
+ "get_tensor_size",
+ "read_message",
+ "read_record_batch",
+ "read_schema",
+ "read_tensor",
+ "write_tensor",
+ "RecordBatchStreamReader",
+ "RecordBatchStreamWriter",
+ "RecordBatchFileReader",
+ "RecordBatchFileWriter",
+ "new_stream",
+ "open_stream",
+ "new_file",
+ "open_file",
+ "serialize_pandas",
+ "deserialize_pandas",
+]
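+
+# Illustrative only (not part of the stub contract; `table` is a placeholder):
+# an IPC stream round-trip, e.g.
+#
+#   import pyarrow as pa, pyarrow.ipc as ipc
+#   sink = pa.BufferOutputStream()
+#   with ipc.new_stream(sink, table.schema) as writer:
+#       writer.write_table(table)
+#   reader = ipc.open_stream(sink.getvalue())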
diff --git a/python/pyarrow-stubs/pyarrow/json.pyi b/python/pyarrow-stubs/pyarrow/json.pyi
new file mode 100644
index 00000000000..67768db42e4
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/json.pyi
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json
+
+__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"]
diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi
new file mode 100644
index 00000000000..6bd9b7857bf
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/lib.pyi
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from .array import * # noqa: F401, F403
+from .builder import * # noqa: F401, F403
+from .compat import * # noqa: F401, F403
+from .config import * # noqa: F401, F403
+from .device import * # noqa: F401, F403
+from .error import * # noqa: F401, F403
+from .io import * # noqa: F401, F403
+from ._ipc import * # noqa: F401, F403
+from .memory import * # noqa: F401, F403
+from .pandas_shim import * # noqa: F401, F403
+from .scalar import * # noqa: F401, F403
+from .table import * # noqa: F401, F403
+from .tensor import * # noqa: F401, F403
+from ._types import * # noqa: F401, F403
+from .memory import MemoryPool
+from .array import Array
+from ._types import DataType
+
+
+class MonthDayNano(tuple):
+ months: int
+ days: int
+ nanoseconds: int
+
+ def __new__(
+ cls,
+ sequence: tuple[int, int, int] | list[int] = ...,
+ ) -> MonthDayNano: ...
+
+
+def cpu_count() -> int: ...
+
+
+def set_cpu_count(count: int) -> None: ...
+
+
+def is_threading_enabled() -> bool: ...
+
+
+def arange(
+ start: int, stop: int, step: int = 1, *, memory_pool: MemoryPool | None = None
+) -> Array: ...
+
+
+def is_boolean_value(obj: object) -> bool: ...
+
+
+def is_integer_value(obj: object) -> bool: ...
+
+
+def is_float_value(obj: object) -> bool: ...
+
+
+def tzinfo_to_string(tz: object) -> str: ...
+
+
+def string_to_tzinfo(tz: str) -> object: ...
+
+
+def _ndarray_to_arrow_type(values: object, type_: object) -> object: ...
+
+
+def _is_primitive(type_id: int) -> bool: ...
+
+
+def ensure_type(ty: object) -> DataType: ...
+
+
+Type_NA: int
+Type_BOOL: int
+Type_UINT8: int
+Type_INT8: int
+Type_UINT16: int
+Type_INT16: int
+Type_UINT32: int
+Type_INT32: int
+Type_UINT64: int
+Type_INT64: int
+Type_HALF_FLOAT: int
+Type_FLOAT: int
+Type_DOUBLE: int
+Type_DECIMAL32: int
+Type_DECIMAL64: int
+Type_DECIMAL128: int
+Type_DECIMAL256: int
+Type_DATE32: int
+Type_DATE64: int
+Type_TIMESTAMP: int
+Type_TIME32: int
+Type_TIME64: int
+Type_DURATION: int
+Type_INTERVAL_MONTHS: int
+Type_INTERVAL_DAY_TIME: int
+Type_INTERVAL_MONTH_DAY_NANO: int
+Type_BINARY: int
+Type_STRING: int
+Type_LARGE_BINARY: int
+Type_LARGE_STRING: int
+Type_FIXED_SIZE_BINARY: int
+Type_BINARY_VIEW: int
+Type_STRING_VIEW: int
+Type_LIST: int
+Type_LARGE_LIST: int
+Type_LIST_VIEW: int
+Type_LARGE_LIST_VIEW: int
+Type_MAP: int
+Type_FIXED_SIZE_LIST: int
+Type_STRUCT: int
+Type_SPARSE_UNION: int
+Type_DENSE_UNION: int
+Type_DICTIONARY: int
+Type_RUN_END_ENCODED: int
+UnionMode_SPARSE: int
+UnionMode_DENSE: int
diff --git a/python/pyarrow-stubs/pyarrow/memory.pyi b/python/pyarrow-stubs/pyarrow/memory.pyi
new file mode 100644
index 00000000000..f80e01ab21c
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/memory.pyi
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.lib import _Weakrefable
+
+
+class MemoryPool(_Weakrefable):
+ def release_unused(self) -> None: ...
+
+ def bytes_allocated(self) -> int: ...
+
+ def total_bytes_allocated(self) -> int: ...
+
+ def max_memory(self) -> int | None: ...
+
+ def num_allocations(self) -> int: ...
+
+ def print_stats(self) -> None: ...
+
+ @property
+ def backend_name(self) -> str: ...
+
+
+class LoggingMemoryPool(MemoryPool):
+ ...
+
+
+class ProxyMemoryPool(MemoryPool):
+ ...
+
+
+def default_memory_pool() -> MemoryPool: ...
+
+
+def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: ...
+
+
+def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: ...
+
+
+def system_memory_pool() -> MemoryPool: ...
+
+
+def jemalloc_memory_pool() -> MemoryPool: ...
+
+
+def mimalloc_memory_pool() -> MemoryPool: ...
+
+
+def set_memory_pool(pool: MemoryPool) -> None: ...
+
+
+def log_memory_allocations(enable: bool = True) -> None: ...
+
+
+def total_allocated_bytes() -> int: ...
+
+
+def jemalloc_set_decay_ms(decay_ms: int) -> None: ...
+
+
+def supported_memory_backends() -> list[str]: ...
+
+
+__all__ = [
+ "MemoryPool",
+ "LoggingMemoryPool",
+ "ProxyMemoryPool",
+ "default_memory_pool",
+ "proxy_memory_pool",
+ "logging_memory_pool",
+ "system_memory_pool",
+ "jemalloc_memory_pool",
+ "mimalloc_memory_pool",
+ "set_memory_pool",
+ "log_memory_allocations",
+ "total_allocated_bytes",
+ "jemalloc_set_decay_ms",
+ "supported_memory_backends",
+]
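+
+# Illustrative only (not part of the stub contract): inspecting the active
+# memory pool, e.g.
+#
+#   import pyarrow as pa
+#   pool = pa.default_memory_pool()
+#   print(pool.backend_name, pool.bytes_allocated())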
diff --git a/python/pyarrow-stubs/pyarrow/orc.pyi b/python/pyarrow-stubs/pyarrow/orc.pyi
new file mode 100644
index 00000000000..f16350d0ffc
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/orc.pyi
@@ -0,0 +1,146 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+from typing import IO, Any, Literal
+
+from _typeshed import StrPath
+
+from . import _orc
+from ._fs import SupportedFileSystem
+from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table
+
+
+class ORCFile:
+ reader: _orc.ORCReader
+ def __init__(self, source: StrPath | NativeFile | IO) -> None: ...
+ @property
+ def metadata(self) -> KeyValueMetadata: ...
+
+ @property
+ def schema(self) -> Schema: ...
+
+ @property
+ def nrows(self) -> int: ...
+
+ @property
+ def nstripes(self) -> int: ...
+
+ @property
+ def file_version(self) -> str: ...
+
+ @property
+ def software_version(self) -> str: ...
+
+ @property
+ def compression(self) -> Literal["UNCOMPRESSED",
+ "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ...
+
+ @property
+ def compression_size(self) -> int: ...
+
+ @property
+ def writer(self) -> str: ...
+
+ @property
+ def writer_version(self) -> str: ...
+
+ @property
+ def row_index_stride(self) -> int: ...
+
+ @property
+ def nstripe_statistics(self) -> int: ...
+
+ @property
+ def content_length(self) -> int: ...
+
+ @property
+ def stripe_statistics_length(self) -> int: ...
+
+ @property
+ def file_footer_length(self) -> int: ...
+
+ @property
+ def file_postscript_length(self) -> int: ...
+
+ @property
+ def file_length(self) -> int: ...
+
+ def read_stripe(
+ self, n: int, columns: list[str | int] | None = None
+ ) -> RecordBatch: ...
+
+ def read(self, columns: list[str | int] | None = None) -> Table: ...
+
+
+class ORCWriter:
+ writer: _orc.ORCWriter
+ is_open: bool
+
+ def __init__(
+ self,
+ where: StrPath | NativeFile | IO,
+ *,
+ file_version: Any = "0.12",
+ batch_size: Any = 1024,
+ stripe_size: Any = 64 * 1024 * 1024, # noqa: Y011
+ compression: Any = "UNCOMPRESSED",
+ compression_block_size: Any = 65536,
+ compression_strategy: Any = "SPEED",
+ row_index_stride: Any = 10000,
+ padding_tolerance: Any = 0.0,
+ dictionary_key_size_threshold: Any = 0.0,
+ bloom_filter_columns: Any = None,
+ bloom_filter_fpp: Any = 0.05,
+ ): ...
+ def __enter__(self) -> Self: ...
+ def __exit__(self, *args, **kwargs) -> None: ...
+ def __getattr__(self, name: str) -> Any: ...
+ def write(self, table: Table) -> None: ...
+
+ def close(self) -> None: ...
+
+
+def read_table(
+ source: StrPath | NativeFile | IO,
+ columns: list[str | int] | None = None,
+ filesystem: SupportedFileSystem | str | None = None,
+) -> Table: ...
+
+
+# TODO: should these parameters use more specific types than Any?
+def write_table(
+ table: Table,
+ where: StrPath | NativeFile | IO,
+ *,
+ file_version: Any = "0.12",
+ batch_size: Any = 1024,
+ stripe_size: Any = 64 * 1024 * 1024, # noqa: Y011
+ compression: Any = "UNCOMPRESSED",
+ compression_block_size: Any = 65536,
+ compression_strategy: Any = "SPEED",
+ row_index_stride: Any = 10000,
+ padding_tolerance: Any = 0.0,
+ dictionary_key_size_threshold: Any = 0.0,
+ bloom_filter_columns: Any = None,
+ bloom_filter_fpp: Any = 0.05,
+) -> None: ...
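+
+
+# Illustrative only (not part of the stub contract; `table` and the path are
+# placeholders): an ORC read/write round-trip, e.g.
+#
+#   from pyarrow import orc
+#   orc.write_table(table, "data.orc", compression="ZSTD")
+#   t = orc.read_table("data.orc", columns=["a"])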
diff --git a/python/pyarrow-stubs/pyarrow/pandas_compat.pyi b/python/pyarrow-stubs/pyarrow/pandas_compat.pyi
new file mode 100644
index 00000000000..4e614c58a3f
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/pandas_compat.pyi
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, TypedDict, TypeVar
+
+import numpy as np
+import pandas as pd
+
+from pandas import DatetimeTZDtype
+
+from .lib import Array, DataType, Schema, Table, _pandas_api
+
+_T = TypeVar("_T")
+
+
+def get_logical_type_map() -> dict[int, str]: ...
+def get_logical_type(arrow_type: DataType) -> str: ...
+def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ...
+def get_logical_type_from_numpy(pandas_collection) -> str: ...
+def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ...
+
+
+class _ColumnMetadata(TypedDict):
+ name: str
+ field_name: str
+ pandas_type: str
+ numpy_type: str
+ metadata: dict | None
+
+
+def get_column_metadata(
+ column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str
+) -> _ColumnMetadata: ...
+
+
+def construct_metadata(
+ columns_to_convert: list[pd.Series],
+ df: pd.DataFrame,
+ column_names: list[str],
+ index_levels: list[pd.Index],
+ index_descriptors: list[dict],
+ preserve_index: bool,
+ types: list[DataType],
+ column_field_names: list[str] = ...,
+) -> dict[bytes, bytes]: ...
+
+
+def dataframe_to_types(
+ df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None
+) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ...
+
+
+def dataframe_to_arrays(
+ df: pd.DataFrame,
+ schema: Schema,
+ preserve_index: bool | None,
+ nthreads: int = 1,
+ columns: list[str] | None = None,
+ safe: bool = True,
+) -> tuple[Array, Schema, int]: ...
+def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ...
+def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ...
+
+
+def table_to_dataframe(
+ options,
+ table: Table,
+ categories=None,
+ ignore_metadata: bool = False,
+ types_mapper=None) -> pd.DataFrame: ...
+
+
+def make_tz_aware(series: pd.Series, tz: str) -> pd.Series: ...
+
+
+__all__ = [
+ "_pandas_api",
+]
diff --git a/python/pyarrow-stubs/pyarrow/pandas_shim.pyi b/python/pyarrow-stubs/pyarrow/pandas_shim.pyi
new file mode 100644
index 00000000000..181d78e7a0c
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/pandas_shim.pyi
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import types as stdlib_types
+from collections.abc import Iterable
+from typing import Any, TypeGuard
+
+from pandas import Categorical, DatetimeTZDtype, Index, Series, DataFrame
+
+from numpy import dtype
+from pandas.core.dtypes.base import ExtensionDtype
+
+
+class _PandasAPIShim:
+ has_sparse: bool
+
+ def series(self, *args, **kwargs) -> Series: ...
+ def data_frame(self, *args, **kwargs) -> DataFrame: ...
+ @property
+ def have_pandas(self) -> bool: ...
+ @property
+ def compat(self) -> stdlib_types.ModuleType: ...
+ @property
+ def pd(self) -> stdlib_types.ModuleType: ...
+ def infer_dtype(self, obj: Iterable) -> str: ...
+ def pandas_dtype(self, dtype: str) -> dtype: ...
+ @property
+ def loose_version(self) -> Any: ...
+ @property
+ def version(self) -> str: ...
+ def is_v1(self) -> bool: ...
+ def is_ge_v21(self) -> bool: ...
+ def is_ge_v23(self) -> bool: ...
+ def is_ge_v3(self) -> bool: ...
+ def uses_string_dtype(self) -> bool: ...
+ @property
+ def categorical_type(self) -> type[Categorical]: ...
+ @property
+ def datetimetz_type(self) -> type[DatetimeTZDtype]: ...
+ @property
+ def extension_dtype(self) -> type[ExtensionDtype]: ...
+
+ def is_array_like(
+ self, obj: Any
+ ) -> TypeGuard[Series | Index | Categorical | ExtensionDtype]: ...
+ def is_categorical(self, obj: Any) -> TypeGuard[Categorical]: ...
+ def is_datetimetz(self, obj: Any) -> TypeGuard[DatetimeTZDtype]: ...
+ def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ...
+ def is_sparse(self, obj: Any) -> bool: ...
+ def is_data_frame(self, obj: Any) -> TypeGuard[DataFrame]: ...
+ def is_series(self, obj: Any) -> TypeGuard[Series]: ...
+ def is_index(self, obj: Any) -> TypeGuard[Index]: ...
+ def get_values(self, obj: Any) -> Any: ...
+ def get_rangeindex_attribute(self, level, name): ...
+
+
+_pandas_api: _PandasAPIShim
+
+__all__ = ["_PandasAPIShim", "_pandas_api"]
diff --git a/python/pyarrow-stubs/pyarrow/parquet/__init__.pyi b/python/pyarrow-stubs/pyarrow/parquet/__init__.pyi
new file mode 100644
index 00000000000..5329bd6c66a
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/parquet/__init__.pyi
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .core import * # noqa: F401, F403
diff --git a/python/pyarrow-stubs/pyarrow/parquet/core.pyi b/python/pyarrow-stubs/pyarrow/parquet/core.pyi
new file mode 100644
index 00000000000..83326c717ae
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/parquet/core.pyi
@@ -0,0 +1,372 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+from pathlib import Path
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+from collections.abc import Callable, Iterator, Iterable, Sequence
+from typing import IO, Literal
+
+if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+else:
+ from typing_extensions import TypeAlias
+
+from pyarrow import _parquet
+from pyarrow._compute import Expression
+from pyarrow._fs import FileSystem, SupportedFileSystem
+from pyarrow._parquet import (
+ ColumnChunkMetaData,
+ ColumnSchema,
+ FileDecryptionProperties,
+ FileEncryptionProperties,
+ FileMetaData,
+ ParquetLogicalType,
+ ParquetReader,
+ ParquetSchema,
+ RowGroupMetaData,
+ SortingColumn,
+ Statistics,
+)
+from pyarrow._stubs_typing import FilterTuple, SingleOrList
+from pyarrow.dataset import ParquetFileFragment, Partitioning, PartitioningFactory
+from pyarrow.lib import Buffer, NativeFile, RecordBatch, Schema, Table, ChunkedArray
+from typing_extensions import deprecated
+
+__all__ = (
+ "ColumnChunkMetaData",
+ "ColumnSchema",
+ "FileDecryptionProperties",
+ "FileEncryptionProperties",
+ "FileMetaData",
+ "ParquetDataset",
+ "ParquetFile",
+ "ParquetLogicalType",
+ "ParquetReader",
+ "ParquetSchema",
+ "ParquetWriter",
+ "RowGroupMetaData",
+ "SortingColumn",
+ "Statistics",
+ "read_metadata",
+ "read_pandas",
+ "read_schema",
+ "read_table",
+ "write_metadata",
+ "write_table",
+ "write_to_dataset",
+ "_filters_to_expression",
+ "filters_to_expression",
+)
+
+
+def filters_to_expression(
+ filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ...
+
+
+@deprecated("use filters_to_expression")
+def _filters_to_expression(
+ filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ...
+
+
+_Compression: TypeAlias = Literal["gzip", "bz2",
+ "brotli", "lz4", "zstd", "snappy", "none"]
+
+
+class ParquetFile:
+ reader: ParquetReader
+ common_metadata: FileMetaData
+
+ def __init__(
+ self,
+ source: str | Path | Buffer | NativeFile | IO,
+ *,
+ metadata: FileMetaData | None = None,
+ common_metadata: FileMetaData | None = None,
+ read_dictionary: list[str] | None = None,
+ memory_map: bool = False,
+ buffer_size: int = 0,
+ pre_buffer: bool = False,
+ coerce_int96_timestamp_unit: str | None = None,
+ decryption_properties: FileDecryptionProperties | None = None,
+ thrift_string_size_limit: int | None = None,
+ thrift_container_size_limit: int | None = None,
+ filesystem: SupportedFileSystem | None = None,
+ page_checksum_verification: bool = False,
+ ): ...
+ def __enter__(self) -> Self: ...
+ def __exit__(self, *args, **kwargs) -> None: ...
+ @property
+ def metadata(self) -> FileMetaData: ...
+ @property
+ def schema(self) -> ParquetSchema: ...
+ @property
+ def schema_arrow(self) -> Schema: ...
+ @property
+ def num_row_groups(self) -> int: ...
+ def close(self, force: bool = False) -> None: ...
+ @property
+ def closed(self) -> bool: ...
+
+ def read_row_group(
+ self,
+ i: int,
+ columns: Sequence[str | int] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Table: ...
+
+ def read_row_groups(
+ self,
+ row_groups: Sequence[int],
+ columns: Iterable[str | int] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Table: ...
+
+ def iter_batches(
+ self,
+ batch_size: int = 65536,
+ row_groups: Sequence[int] | None = None,
+ columns: Iterable[str | int] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Iterator[RecordBatch]: ...
+
+ def read(
+ self,
+ columns: Sequence[str | int] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Table: ...
+
+ def scan_contents(
+ self, columns: Iterable[str | int] | None = None, batch_size: int = 65536
+ ) -> int: ...
+
+
+class ParquetWriter:
+ flavor: str
+ schema_changed: bool
+ schema: ParquetSchema
+ where: str | Path | IO
+ file_handler: NativeFile | None
+ writer: _parquet.ParquetWriter
+ is_open: bool
+
+ def __init__(
+ self,
+ where: str | Path | IO | NativeFile,
+ schema: Schema,
+ filesystem: SupportedFileSystem | None = None,
+ flavor: str | None = None,
+ version: Literal["1.0", "2.4", "2.6"] = ...,
+ use_dictionary: bool = True,
+ compression: _Compression | dict[str, _Compression] = "snappy",
+ write_statistics: bool | list = True,
+ use_deprecated_int96_timestamps: bool | None = None,
+ compression_level: int | dict | None = None,
+ use_byte_stream_split: bool | list = False,
+ column_encoding: str | dict | None = None,
+ writer_engine_version=None,
+ data_page_version: Literal["1.0", "2.0"] = ...,
+ use_compliant_nested_type: bool = True,
+ encryption_properties: FileEncryptionProperties | None = None,
+ write_batch_size: int | None = None,
+ dictionary_pagesize_limit: int | None = None,
+ store_schema: bool = True,
+ write_page_index: bool = False,
+ write_page_checksum: bool = False,
+ sorting_columns: Sequence[SortingColumn] | None = None,
+ store_decimal_as_integer: bool = False,
+ max_rows_per_page: int | None = None,
+ **options,
+ ) -> None: ...
+ def __enter__(self) -> Self: ...
+ def __exit__(self, *args, **kwargs) -> Literal[False]: ...
+
+ def write(
+ self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None
+ ) -> None: ...
+ def write_batch(self, batch: RecordBatch,
+ row_group_size: int | None = None) -> None: ...
+
+ def write_table(self, table: Table, row_group_size: int | None = None) -> None: ...
+ def close(self) -> None: ...
+ def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: ...
+
+
+class ParquetDataset:
+ def __init__(
+ self,
+ path_or_paths: SingleOrList[str]
+ | SingleOrList[Path]
+ | SingleOrList[NativeFile]
+ | SingleOrList[IO],
+ filesystem: SupportedFileSystem | None = None,
+ schema: Schema | None = None,
+ *,
+ filters: Expression
+ | FilterTuple
+ | list[FilterTuple]
+ | list[list[FilterTuple]]
+ | None = None,
+ read_dictionary: list[str] | None = None,
+ memory_map: bool = False,
+ buffer_size: int = 0,
+ partitioning: str
+ | list[str]
+ | Partitioning
+ | PartitioningFactory
+ | None = "hive",
+ ignore_prefixes: list[str] | None = None,
+ pre_buffer: bool = True,
+ coerce_int96_timestamp_unit: str | None = None,
+ decryption_properties: FileDecryptionProperties | None = None,
+ thrift_string_size_limit: int | None = None,
+ thrift_container_size_limit: int | None = None,
+ page_checksum_verification: bool = False,
+ ): ...
+ def equals(self, other: ParquetDataset) -> bool: ...
+ @property
+ def schema(self) -> Schema: ...
+
+ def read(
+ self,
+ columns: list[str] | None = None,
+ use_threads: bool = True,
+ use_pandas_metadata: bool = False,
+ ) -> Table: ...
+ def read_pandas(self, **kwargs) -> Table: ...
+ @property
+ def fragments(self) -> list[ParquetFileFragment]: ...
+ @property
+ def files(self) -> list[str]: ...
+ @property
+ def filesystem(self) -> FileSystem: ...
+ @property
+ def partitioning(self) -> Partitioning: ...
+
+
+def read_table(
+ source: SingleOrList[str]
+ | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO] | Buffer,
+ *,
+ columns: list | None = None,
+ use_threads: bool = True,
+ schema: Schema | None = None,
+ use_pandas_metadata: bool = False,
+ read_dictionary: list[str] | None = None,
+ memory_map: bool = False,
+ buffer_size: int = 0,
+ partitioning: str | list[str] | Partitioning | PartitioningFactory | None = "hive",
+ filesystem: SupportedFileSystem | str | None = None,
+ filters: Expression
+ | FilterTuple
+ | list[FilterTuple]
+ | Sequence[Sequence[tuple]]
+ | None = None,
+ ignore_prefixes: list[str] | None = None,
+ pre_buffer: bool = True,
+ coerce_int96_timestamp_unit: str | None = None,
+ decryption_properties: FileDecryptionProperties | None = None,
+ thrift_string_size_limit: int | None = None,
+ thrift_container_size_limit: int | None = None,
+ page_checksum_verification: bool = False,
+) -> Table: ...
+
+
+def read_pandas(
+ source: str | Path | NativeFile | IO | Buffer, columns: list | None = None, **kwargs
+) -> Table: ...
+
+
+def write_table(
+ table: Table,
+ where: str | Path | NativeFile | IO,
+ row_group_size: int | None = None,
+ version: Literal["1.0", "2.4", "2.6"] = "2.6",
+ use_dictionary: bool = True,
+ compression: _Compression | dict[str, _Compression] = "snappy",
+ write_statistics: bool | list = True,
+ use_deprecated_int96_timestamps: bool | None = None,
+ coerce_timestamps: str | None = None,
+ allow_truncated_timestamps: bool = False,
+ data_page_size: int | None = None,
+ flavor: str | None = None,
+ filesystem: SupportedFileSystem | str | None = None,
+ compression_level: int | dict | None = None,
+ use_byte_stream_split: bool = False,
+ column_encoding: str | dict | None = None,
+ data_page_version: Literal["1.0", "2.0"] = ...,
+ use_compliant_nested_type: bool = True,
+ encryption_properties: FileEncryptionProperties | None = None,
+ write_batch_size: int | None = None,
+ dictionary_pagesize_limit: int | None = None,
+ store_schema: bool = True,
+ write_page_index: bool = False,
+ write_page_checksum: bool = False,
+ sorting_columns: Sequence[SortingColumn] | None = None,
+ store_decimal_as_integer: bool = False,
+ **kwargs,
+) -> None: ...
+
+
+def write_to_dataset(
+ table: Table | ChunkedArray,
+ root_path: str | Path,
+ partition_cols: list[str] | None = None,
+ filesystem: SupportedFileSystem | None = None,
+ schema: Schema | None = None,
+ partitioning: Partitioning | list[str] | None = None,
+ basename_template: str | None = None,
+ use_threads: bool | None = None,
+ file_visitor: Callable[[str], None] | None = None,
+ existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"]
+ | None = None,
+ **kwargs,
+) -> None: ...
+
+
+def write_metadata(
+ schema: Schema,
+ where: str | NativeFile,
+ metadata_collector: list[FileMetaData] | None = None,
+ filesystem: SupportedFileSystem | None = None,
+ **kwargs,
+) -> None: ...
+
+
+def read_metadata(
+ where: str | Path | IO | NativeFile,
+ memory_map: bool = False,
+ decryption_properties: FileDecryptionProperties | None = None,
+ filesystem: SupportedFileSystem | str | None = None,
+) -> FileMetaData: ...
+
+
+def read_schema(
+ where: str | Path | IO | NativeFile,
+ memory_map: bool = False,
+ decryption_properties: FileDecryptionProperties | None = None,
+ filesystem: SupportedFileSystem | str | None = None,
+) -> Schema: ...
diff --git a/python/pyarrow-stubs/pyarrow/parquet/encryption.pyi b/python/pyarrow-stubs/pyarrow/parquet/encryption.pyi
new file mode 100644
index 00000000000..7add1c6fa53
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/parquet/encryption.pyi
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._parquet_encryption import (
+ CryptoFactory,
+ DecryptionConfiguration,
+ EncryptionConfiguration,
+ FileSystemKeyMaterialStore,
+ KmsClient,
+ KmsConnectionConfig,
+)
+
+__all__ = [
+ "CryptoFactory",
+ "DecryptionConfiguration",
+ "EncryptionConfiguration",
+ "FileSystemKeyMaterialStore",
+ "KmsClient",
+ "KmsConnectionConfig",
+]
diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi
new file mode 100644
index 00000000000..70b2ea2b347
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/scalar.pyi
@@ -0,0 +1,466 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import collections.abc
+import datetime as dt
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+from collections.abc import Iterator
+from typing import Any, Generic, Literal
+
+import numpy as np
+
+from pyarrow._compute import CastOptions
+from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable
+from pyarrow.table import ArrayOrChunkedArray
+from typing_extensions import TypeVar
+
+from ._types import ( # noqa: F401
+ DataType,
+ Decimal128Type,
+ Date32Type,
+ Date64Type,
+ Time32Type,
+ Time64Type,
+ TimestampType,
+ Decimal256Type,
+ NullType,
+ BoolType,
+ UInt8Type,
+ Int8Type,
+ DurationType, MonthDayNanoIntervalType, BinaryType, LargeBinaryType,
+ FixedSizeBinaryType, StringType, LargeStringType, BinaryViewType, StringViewType,
+ FixedSizeListType,
+ Float16Type, Float32Type, Float64Type, Decimal32Type, Decimal64Type,
+ LargeListType,
+ LargeListViewType,
+ ListType,
+ ListViewType,
+ OpaqueType, DictionaryType, MapType, _BasicDataType,
+ StructType, RunEndEncodedType,
+ UInt16Type, Int16Type, UInt32Type, Int32Type, UInt64Type, Int64Type,
+ UnionType, ExtensionType, BaseExtensionType, Bool8Type, UuidType, JsonType,
+ _BasicValueT,
+ _DataTypeT,
+ _IndexT,
+ _K,
+ _Precision,
+ _RunEndType,
+ _Scale,
+ _Size,
+ _Time32Unit,
+ _Time64Unit,
+ _Tz,
+ _Unit,
+ _ValueT,
+)
+
+_AsPyTypeK = TypeVar("_AsPyTypeK")
+_AsPyTypeV = TypeVar("_AsPyTypeV")
+_DataType_co = TypeVar("_DataType_co", bound=DataType, covariant=True)
+
+
+class Scalar(_Weakrefable, Generic[_DataType_co]):
+ @property
+ def type(self) -> _DataType_co: ...
+
+ @property
+ def is_valid(self) -> bool: ...
+
+ def cast(
+ self,
+ target_type: None | _DataTypeT | str,
+ safe: bool = True,
+ options: CastOptions | None = None,
+ memory_pool: MemoryPool | None = None,
+ ) -> Self | Scalar[_DataTypeT] | Scalar[Any]: ...
+
+ def validate(self, *, full: bool = False) -> None: ...
+
+ def equals(self, other: Scalar | ArrayOrChunkedArray) -> bool: ...
+
+ def __hash__(self) -> int: ...
+
+ def as_py(self: Scalar[Any], *, maps_as_pydicts: Literal["lossy",
+ "strict"] | None = None) -> Any: ...
+
+ def as_buffer(self) -> Buffer | None: ...
+
+ # Buffer protocol support
+ def __buffer__(self, flags: int) -> memoryview: ...
+
+ # Methods for structured types (StructScalar, MapScalar, ListScalar, etc.)
+ def __len__(self) -> int: ...
+
+ def __iter__(self) -> Iterator[Any]: ...
+
+ def __getitem__(self, key: int | str) -> Any: ...
+
+ def __contains__(self, key: object) -> bool: ...
+
+ def keys(self) -> Iterator[str]: ...
+
+ def items(self) -> Iterator[tuple[str, Any]]: ...
+
+ @property
+ def values(self) -> Any: ...
+
+ # Methods for compatibility with array-like interface
+ def to_pylist(self) -> list: ...
+ def tolist(self) -> list: ...
+ def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> Any: ...
+
+
+_NULL: NullScalar
+NA: NullScalar
+
+
+class NullScalar(Scalar[NullType]):
+ ...
+
+
+class BooleanScalar(Scalar[BoolType]):
+ ...
+
+
+class UInt8Scalar(Scalar[UInt8Type]):
+ ...
+
+
+class Int8Scalar(Scalar[Int8Type]):
+ ...
+
+
+class UInt16Scalar(Scalar[UInt16Type]):
+ ...
+
+
+class Int16Scalar(Scalar[Int16Type]):
+ ...
+
+
+class UInt32Scalar(Scalar[UInt32Type]):
+ ...
+
+
+class Int32Scalar(Scalar[Int32Type]):
+ ...
+
+
+class UInt64Scalar(Scalar[UInt64Type]):
+ ...
+
+
+class Int64Scalar(Scalar[Int64Type]):
+ ...
+
+
+class HalfFloatScalar(Scalar[Float16Type]):
+ ...
+
+
+class FloatScalar(Scalar[Float32Type]):
+ ...
+
+
+class DoubleScalar(Scalar[Float64Type]):
+ ...
+
+
+class Decimal32Scalar(Scalar[Decimal32Type[_Precision, _Scale]]):
+ ...
+
+
+class Decimal64Scalar(Scalar[Decimal64Type[_Precision, _Scale]]):
+ ...
+
+
+class Decimal128Scalar(Scalar[Decimal128Type[_Precision, _Scale]]):
+ ...
+
+
+class Decimal256Scalar(Scalar[Decimal256Type[_Precision, _Scale]]):
+ ...
+
+
+class Date32Scalar(Scalar[Date32Type]):
+ ...
+
+
+class Date64Scalar(Scalar[Date64Type]):
+ @property
+ def value(self) -> dt.date | None: ...
+
+
+class Time32Scalar(Scalar[Time32Type[_Time32Unit]]):
+ @property
+ def value(self) -> dt.time | None: ...
+
+
+class Time64Scalar(Scalar[Time64Type[_Time64Unit]]):
+ @property
+ def value(self) -> dt.time | None: ...
+
+
+class TimestampScalar(Scalar[TimestampType[_Unit, _Tz]]):
+ @property
+ def value(self) -> int | None: ...
+
+
+class DurationScalar(Scalar[DurationType[_Unit]]):
+ @property
+ def value(self) -> dt.timedelta | None: ...
+
+
+class MonthDayNanoIntervalScalar(Scalar[MonthDayNanoIntervalType]):
+ @property
+ def value(self) -> MonthDayNano | None: ...
+
+
+class BinaryScalar(Scalar[BinaryType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class LargeBinaryScalar(Scalar[LargeBinaryType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class FixedSizeBinaryScalar(Scalar[FixedSizeBinaryType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class StringScalar(Scalar[StringType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class LargeStringScalar(Scalar[LargeStringType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class BinaryViewScalar(Scalar[BinaryViewType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class StringViewScalar(Scalar[StringViewType]):
+ def as_buffer(self) -> Buffer: ...
+
+
+class ListScalar(Scalar[ListType[_DataTypeT]]):
+ @property
+ def values(self) -> Array | None: ...
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, i: int | str) -> Scalar[_DataTypeT]: ...
+
+ def __iter__(self) -> Iterator[Array]: ...
+
+
+class FixedSizeListScalar(Scalar[FixedSizeListType[_DataTypeT, _Size]]):
+ @property
+ def values(self) -> Array | None: ...
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, i: int | str) -> Scalar[_DataTypeT]: ...
+
+ def __iter__(self) -> Iterator[Array]: ...
+
+
+class LargeListScalar(Scalar[LargeListType[_DataTypeT]]):
+ @property
+ def values(self) -> Array | None: ...
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, i: int | str) -> Scalar[_DataTypeT]: ...
+
+ def __iter__(self) -> Iterator[Array]: ...
+
+
+class ListViewScalar(Scalar[ListViewType[_DataTypeT]]):
+ @property
+ def values(self) -> Array | None: ...
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, i: int | str) -> Scalar[_DataTypeT]: ...
+
+ def __iter__(self) -> Iterator[Array]: ...
+
+
+class LargeListViewScalar(Scalar[LargeListViewType[_DataTypeT]]):
+ @property
+ def values(self) -> Array | None: ...
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, i: int | str) -> Scalar[_DataTypeT]: ...
+
+ def __iter__(self) -> Iterator[Array]: ...
+
+
+class StructScalar(Scalar[StructType], collections.abc.Mapping[str, Scalar]):
+ def __len__(self) -> int: ...
+
+ def __iter__(self) -> Iterator[str]: ...
+
+ def __getitem__(self, key: int | str) -> Scalar[Any]: ...
+
+ def keys(self) -> collections.abc.KeysView[str]: # type: ignore[override]
+ ...
+
+ def items(self) -> collections.abc.ItemsView[str, Scalar[Any]]: # type: ignore[override] # noqa: E501
+ ...
+
+ def _as_py_tuple(self) -> list[tuple[str, Any]]: ...
+
+
+class MapScalar(Scalar[MapType[_K, _ValueT]]):
+ @property
+ def values(self) -> Array | None: ...
+ def __len__(self) -> int: ...
+
+ def __getitem__(self, i: int | str) -> (
+ tuple[Scalar[_K], _ValueT, Any] | Scalar[Any]): ...
+
+ def __iter__(self: Scalar[
+ MapType[_BasicDataType[_AsPyTypeK], _BasicDataType[_AsPyTypeV]]]
+ | Scalar[MapType[Any, _BasicDataType[_AsPyTypeV]]]
+ | Scalar[MapType[_BasicDataType[_AsPyTypeK], Any]]) -> (
+ Iterator[tuple[_AsPyTypeK, _AsPyTypeV]]
+ | Iterator[tuple[Any, _AsPyTypeV]]
+ | Iterator[tuple[_AsPyTypeK, Any]]
+ ): ...
+
+
+class DictionaryScalar(Scalar[DictionaryType[_IndexT, _BasicValueT]]):
+ @property
+ def index(self) -> Scalar[_IndexT]: ...
+
+ @property
+ def value(self) -> Scalar[_BasicValueT]: ...
+
+ @property
+ def dictionary(self) -> Array: ...
+
+
+class RunEndEncodedScalar(Scalar[RunEndEncodedType[_RunEndType, _BasicValueT]]):
+ @property
+ def value(self) -> tuple[int, _BasicValueT] | None: ...
+
+
+class UnionScalar(Scalar[UnionType]):
+ @property
+ def value(self) -> Any | None: ...
+
+ @property
+ def type_code(self) -> int: ...
+
+
+class ExtensionScalar(Scalar[ExtensionType]):
+ @property
+ def value(self) -> Any | None: ...
+
+ @staticmethod
+ def from_storage(typ: BaseExtensionType, value) -> ExtensionScalar: ...
+
+
+class Bool8Scalar(Scalar[Bool8Type]):
+ ...
+
+
+class UuidScalar(Scalar[UuidType]):
+ ...
+
+
+class JsonScalar(Scalar[JsonType]):
+ ...
+
+
+class OpaqueScalar(Scalar[OpaqueType]):
+ ...
+
+
+class FixedShapeTensorScalar(ExtensionScalar):
+ def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> (
+ np.ndarray): ... # type: ignore[override]
+
+ def to_tensor(self) -> Tensor: ...
+
+
+def scalar(
+ value: Any,
+ type: _DataTypeT | str | None = None,
+ *,
+ from_pandas: bool | None = None,
+ memory_pool: MemoryPool | None = None,
+) -> Scalar[_DataTypeT] | Scalar[Any]: ...
+
+
+__all__ = [
+ "Scalar",
+ "_NULL",
+ "NA",
+ "NullScalar",
+ "BooleanScalar",
+ "UInt8Scalar",
+ "Int8Scalar",
+ "UInt16Scalar",
+ "Int16Scalar",
+ "UInt32Scalar",
+ "Int32Scalar",
+ "UInt64Scalar",
+ "Int64Scalar",
+ "HalfFloatScalar",
+ "FloatScalar",
+ "DoubleScalar",
+ "Decimal32Scalar",
+ "Decimal64Scalar",
+ "Decimal128Scalar",
+ "Decimal256Scalar",
+ "Date32Scalar",
+ "Date64Scalar",
+ "Time32Scalar",
+ "Time64Scalar",
+ "TimestampScalar",
+ "DurationScalar",
+ "MonthDayNanoIntervalScalar",
+ "BinaryScalar",
+ "LargeBinaryScalar",
+ "FixedSizeBinaryScalar",
+ "StringScalar",
+ "LargeStringScalar",
+ "BinaryViewScalar",
+ "StringViewScalar",
+ "ListScalar",
+ "FixedSizeListScalar",
+ "LargeListScalar",
+ "ListViewScalar",
+ "LargeListViewScalar",
+ "StructScalar",
+ "MapScalar",
+ "DictionaryScalar",
+ "RunEndEncodedScalar",
+ "UnionScalar",
+ "ExtensionScalar",
+ "FixedShapeTensorScalar",
+ "Bool8Scalar",
+ "UuidScalar",
+ "JsonScalar",
+ "OpaqueScalar",
+ "scalar",
+]
diff --git a/python/pyarrow-stubs/pyarrow/substrait.pyi b/python/pyarrow-stubs/pyarrow/substrait.pyi
new file mode 100644
index 00000000000..b78bbd8aebd
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/substrait.pyi
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow._substrait import (
+ BoundExpressions,
+ SubstraitSchema,
+ deserialize_expressions,
+ deserialize_schema,
+ get_supported_functions,
+ run_query,
+ serialize_expressions,
+ serialize_schema,
+)
+
+__all__ = [
+ "BoundExpressions",
+ "get_supported_functions",
+ "run_query",
+ "deserialize_expressions",
+ "serialize_expressions",
+ "deserialize_schema",
+ "serialize_schema",
+ "SubstraitSchema",
+]
diff --git a/python/pyarrow-stubs/pyarrow/table.pyi b/python/pyarrow-stubs/pyarrow/table.pyi
new file mode 100644
index 00000000000..6dd61674d40
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/table.pyi
@@ -0,0 +1,686 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+else:
+ from typing_extensions import TypeAlias
+from collections.abc import (
+ Collection, Generator, Iterable, Iterator, Sequence, Mapping)
+from typing import Any, Generic, Literal, TypeVar
+import builtins
+
+import numpy as np
+import pandas as pd
+
+from numpy.typing import NDArray
+from pyarrow._compute import (
+ CastOptions,
+ CountOptions,
+ FunctionOptions,
+ ScalarAggregateOptions,
+ TDigestOptions,
+ VarianceOptions,
+)
+from pyarrow._stubs_typing import (
+ Indices,
+ Mask,
+ NullEncoding,
+ NullSelectionBehavior,
+ Order,
+ SupportArrowArray,
+ SupportArrowDeviceArray,
+ SupportArrowStream,
+)
+from pyarrow.compute import Expression
+from pyarrow.interchange.dataframe import _PyArrowDataFrame
+from pyarrow.lib import Device, MemoryManager, MemoryPool, Schema
+from pyarrow.lib import Field as _Field
+
+from .array import Array, StructArray, _CastAs, _PandasConvertible
+from .device import DeviceAllocationType
+from .io import Buffer
+from ._ipc import RecordBatchReader
+from .scalar import BooleanScalar, Int64Scalar, Scalar, StructScalar
+from .tensor import Tensor
+from ._stubs_typing import NullableCollection
+from ._types import DataType, _AsPyType, _BasicDataType, _DataTypeT
+
+Field: TypeAlias = _Field[DataType]
+_ScalarT = TypeVar("_ScalarT", bound=Scalar)
+_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True)
+ArrayOrChunkedArray: TypeAlias = Array[_Scalar_co] | ChunkedArray[_Scalar_co]
+
+_Aggregation: TypeAlias = Literal[
+ "all",
+ "any",
+ "approximate_median",
+ "count",
+ "count_all",
+ "count_distinct",
+ "distinct",
+ "first",
+ "first_last",
+ "last",
+ "list",
+ "max",
+ "mean",
+ "min",
+ "min_max",
+ "one",
+ "product",
+ "stddev",
+ "sum",
+ "tdigest",
+ "variance",
+]
+_AggregationPrefixed: TypeAlias = Literal[
+ "hash_all",
+ "hash_any",
+ "hash_approximate_median",
+ "hash_count",
+ "hash_count_all",
+ "hash_count_distinct",
+ "hash_distinct",
+ "hash_first",
+ "hash_first_last",
+ "hash_last",
+ "hash_list",
+ "hash_max",
+ "hash_mean",
+ "hash_min",
+ "hash_min_max",
+ "hash_one",
+ "hash_product",
+ "hash_stddev",
+ "hash_sum",
+ "hash_tdigest",
+ "hash_variance",
+]
+Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed | str
+AggregateOptions: TypeAlias = (ScalarAggregateOptions | CountOptions
+ | TDigestOptions | VarianceOptions | FunctionOptions)
+
+UnarySelector: TypeAlias = str
+NullarySelector: TypeAlias = tuple[()]
+NarySelector: TypeAlias = list[str] | tuple[str, ...]
+ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector
+
+
+class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]):
+
+ def as_py(self) -> list[Any]: ...
+
+ @property
+ def data(self) -> Self: ...
+ @property
+ def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: ...
+
+ # Private attribute used internally for column names
+ _name: str | None
+
+ def length(self) -> int: ...
+
+ __len__ = length
+
+ def to_string(
+ self,
+ *,
+ indent: int = 0,
+ window: int = 5,
+ container_window: int = 2,
+ skip_new_lines: bool = False,
+ ) -> str: ...
+
+ format = to_string
+ def validate(self, *, full: bool = False) -> None: ...
+
+ @property
+ def null_count(self) -> int: ...
+
+ @property
+ def nbytes(self) -> int: ...
+
+ def get_total_buffer_size(self) -> int: ...
+
+ def __sizeof__(self) -> int: ...
+
+ def __getitem__(
+ self, key: int | np.integer | builtins.slice) -> _Scalar_co | Self: ...
+
+ def getitem(self, i: int) -> Scalar: ...
+ def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[BooleanScalar]: ...
+
+ def is_nan(self) -> ChunkedArray[BooleanScalar]: ...
+
+ def is_valid(self) -> ChunkedArray[BooleanScalar]: ...
+
+ def cast(
+ self, target_type: _CastAs | str | None, safe: bool = True,
+ options: CastOptions | None = None,
+ memory_pool: MemoryPool | None = None
+ ) -> Self | ChunkedArray[Scalar[_CastAs]]: ...
+
+ def fill_null(self, fill_value: Scalar[_DataTypeT] | Any) -> Self: ...
+
+ def equals(self, other: Self | Any) -> bool: ...
+
+ def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: ...
+
+ def __array__(self, dtype: np.dtype | None = None,
+ copy: bool | None = None) -> np.ndarray: ...
+
+ def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: ...
+
+ def flatten(self, memory_pool: MemoryPool |
+ None = None) -> list[ChunkedArray[Any]]: ...
+
+ def combine_chunks(self, memory_pool: MemoryPool |
+ None = None) -> Array[_Scalar_co]: ...
+
+ def unique(self) -> ChunkedArray[_Scalar_co]: ...
+
+ def value_counts(self) -> StructArray: ...
+
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+ def filter(self, mask: Mask,
+ null_selection_behavior: NullSelectionBehavior = "drop") -> Self: ...
+
+ def index(
+ self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]],
+ value: Scalar[_DataTypeT] | _AsPyType,
+ start: int | None = None,
+ end: int | None = None,
+ *,
+ memory_pool: MemoryPool | None = None,
+ ) -> Int64Scalar: ...
+
+ def take(self, indices: Indices) -> Self: ...
+
+ def drop_null(self) -> Self: ...
+
+ def sort(self, order: Order = "ascending", **kwargs) -> Self: ...
+
+ def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+ @property
+ def num_chunks(self) -> int: ...
+
+ def chunk(self, i: int) -> Array[_Scalar_co]: ...
+
+ @property
+ def chunks(self) -> list[Array[_Scalar_co]]: ...
+
+ def iterchunks(
+ self: ArrayOrChunkedArray[_ScalarT],
+ ) -> Generator[Array, None, None]: ...
+
+ def __iter__(self) -> Iterator[_Scalar_co]: ...
+
+ def to_pylist(
+ self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]],
+ *,
+ maps_as_pydicts: Literal["lossy", "strict"] | None = None,
+ ) -> list[_AsPyType | None]: ...
+
+ def __arrow_c_stream__(self, requested_schema=None) -> Any: ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, stream) -> Self: ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+
+def chunked_array(
+ arrays: Iterable[NullableCollection[Any]]
+ | Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray]
+ | Iterable[Array[_ScalarT]] | Array[_ScalarT]
+ | SupportArrowArray | SupportArrowStream,
+ type: DataType | str | None = None,
+) -> ChunkedArray[Scalar[Any]] | ChunkedArray[_ScalarT]: ...
+
+
+_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any])
+
+
+class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]):
+ def __array__(self, dtype: np.dtype | None = None,
+ copy: bool | None = None) -> np.ndarray: ...
+
+ def __dataframe__(
+ self, nan_as_null: bool = False, allow_copy: bool = True
+ ) -> _PyArrowDataFrame: ...
+
+ def __getitem__(self, key: int | str | slice) -> _ColumnT | Self: ...
+
+ def __len__(self) -> int: ...
+ def column(self, i: int | str) -> _ColumnT: ...
+
+ @property
+ def column_names(self) -> list[str]: ...
+
+ @property
+ def columns(self) -> list[_ColumnT]: ...
+
+ def drop_null(self) -> Self: ...
+
+ def field(self, i: int | str) -> Field: ...
+
+ @classmethod
+ def from_pydict(
+ cls,
+ mapping:
+ Mapping[Any, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray | range],
+ schema: Schema | None = None,
+ metadata: Mapping[str | bytes, str | bytes] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_pylist(
+ cls,
+ mapping: Sequence[Mapping[str, Any]],
+ schema: Schema | None = None,
+ metadata: Mapping[str | bytes, str | bytes] | None = None,
+ ) -> Self: ...
+
+ def itercolumns(self) -> Generator[_ColumnT, None, None]: ...
+
+ @property
+ def num_columns(self) -> int: ...
+ @property
+ def num_rows(self) -> int: ...
+ @property
+ def shape(self) -> tuple[int, int]: ...
+
+ @property
+ def schema(self) -> Schema: ...
+ @property
+ def nbytes(self) -> int: ...
+ def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: ...
+
+ def take(self, indices: Indices) -> Self: ...
+
+ def filter(
+ self,
+ mask: Mask | Expression,
+ null_selection_behavior: NullSelectionBehavior = "drop") -> Self: ...
+
+ def to_pydict(
+ self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None
+ ) -> dict[str, list[Any]]: ...
+
+ def to_pylist(
+ self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None
+ ) -> list[dict[str, Any]]: ...
+
+ def to_string(self, *, show_metadata: bool = False,
+ preview_cols: int = 0) -> str: ...
+
+ def remove_column(self, i: int) -> Self: ...
+ def drop_columns(self, columns: str | list[str]) -> Self: ...
+
+ def add_column(self, i: int, field_: str | Field,
+ column: ArrayOrChunkedArray[Any] | list[list[Any]]) -> Self: ...
+
+ def append_column(
+ self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]]
+ ) -> Self: ...
+
+
+class RecordBatch(_Tabular[Array]):
+ def validate(self, *, full: bool = False) -> None: ...
+
+ def replace_schema_metadata(
+ self,
+ metadata: dict[str, str]
+ | dict[bytes, bytes]
+ | dict[bytes, str]
+ | dict[str, bytes]
+ | None = None
+ ) -> Self: ...
+
+ @property
+ def num_columns(self) -> int: ...
+
+ @property
+ def num_rows(self) -> int: ...
+
+ @property
+ def schema(self) -> Schema: ...
+
+ @property
+ def nbytes(self) -> int: ...
+
+ def get_total_buffer_size(self) -> int: ...
+
+ def __sizeof__(self) -> int: ...
+
+ def add_column(
+ self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list
+ ) -> Self: ...
+
+ def remove_column(self, i: int) -> Self: ...
+
+ def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: ...
+
+ def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ...
+
+ def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ...
+
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+ def equals(self, other: Self | Any, check_metadata: bool = False) -> bool: ...
+
+ def select(self, columns: Iterable[str] |
+ Iterable[int] | NDArray[np.str_]) -> Self: ...
+
+ def cast(self, target_schema: Schema, safe: bool | None = None,
+ options: CastOptions | None = None) -> Self: ...
+
+ @classmethod
+ def from_arrays(
+ cls,
+ arrays: Iterable[Any],
+ names: list[str] | tuple[str, ...] | None = None,
+ schema: Schema | None = None,
+ metadata: Mapping[bytes, bytes]
+ | Mapping[str, str]
+ | Mapping[bytes, str]
+ | Mapping[str, bytes]
+ | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_pandas(
+ cls,
+ df: pd.DataFrame,
+ schema: Schema | None = None,
+ preserve_index: bool | None = None,
+ nthreads: int | None = None,
+ columns: Sequence[str | int] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_struct_array(
+ cls, struct_array: StructArray | ChunkedArray[StructScalar]
+ ) -> Self: ...
+
+ def to_struct_array(self) -> StructArray: ...
+
+ def to_tensor(
+ self,
+ null_to_nan: bool = False,
+ row_major: bool = True,
+ memory_pool: MemoryPool | None = None,
+ ) -> Tensor: ...
+
+ def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): ...
+
+ @classmethod
+ def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: ...
+
+ def __arrow_c_array__(self, requested_schema=None): ...
+
+ def __arrow_c_stream__(self, requested_schema=None): ...
+
+ @classmethod
+ def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+
+ def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ...
+
+ @classmethod
+ def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: ...
+
+ def __arrow_c_device_array__(self, requested_schema=None, **kwargs): ...
+
+ @classmethod
+ def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ...
+
+ @property
+ def device_type(self) -> DeviceAllocationType: ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+ def copy_to(self, destination: MemoryManager | Device) -> Self: ...
+
+
+def table_to_blocks(options, table: Table, categories, extension_columns): ...
+
+
+JoinType: TypeAlias = Literal[
+ "left semi",
+ "right semi",
+ "left anti",
+ "right anti",
+ "inner",
+ "left outer",
+ "right outer",
+ "full outer",
+]
+
+
+class Table(_Tabular[ChunkedArray[Any]]):
+ def validate(self, *, full: bool = False) -> None: ...
+
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: ...
+
+ def select(self, columns: Iterable[str] |
+ Iterable[int] | NDArray[np.str_]) -> Self: ...
+
+ def replace_schema_metadata(
+ self, metadata: dict[str, str]
+ | dict[bytes, bytes]
+ | dict[bytes, str]
+ | dict[str, bytes]
+ | None = None
+ ) -> Self: ...
+
+ def flatten(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+ def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+ def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ...
+
+ def equals(self, other: Self | Any, check_metadata: bool = False) -> bool: ...
+
+ def cast(self, target_schema: Schema, safe: bool | None = None,
+ options: CastOptions | None = None) -> Self: ...
+
+ @classmethod
+ def from_pandas(
+ cls,
+ df: pd.DataFrame,
+ schema: Schema | None = None,
+ preserve_index: bool | None = None,
+ nthreads: int | None = None,
+ columns: Sequence[str | int] | None = None,
+ safe: bool = True,
+ ) -> Self: ...
+
+ @classmethod
+ def from_arrays(
+ cls,
+ arrays:
+ Collection[ArrayOrChunkedArray[Any] | Collection[NDArray[Any]] | list[Any]],
+ names: list[str] | tuple[str, ...] | None = None,
+ schema: Schema | None = None,
+ metadata: Mapping[bytes, bytes]
+ | Mapping[str, str]
+ | Mapping[bytes, str]
+ | Mapping[str, bytes] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_struct_array(
+ cls, struct_array: StructArray | ChunkedArray[StructScalar]
+ ) -> Self: ...
+
+ def to_struct_array(
+ self, max_chunksize: int | None = None
+ ) -> ChunkedArray[StructScalar]: ...
+
+ @classmethod
+ def from_batches(cls, batches: Iterable[RecordBatch],
+ schema: Schema | None = None) -> Self: ...
+
+ def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: ...
+
+ def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: ...
+
+ @property
+ def schema(self) -> Schema: ...
+
+ @property
+ def num_columns(self) -> int: ...
+
+ @property
+ def num_rows(self) -> int: ...
+
+ @property
+ def nbytes(self) -> int: ...
+
+ def get_total_buffer_size(self) -> int: ...
+
+ def __sizeof__(self) -> int: ...
+
+ def add_column(self, i: int, field_: str | Field,
+ column: ArrayOrChunkedArray[Any] | list[list[Any]]) -> Self: ...
+
+ def remove_column(self, i: int) -> Self: ...
+
+ def set_column(self, i: int, field_: str | Field,
+ column: ArrayOrChunkedArray[Any] | list[list[Any]]) -> Self: ...
+
+ def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ...
+
+ def drop(self, columns: str | list[str]) -> Self: ...
+
+ def group_by(self, keys: str | list[str],
+ use_threads: bool = True) -> TableGroupBy: ...
+
+ def join(
+ self,
+ right_table: Self,
+ keys: str | list[str],
+ right_keys: str | list[str] | None = None,
+ join_type: JoinType = "left outer",
+ left_suffix: str | None = None,
+ right_suffix: str | None = None,
+ coalesce_keys: bool = True,
+ use_threads: bool = True,
+ ) -> Self: ...
+
+ def join_asof(
+ self,
+ right_table: Self,
+ on: str,
+ by: str | list[str],
+ tolerance: int,
+ right_on: str | list[str] | None = None,
+ right_by: str | list[str] | None = None,
+ ) -> Self: ...
+
+ def __arrow_c_stream__(self, requested_schema=None): ...
+
+ @property
+ def is_cpu(self) -> bool: ...
+
+
+def record_batch(
+ data: Mapping[str, list[Any] | Array[Any]]
+ | Collection[Array[Any] | ChunkedArray[Any] | list[Any]]
+ | pd.DataFrame
+ | SupportArrowArray
+ | SupportArrowDeviceArray,
+ names: list[str] | Schema | None = None,
+ schema: Schema | None = None,
+ metadata: Mapping[str | bytes, str | bytes] | None = None,
+) -> RecordBatch: ...
+
+
+def table(
+ data: Collection[ArrayOrChunkedArray[Any] | list[Any] | range | str]
+ | pd.DataFrame
+ | SupportArrowArray
+ | SupportArrowStream
+ | SupportArrowDeviceArray
+ | Mapping[str, list[Any] | Array[Any] | ChunkedArray[Any] | range]
+ | Mapping[str, Any],
+ names: list[str] | Schema | None = None,
+ schema: Schema | None = None,
+ metadata: Mapping[str | bytes, str | bytes] | None = None,
+ nthreads: int | None = None,
+) -> Table: ...
+
+
+def concat_tables(
+ tables: Iterable[Table],
+ memory_pool: MemoryPool | None = None,
+ promote_options: Literal["none", "default", "permissive"] = "none",
+ **kwargs: Any,
+) -> Table: ...
+
+
+class TableGroupBy:
+
+ keys: str | list[str]
+
+ def __init__(self, table: Table, keys: str |
+ list[str], use_threads: bool = True): ...
+
+ def aggregate(
+ self,
+ aggregations: Iterable[
+ tuple[ColumnSelector, Aggregation]
+ | tuple[ColumnSelector, Aggregation, AggregateOptions | None]
+ ],
+ ) -> Table: ...
+
+ def _table(self) -> Table: ...
+ @property
+ def _use_threads(self) -> bool: ...
+
+
+def concat_batches(
+ recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None
+) -> RecordBatch: ...
+
+
+__all__ = [
+ "ChunkedArray",
+ "chunked_array",
+ "_Tabular",
+ "RecordBatch",
+ "table_to_blocks",
+ "Table",
+ "record_batch",
+ "table",
+ "concat_tables",
+ "TableGroupBy",
+ "concat_batches",
+ "Aggregation",
+ "AggregateOptions",
+]
diff --git a/python/pyarrow-stubs/pyarrow/tensor.pyi b/python/pyarrow-stubs/pyarrow/tensor.pyi
new file mode 100644
index 00000000000..ba40c7b299d
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/tensor.pyi
@@ -0,0 +1,268 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ from typing_extensions import Self
+
+from collections.abc import Sequence
+import numpy as np
+
+from pyarrow.lib import _Weakrefable
+from pyarrow._types import DataType
+from scipy.sparse import coo_matrix, csc_matrix, csr_matrix
+from sparse import COO # type: ignore[import-untyped, import-not-found]
+
+
+class Tensor(_Weakrefable):
+ @classmethod
+ def from_numpy(cls, obj: np.ndarray,
+ dim_names: Sequence[str] | None = None) -> Self: ...
+
+ def to_numpy(self) -> np.ndarray: ...
+
+ def equals(self, other: Tensor) -> bool: ...
+
+ def dim_name(self, i: int) -> str: ...
+
+ @property
+ def dim_names(self) -> list[str]: ...
+
+ @property
+ def is_mutable(self) -> bool: ...
+
+ @property
+ def is_contiguous(self) -> bool: ...
+
+ @property
+ def ndim(self) -> int: ...
+
+ @property
+ def size(self) -> int: ...
+
+ @property
+ def shape(self) -> tuple[int, ...]: ...
+
+ @property
+ def strides(self) -> tuple[int, ...]: ...
+
+ @property
+ def type(self) -> DataType: ...
+
+
+class SparseCOOTensor(_Weakrefable):
+ @classmethod
+ def from_dense_numpy(cls, obj: np.ndarray,
+ dim_names: list[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: np.ndarray,
+ coords: np.ndarray,
+ shape: Sequence[int],
+ dim_names: Sequence[str] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_scipy(cls, obj: coo_matrix,
+ dim_names: Sequence[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_pydata_sparse(
+ cls, obj: COO, dim_names: Sequence[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_tensor(cls, obj: Tensor) -> Self: ...
+
+ def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: ...
+
+ def to_scipy(self) -> coo_matrix: ...
+
+ def to_pydata_sparse(self) -> COO: ...
+
+ def to_tensor(self) -> Tensor: ...
+
+ def equals(self, other: Self) -> bool: ...
+
+ @property
+ def is_mutable(self) -> bool: ...
+ @property
+ def ndim(self) -> int: ...
+ @property
+ def size(self) -> int: ...
+ @property
+ def shape(self) -> tuple[int, ...]: ...
+ def dim_name(self, i: int) -> str: ...
+
+ @property
+ def dim_names(self) -> list[str]: ...
+ @property
+ def non_zero_length(self) -> int: ...
+ @property
+ def has_canonical_format(self) -> bool: ...
+ @property
+ def type(self) -> DataType: ...
+
+
+class SparseCSRMatrix(_Weakrefable):
+ @classmethod
+ def from_dense_numpy(cls, obj: np.ndarray,
+ dim_names: list[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: np.ndarray,
+ indptr: np.ndarray,
+ indices: np.ndarray,
+ shape: Sequence[int],
+ dim_names: Sequence[str] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_scipy(cls, obj: csr_matrix,
+ dim_names: Sequence[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_tensor(cls, obj: Tensor) -> Self: ...
+
+ def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+ def to_scipy(self) -> csr_matrix: ...
+
+ def to_tensor(self) -> Tensor: ...
+
+ def equals(self, other: Self) -> bool: ...
+
+ @property
+ def is_mutable(self) -> bool: ...
+ @property
+ def ndim(self) -> int: ...
+ @property
+ def size(self) -> int: ...
+ @property
+ def shape(self) -> tuple[int, ...]: ...
+ def dim_name(self, i: int) -> str: ...
+
+ @property
+ def dim_names(self) -> list[str]: ...
+ @property
+ def non_zero_length(self) -> int: ...
+ @property
+ def type(self) -> DataType: ...
+
+
+class SparseCSCMatrix(_Weakrefable):
+ @classmethod
+ def from_dense_numpy(cls, obj: np.ndarray,
+ dim_names: list[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: np.ndarray,
+ indptr: np.ndarray,
+ indices: np.ndarray,
+ shape: tuple[int, ...],
+ dim_names: list[str] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_scipy(cls, obj: csc_matrix,
+ dim_names: list[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_tensor(cls, obj: Tensor) -> Self: ...
+
+ def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+ def to_scipy(self) -> csc_matrix: ...
+
+ def to_tensor(self) -> Tensor: ...
+
+ def equals(self, other: Self) -> bool: ...
+
+ @property
+ def is_mutable(self) -> bool: ...
+ @property
+ def ndim(self) -> int: ...
+ @property
+ def size(self) -> int: ...
+ @property
+ def shape(self) -> tuple[int, ...]: ...
+ def dim_name(self, i: int) -> str: ...
+
+ @property
+ def dim_names(self) -> list[str]: ...
+ @property
+ def non_zero_length(self) -> int: ...
+
+
+class SparseCSFTensor(_Weakrefable):
+ @classmethod
+ def from_dense_numpy(cls, obj: np.ndarray,
+ dim_names: Sequence[str] | None = None) -> Self: ...
+
+ @classmethod
+ def from_numpy(
+ cls,
+ data: np.ndarray,
+ indptr: Sequence[np.ndarray],
+ indices: Sequence[np.ndarray],
+ shape: tuple[int, ...],
+ axis_order: Sequence[int] | None = None,
+ dim_names: Sequence[str] | None = None,
+ ) -> Self: ...
+
+ @classmethod
+ def from_tensor(cls, obj: Tensor) -> Self: ...
+
+ def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ...
+
+ def to_tensor(self) -> Tensor: ...
+
+ def equals(self, other: Self) -> bool: ...
+
+ @property
+ def is_mutable(self) -> bool: ...
+ @property
+ def ndim(self) -> int: ...
+ @property
+ def size(self) -> int: ...
+ @property
+ def shape(self) -> tuple[int, ...]: ...
+ def dim_name(self, i: int) -> str: ...
+
+ @property
+ def dim_names(self) -> list[str]: ...
+ @property
+ def non_zero_length(self) -> int: ...
+ @property
+ def type(self) -> DataType: ...
+
+
+__all__ = [
+ "Tensor",
+ "SparseCOOTensor",
+ "SparseCSRMatrix",
+ "SparseCSCMatrix",
+ "SparseCSFTensor",
+]
diff --git a/python/pyarrow-stubs/pyarrow/tests/util.pyi b/python/pyarrow-stubs/pyarrow/tests/util.pyi
new file mode 100644
index 00000000000..5ceb784588a
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/tests/util.pyi
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import Callable
+from contextlib import AbstractContextManager
+from decimal import Decimal
+from os import PathLike
+from typing import Any, Literal
+import socket
+
+import pyarrow.fs
+
+
+def randsign() -> int: ...
+def random_seed(seed: int) -> AbstractContextManager[None]: ...
+def randdecimal(precision: int, scale: int) -> Decimal: ...
+def random_ascii(length: int) -> bytes: ...
+def rands(nchars: int) -> str: ...
+def get_modified_env_with_pythonpath() -> dict[str, str]: ...
+def invoke_script(script_name: str, *args: str) -> None: ...
+def changed_environ(name: str, value: str) -> AbstractContextManager[None]: ...
+def change_cwd(path: str | PathLike[str]) -> AbstractContextManager[None]: ...
+def disabled_gc() -> AbstractContextManager[None]: ...
+def _filesystem_uri(path: str) -> str: ...
+
+
+def memory_leak_check(
+ f: Callable[[], Any],
+ metric: Literal['rss', 'vms', 'shared'] = 'rss',
+ threshold: int = 131072,
+ iterations: int = 10,
+ check_interval: int = 1
+) -> None: ...
+
+
+class FSProtocolClass:
+ def __init__(self, path: str | PathLike[str]) -> None: ...
+ def __fspath__(self) -> str: ...
+
+
+class ProxyHandler(pyarrow.fs.FileSystemHandler):
+ _fs: pyarrow.fs.FileSystem
+ def __init__(self, fs: pyarrow.fs.FileSystem) -> None: ...
+ def __eq__(self, other: object) -> bool: ...
+ def __ne__(self, other: object) -> bool: ...
+ def get_type_name(self) -> str: ...
+ def normalize_path(self, path: str) -> str: ...
+ def get_file_info(self, paths: list[str]) -> list[pyarrow.fs.FileInfo]: ...
+ def get_file_info_selector(
+ self, selector: pyarrow.fs.FileSelector) -> list[pyarrow.fs.FileInfo]: ...
+
+ def create_dir(self, path: str, recursive: bool) -> None: ...
+ def delete_dir(self, path: str) -> None: ...
+ def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: ...
+ def delete_root_dir_contents(self) -> None: ...
+ def delete_file(self, path: str) -> None: ...
+ def move(self, src: str, dest: str) -> None: ...
+ def copy_file(self, src: str, dest: str) -> None: ...
+ def open_input_stream(self, path: str) -> Any: ...
+ def open_input_file(self, path: str) -> Any: ...
+ def open_output_stream(self, path: str, metadata: dict[str, str]) -> Any: ...
+ def open_append_stream(self, path: str, metadata: dict[str, str]) -> Any: ...
+
+
+def _ensure_minio_component_version(component: str, minimum_year: int) -> bool: ...
+def _run_mc_command(mcdir: str, *args: str) -> None: ...
+def windows_has_tzdata() -> bool: ...
+def running_on_musllinux() -> bool: ...
+
+
+def signal_wakeup_fd(
+ *, warn_on_full_buffer: bool = False) -> AbstractContextManager[socket.socket]: ...
+
+
+def _configure_s3_limited_user(
+ s3_server: dict[str, Any], policy: str, username: str, password: str) -> None: ...
+
+
+def _wait_for_minio_startup(
+ mcdir: str, address: str, access_key: str, secret_key: str) -> None: ...
diff --git a/python/pyarrow-stubs/pyarrow/types.pyi b/python/pyarrow-stubs/pyarrow/types.pyi
new file mode 100644
index 00000000000..9e5a0568db0
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/types.pyi
@@ -0,0 +1,227 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+from enum import IntEnum
+
+from typing import Any
+
+if sys.version_info >= (3, 13):
+ from typing import TypeIs
+else:
+ from typing_extensions import TypeIs
+if sys.version_info >= (3, 10):
+ from typing import TypeAlias
+else:
+ from typing_extensions import TypeAlias
+
+import pyarrow.lib as lib
+
+from pyarrow.lib import (
+ BinaryType,
+ BinaryViewType,
+ BoolType,
+ DataType,
+ Date32Type,
+ Date64Type,
+ Decimal32Type,
+ Decimal64Type,
+ Decimal128Type,
+ Decimal256Type,
+ DenseUnionType,
+ DictionaryType,
+ DurationType,
+ FixedSizeBinaryType,
+ FixedSizeListType,
+ Float16Type,
+ Float32Type,
+ Float64Type,
+ Int8Type,
+ Int16Type,
+ Int32Type,
+ Int64Type,
+ LargeBinaryType,
+ LargeListType,
+ LargeListViewType,
+ LargeStringType,
+ ListType,
+ ListViewType,
+ MapType,
+ MonthDayNanoIntervalType,
+ NullType,
+ RunEndEncodedType,
+ SparseUnionType,
+ StringType,
+ StringViewType,
+ StructType,
+ Time32Type,
+ Time64Type,
+ TimestampType,
+ UInt8Type,
+ UInt16Type,
+ UInt32Type,
+ UInt64Type,
+)
+
+_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type
+_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | UInt32Type | UInt64Type
+_Integer: TypeAlias = _SignedInteger | _UnsignedInteger
+_Floating: TypeAlias = Float16Type | Float32Type | Float64Type
+_Decimal: TypeAlias = (
+ Decimal32Type[Any, Any]
+ | Decimal64Type[Any, Any]
+ | Decimal128Type[Any, Any]
+ | Decimal256Type[Any, Any]
+)
+_Date: TypeAlias = Date32Type | Date64Type
+_Time: TypeAlias = Time32Type[Any] | Time64Type[Any]
+_Interval: TypeAlias = MonthDayNanoIntervalType
+_Temporal: TypeAlias = (TimestampType[Any, Any]
+ | DurationType[Any] | _Time | _Date | _Interval)
+_Union: TypeAlias = SparseUnionType | DenseUnionType
+_Nested: TypeAlias = (
+ ListType[Any]
+ | FixedSizeListType[Any, Any]
+ | LargeListType[Any]
+ | ListViewType[Any]
+ | LargeListViewType[Any]
+ | StructType
+ | MapType[Any, Any, Any]
+ | _Union
+)
+
+
+def is_null(t: DataType) -> TypeIs[NullType]: ...
+def is_boolean(t: DataType) -> TypeIs[BoolType]: ...
+def is_integer(t: DataType) -> TypeIs[_Integer]: ...
+def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ...
+def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ...
+def is_int8(t: DataType) -> TypeIs[Int8Type]: ...
+def is_int16(t: DataType) -> TypeIs[Int16Type]: ...
+def is_int32(t: DataType) -> TypeIs[Int32Type]: ...
+def is_int64(t: DataType) -> TypeIs[Int64Type]: ...
+def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ...
+def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ...
+def is_uint32(t: DataType) -> TypeIs[UInt32Type]: ...
+def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ...
+def is_floating(t: DataType) -> TypeIs[_Floating]: ...
+def is_float16(t: DataType) -> TypeIs[Float16Type]: ...
+def is_float32(t: DataType) -> TypeIs[Float32Type]: ...
+def is_float64(t: DataType) -> TypeIs[Float64Type]: ...
+def is_list(t: DataType) -> TypeIs[ListType[Any]]: ...
+def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ...
+def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ...
+def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ...
+def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ...
+def is_struct(t: DataType) -> TypeIs[StructType]: ...
+def is_union(t: DataType) -> TypeIs[_Union]: ...
+def is_nested(t: DataType) -> TypeIs[_Nested]: ...
+def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ...
+def is_temporal(t: DataType) -> TypeIs[_Temporal]: ...
+def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ...
+def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ...
+def is_time(t: DataType) -> TypeIs[_Time]: ...
+def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ...
+def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ...
+def is_binary(t: DataType) -> TypeIs[BinaryType]: ...
+def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ...
+def is_unicode(t: DataType) -> TypeIs[StringType]: ...
+def is_string(t: DataType) -> TypeIs[StringType]: ...
+def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ...
+def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ...
+def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ...
+def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ...
+def is_string_view(t: DataType) -> TypeIs[StringViewType]: ...
+def is_date(t: DataType) -> TypeIs[_Date]: ...
+def is_date32(t: DataType) -> TypeIs[Date32Type]: ...
+def is_date64(t: DataType) -> TypeIs[Date64Type]: ...
+def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ...
+def is_decimal(t: DataType) -> TypeIs[_Decimal]: ...
+def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ...
+def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ...
+def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ...
+def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ...
+def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ...
+def is_interval(t: DataType) -> TypeIs[_Interval]: ...
+def is_primitive(t: DataType) -> bool: ...
+def is_boolean_value(obj: Any) -> bool: ...
+def is_integer_value(obj: Any) -> bool: ...
+def is_float_value(obj: Any) -> bool: ...
+
+
+__all__ = [
+ "lib",
+ "is_binary",
+ "is_binary_view",
+ "is_boolean",
+ "is_date",
+ "is_date32",
+ "is_date64",
+ "is_decimal",
+ "is_decimal128",
+ "is_decimal256",
+ "is_decimal32",
+ "is_decimal64",
+ "is_dictionary",
+ "is_duration",
+ "is_fixed_size_binary",
+ "is_fixed_size_list",
+ "is_float16",
+ "is_float32",
+ "is_float64",
+ "is_floating",
+ "is_int16",
+ "is_int32",
+ "is_int64",
+ "is_int8",
+ "is_integer",
+ "is_interval",
+ "is_large_binary",
+ "is_large_list",
+ "is_large_list_view",
+ "is_large_string",
+ "is_large_unicode",
+ "is_list",
+ "is_list_view",
+ "is_map",
+ "is_nested",
+ "is_null",
+ "is_primitive",
+ "is_run_end_encoded",
+ "is_signed_integer",
+ "is_string",
+ "is_string_view",
+ "is_struct",
+ "is_temporal",
+ "is_time",
+ "is_time32",
+ "is_time64",
+ "is_timestamp",
+ "is_uint16",
+ "is_uint32",
+ "is_uint64",
+ "is_uint8",
+ "is_unicode",
+ "is_union",
+ "is_unsigned_integer",
+]
+
+
+class TypesEnum(IntEnum):
+ INTERVAL_MONTHS = 0
+ INTERVAL_DAY_TIME = 1
+ INTERVAL_MONTH_DAY_NANO = 2
diff --git a/python/pyarrow-stubs/pyarrow/util.pyi b/python/pyarrow-stubs/pyarrow/util.pyi
new file mode 100644
index 00000000000..c3317960c81
--- /dev/null
+++ b/python/pyarrow-stubs/pyarrow/util.pyi
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections.abc import Callable, Sequence
+from os import PathLike
+from typing import Any, Protocol, TypeVar
+
+_F = TypeVar("_F", bound=Callable)
+_N = TypeVar("_N")
+
+
+class _DocStringComponents(Protocol):
+ _docstring_components: list[str]
+
+
+def doc(
+ *docstrings: str | _DocStringComponents | Callable | None, **params: Any
+) -> Callable[[_F], _F]: ...
+def _is_iterable(obj) -> bool: ...
+def _is_path_like(path) -> bool: ...
+def _stringify_path(path: str | PathLike) -> str: ...
+def product(seq: Sequence[_N]) -> _N: ...
+
+
+def get_contiguous_span(
+ shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int
+) -> tuple[int, int]: ...
+def find_free_port() -> int: ...
+def guid() -> str: ...
+def _download_urllib(url, out_path) -> None: ...
+def _download_requests(url, out_path) -> None: ...
+def download_tzdata_on_windows() -> None: ...
+def _deprecate_api(old_name, new_name, api, next_version, type=...): ...
+def _deprecate_class(old_name, new_class, next_version, instancecheck=True): ...
+def _break_traceback_cycle_from_frame(frame) -> None: ...
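
The Callable[[_F], _F] shape used for doc() above is the usual way to type a decorator factory so the wrapped function keeps its exact signature for checkers. A small illustrative sketch of the same pattern (not pyarrow code):

from collections.abc import Callable
from typing import Any, TypeVar

_F = TypeVar("_F", bound=Callable[..., Any])


def doc_like(**params: str) -> Callable[[_F], _F]:
    """Substitute placeholders in the decorated function's docstring."""
    def decorator(func: _F) -> _F:
        # Returning the same _F preserves the original signature for type checkers.
        func.__doc__ = (func.__doc__ or "").format(**params)
        return func
    return decorator


@doc_like(kind="Parquet file")
def read(path: str) -> str:
    """Read a {kind} from the given path."""
    return path
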
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index da2fe966475..d6836c14bd6 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -29,19 +29,17 @@
For more information see the official page at https://arrow.apache.org
"""
-import gc as _gc
import importlib as _importlib
import os as _os
import platform as _platform
import sys as _sys
-import warnings as _warnings
try:
- from ._generated_version import version as __version__
+ from ._generated_version import version as __version__ # type: ignore[import-untyped, import-not-found] # noqa: E501
except ImportError:
# Package is not installed, parse git tag at runtime
try:
- import setuptools_scm
+ import setuptools_scm # type: ignore[import-not-found, import-untyped]
# Code duplicated from setup.py to avoid a dependency on each other
def parse_git(root, **kwargs):
@@ -49,14 +47,14 @@ def parse_git(root, **kwargs):
Parse function for setuptools_scm that ignores tags for non-C++
subprojects, e.g. apache-arrow-js-XXX tags.
"""
- from setuptools_scm.git import parse
+ from setuptools_scm.git import parse # type: ignore[import-not-found, import-untyped] # noqa: E501
kwargs['describe_command'] = \
"git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'"
return parse(root, **kwargs)
__version__ = setuptools_scm.get_version('../',
parse=parse_git)
except ImportError:
- __version__ = None
+ __version__ = None # type: ignore[assignment]
import pyarrow.lib as _lib
from pyarrow.lib import (BuildInfo, CppBuildInfo, RuntimeInfo, set_timezone_db_path,
@@ -153,6 +151,8 @@ def print_entry(label, value):
print(f" {codec: <20}: {status: <8}")
+from pyarrow.lib import (
+ DataType, Array, MemoryPool) # type: ignore[reportAttributeAccessIssue]
from pyarrow.lib import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
@@ -170,7 +170,7 @@ def print_entry(label, value):
bool8, fixed_shape_tensor, json_, opaque, uuid,
field,
type_for_alias,
- DataType, DictionaryType, StructType,
+ DictionaryType, StructType,
ListType, LargeListType, FixedSizeListType,
ListViewType, LargeListViewType,
MapType, UnionType, SparseUnionType, DenseUnionType,
@@ -187,8 +187,7 @@ def print_entry(label, value):
Field,
Schema,
schema,
- unify_schemas,
- Array, Tensor,
+ unify_schemas, Tensor,
array, chunked_array, record_batch, nulls, repeat,
SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
SparseCSFTensor,
@@ -243,7 +242,7 @@ def print_entry(label, value):
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
Codec, compress, decompress, allocate_buffer)
-from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
+from pyarrow.lib import (LoggingMemoryPool, ProxyMemoryPool,
total_allocated_bytes, set_memory_pool,
default_memory_pool, system_memory_pool,
jemalloc_memory_pool, mimalloc_memory_pool,
@@ -365,7 +364,7 @@ def create_library_symlinks():
if _sys.platform == 'linux':
bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))
- def get_symlink_path(hard_path):
+ def get_symlink_path(hard_path): # type: ignore[reportRedeclaration]
return hard_path.rsplit('.', 1)[0]
else:
bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))
diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py
index e475e8db5c2..cd99a1bbc53 100644
--- a/python/pyarrow/acero.py
+++ b/python/pyarrow/acero.py
@@ -22,7 +22,7 @@
# distutils: language = c++
# cython: language_level = 3
-from pyarrow.lib import Table, RecordBatch, array
+from pyarrow.lib import Table, RecordBatch, array, Schema
from pyarrow.compute import Expression, field
try:
@@ -49,11 +49,14 @@
except ImportError:
class DatasetModuleStub:
class Dataset:
- pass
+ @property
+ def schema(self):
+ return Schema()
class InMemoryDataset:
- pass
- ds = DatasetModuleStub
+ def __init__(self, source):
+ pass
+ ds = DatasetModuleStub # type: ignore[assignment]
def _dataset_to_decl(dataset, use_threads=True, implicit_ordering=False):
@@ -306,7 +309,7 @@ def _perform_join_asof(left_operand, left_on, left_by,
# AsofJoin does not return on or by columns for right_operand.
right_columns = [
col for col in right_operand.schema.names
- if col not in [right_on] + right_by
+ if col not in [right_on] + right_by # type: ignore[reportOperatorIssue]
]
columns_collisions = set(left_operand.schema.names) & set(right_columns)
if columns_collisions:
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 575b628db3a..915a715f8ec 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -3648,7 +3648,7 @@ cdef class FixedSizeListArray(BaseListArray):
Or create from a values array, list size and matching type:
>>> typ = pa.list_(pa.field("values", pa.int64()), 2)
- >>> arr = pa.FixedSizeListArray.from_arrays(values,type=typ)
+ >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ)
>>> arr
[
diff --git a/python/pyarrow/benchmark.py b/python/pyarrow/benchmark.py
index 25ee1141f08..0ee9063a9a7 100644
--- a/python/pyarrow/benchmark.py
+++ b/python/pyarrow/benchmark.py
@@ -18,4 +18,4 @@
# flake8: noqa
-from pyarrow.lib import benchmark_PandasObjectIsNull
+from pyarrow.lib import benchmark_PandasObjectIsNull # type: ignore[attr-defined]
diff --git a/python/pyarrow/cffi.py b/python/pyarrow/cffi.py
index 1da1a916914..e5a1c9c1d07 100644
--- a/python/pyarrow/cffi.py
+++ b/python/pyarrow/cffi.py
@@ -16,8 +16,15 @@
# under the License.
from __future__ import absolute_import
+from typing import TYPE_CHECKING
-import cffi
+if TYPE_CHECKING:
+ import cffi
+else:
+ try:
+ import cffi
+ except ImportError:
+ pass
c_source = """
struct ArrowSchema {
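
The TYPE_CHECKING split above is the recurring pattern in this patch for optional dependencies: type checkers always see the real module, while a missing wheel at runtime is tolerated. A self-contained sketch of how an annotation can still refer to the optional module (cffi is only needed when the function is actually called):

from __future__ import annotations  # keep annotations lazy at runtime

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import cffi  # always resolvable for mypy/pyright


def make_ffi() -> cffi.FFI:
    # Local import: the dependency is only required on first call.
    import cffi
    return cffi.FFI()
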
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index fe0afdb0a87..259dd5eb94d 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -106,7 +106,7 @@
import warnings
import pyarrow as pa
-from pyarrow import _compute_docstrings
+from pyarrow import _compute_docstrings # type: ignore[reportAttributeAccessIssue]
from pyarrow.vendored import docscrape
@@ -241,7 +241,7 @@ def _handle_options(name, options_class, options, args, kwargs):
def _make_generic_wrapper(func_name, func, options_class, arity):
if options_class is None:
- def wrapper(*args, memory_pool=None):
+ def wrapper(*args, memory_pool=None): # type: ignore[misc]
if arity is not Ellipsis and len(args) != arity:
raise TypeError(
f"{func_name} takes {arity} positional argument(s), "
@@ -251,7 +251,8 @@ def wrapper(*args, memory_pool=None):
return Expression._call(func_name, list(args))
return func.call(args, None, memory_pool)
else:
- def wrapper(*args, memory_pool=None, options=None, **kwargs):
+ def wrapper( # type: ignore[misc]
+ *args, memory_pool=None, options=None, **kwargs):
if arity is not Ellipsis:
if len(args) < arity:
raise TypeError(
@@ -608,7 +609,7 @@ def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
sort_keys.append(("dummy", "descending"))
else:
sort_keys = map(lambda key_name: (key_name, "descending"), sort_keys)
- options = SelectKOptions(k, sort_keys)
+ options = SelectKOptions(k, sort_keys) # type: ignore[reportArgumentType]
return call_function("select_k_unstable", [values], options, memory_pool)
@@ -655,7 +656,7 @@ def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
sort_keys.append(("dummy", "ascending"))
else:
sort_keys = map(lambda key_name: (key_name, "ascending"), sort_keys)
- options = SelectKOptions(k, sort_keys)
+ options = SelectKOptions(k, sort_keys) # type: ignore[reportArgumentType]
return call_function("select_k_unstable", [values], options, memory_pool)
@@ -681,7 +682,8 @@ def random(n, *, initializer='system', options=None, memory_pool=None):
memory_pool : pyarrow.MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
"""
- options = RandomOptions(initializer=initializer)
+ options = RandomOptions(
+ initializer=initializer) # type: ignore[reportArgumentType]
return call_function("random", [], options, memory_pool, length=n)
@@ -723,7 +725,7 @@ def field(*name_or_index):
if isinstance(name_or_index[0], (str, int)):
return Expression._field(name_or_index[0])
elif isinstance(name_or_index[0], tuple):
- return Expression._nested_field(name_or_index[0])
+ return Expression._nested_field(name_or_index[0]) # type: ignore
else:
raise TypeError(
"field reference should be str, multiple str, tuple or "
@@ -731,7 +733,7 @@ def field(*name_or_index):
)
# In case of multiple strings not supplied in a tuple
else:
- return Expression._nested_field(name_or_index)
+ return Expression._nested_field(name_or_index) # type: ignore
def scalar(value):
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 41beaa14041..0e8ef66485e 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -114,13 +114,13 @@
defaults['timezone_data'] = os.path.exists("/usr/share/zoneinfo")
try:
- import cython # noqa
+ import cython # type: ignore[import-untyped, import-not-found] # noqa
defaults['cython'] = True
except ImportError:
pass
try:
- import fastparquet # noqa
+ import fastparquet # type: ignore[import-untyped, import-not-found] # noqa
defaults['fastparquet'] = True
except ImportError:
pass
@@ -347,7 +347,7 @@ def func(ctx, x):
pc.register_aggregate_function(func,
func_name,
- func_doc,
+ func_doc, # type: ignore
{
"x": pa.float64(),
},
diff --git a/python/pyarrow/cuda.py b/python/pyarrow/cuda.py
index 18c530d4afe..eeb637f0ab4 100644
--- a/python/pyarrow/cuda.py
+++ b/python/pyarrow/cuda.py
@@ -18,7 +18,7 @@
# flake8: noqa
-from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer,
+from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, # type: ignore[reportMissingModuleSource]
HostBuffer, BufferReader, BufferWriter,
new_host_buffer,
serialize_record_batch, read_message,
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index 039da8c0d56..967c4b475dd 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -54,6 +54,9 @@
get_partition_keys as _get_partition_keys, # keep for backwards compatibility
_filesystemdataset_write,
)
+ from pyarrow.fs import FileInfo
+
+
except ImportError as exc:
raise ImportError(
f"The pyarrow installation is not built with support for 'dataset' ({str(exc)})"
@@ -70,7 +73,8 @@
)
try:
- from pyarrow._dataset_orc import OrcFileFormat
+ from pyarrow._dataset_orc import ( # type: ignore[import-not-found]
+ OrcFileFormat)
_orc_available = True
except ImportError:
pass
@@ -371,6 +375,7 @@ def _ensure_multiple_sources(paths, filesystem=None):
# possible improvement is to group the file_infos by type and raise for
# multiple paths per error category
if is_local:
+ # type: ignore[reportGeneralTypeIssues]
for info in filesystem.get_file_info(paths):
file_type = info.type
if file_type == FileType.File:
@@ -422,16 +427,18 @@ def _ensure_single_source(path, filesystem=None):
filesystem, path = _resolve_filesystem_and_path(path, filesystem)
# ensure that the path is normalized before passing to dataset discovery
+ assert isinstance(path, str)
path = filesystem.normalize_path(path)
# retrieve the file descriptor
file_info = filesystem.get_file_info(path)
+ assert isinstance(file_info, FileInfo)
# depending on the path type either return with a recursive
# directory selector or as a list containing a single file
- if file_info.type == FileType.Directory:
+ if file_info.type == FileType.Directory: # type: ignore[reportAttributeAccessIssue]
paths_or_selector = FileSelector(path, recursive=True)
- elif file_info.type == FileType.File:
+ elif file_info.type == FileType.File: # type: ignore[reportAttributeAccessIssue]
paths_or_selector = [path]
else:
raise FileNotFoundError(path)
@@ -1035,6 +1042,7 @@ def file_visitor(written_file):
_filesystemdataset_write(
scanner, base_dir, basename_template, filesystem, partitioning,
preserve_order, file_options, max_partitions, file_visitor,
- existing_data_behavior, max_open_files, max_rows_per_file,
- min_rows_per_group, max_rows_per_group, create_dir
+ existing_data_behavior, # type: ignore[reportArgumentType]
+ max_open_files, max_rows_per_file, min_rows_per_group,
+ max_rows_per_group, create_dir
)
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 241c27706a6..4b0ecb9f18e 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -183,6 +183,7 @@ def write_feather(df, dest, compression=None, compression_level=None,
f'one of {_FEATHER_SUPPORTED_CODECS}')
try:
+ assert version in (1, 2)
_feather.write_feather(table, dest, compression=compression,
compression_level=compression_level,
chunksize=chunksize, version=version)
@@ -269,7 +270,7 @@ def read_table(source, columns=None, memory_map=False, use_threads=True):
f"Got columns {columns} of types {column_type_names}")
# Feather v1 already respects the column selection
- if reader.version < 3:
+ if int(reader.version) < 3:
return table
# Feather v2 reads with sorted / deduplicated selection
elif sorted(set(columns)) == columns:
diff --git a/python/pyarrow/flight.py b/python/pyarrow/flight.py
index b1836907c67..ba5008c9ecf 100644
--- a/python/pyarrow/flight.py
+++ b/python/pyarrow/flight.py
@@ -16,7 +16,7 @@
# under the License.
try:
- from pyarrow._flight import ( # noqa:F401
+ from pyarrow._flight import ( # noqa:F401 # type: ignore[import-not-found]
connect,
Action,
ActionType,
diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index 670ccaaf245..e1aa9090d2d 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -40,7 +40,7 @@
_not_imported = []
try:
- from pyarrow._azurefs import AzureFileSystem # noqa
+ from pyarrow._azurefs import AzureFileSystem # noqa # type: ignore[reportMissingModuleSource]
except ImportError:
_not_imported.append("AzureFileSystem")
@@ -50,12 +50,12 @@
_not_imported.append("HadoopFileSystem")
try:
- from pyarrow._gcsfs import GcsFileSystem # noqa
+ from pyarrow._gcsfs import GcsFileSystem # noqa # type: ignore[reportMissingModuleSource]
except ImportError:
_not_imported.append("GcsFileSystem")
try:
- from pyarrow._s3fs import ( # noqa
+ from pyarrow._s3fs import ( # noqa # type: ignore[reportMissingModuleSource]
AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy,
S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized,
finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region)
@@ -111,7 +111,7 @@ def _ensure_filesystem(filesystem, *, use_mmap=False):
else:
# handle fsspec-compatible filesystems
try:
- import fsspec
+ import fsspec # type: ignore[import-untyped]
except ImportError:
pass
else:
@@ -165,6 +165,7 @@ def _resolve_filesystem_and_path(path, filesystem=None, *, memory_map=False):
file_info = None
exists_locally = False
else:
+ assert isinstance(file_info, FileInfo)
exists_locally = (file_info.type != FileType.NotFound)
# if the file or directory doesn't exists locally, then assume that
@@ -250,7 +251,9 @@ def copy_files(source, destination,
destination, destination_filesystem
)
+ assert isinstance(source_fs, FileSystem)
file_info = source_fs.get_file_info(source_path)
+ assert isinstance(file_info, FileInfo)
if file_info.type == FileType.Directory:
source_sel = FileSelector(source_path, recursive=True)
_copy_files_selector(source_fs, source_sel,
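
The isinstance asserts added here (and in dataset.py and parquet/core.py below) narrow get_file_info's FileInfo-or-list return before attribute access, since the stubs type it as a union that depends on the argument. A minimal sketch of the same idiom (the path is illustrative):

from typing import Optional

from pyarrow import fs


def file_size(path: str) -> Optional[int]:
    local = fs.LocalFileSystem()
    info = local.get_file_info(path)      # checker sees FileInfo | list[FileInfo]
    assert isinstance(info, fs.FileInfo)  # narrows the union to a single FileInfo
    return info.size                      # None for directories or missing paths
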
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index 4e0d66ec665..222c289c879 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -20,7 +20,7 @@
import warnings
from pyarrow.lib import Table
-import pyarrow._orc as _orc
+import pyarrow._orc as _orc # type: ignore[reportMissingModuleSource]
from pyarrow.fs import _resolve_filesystem_and_path
@@ -255,9 +255,11 @@ def __init__(self, where, *,
file_version=file_version,
batch_size=batch_size,
stripe_size=stripe_size,
- compression=compression,
+ compression=compression, # type: ignore[reportArgumentType]
compression_block_size=compression_block_size,
- compression_strategy=compression_strategy,
+ compression_strategy=(
+ compression_strategy # type: ignore[reportArgumentType]
+ ),
row_index_stride=row_index_stride,
padding_tolerance=padding_tolerance,
dictionary_key_size_threshold=dictionary_key_size_threshold,
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index dfed76d3711..e1fb05d1317 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -33,18 +33,18 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
from pyarrow.lib import _pandas_api, frombytes, is_threading_enabled # noqa
-_logical_type_map = {}
-_numpy_logical_type_map = {}
-_pandas_logical_type_map = {}
+_logical_type_map: dict[int, str] = {}
+_numpy_logical_type_map: dict[int, str] = {}
+_pandas_logical_type_map: dict[int, str] = {}
def get_logical_type_map():
- global _logical_type_map
+ global _logical_type_map # noqa: F824
if not _logical_type_map:
_logical_type_map.update({
@@ -90,9 +90,9 @@ def get_logical_type(arrow_type):
def get_numpy_logical_type_map():
- global _numpy_logical_type_map
+ global _numpy_logical_type_map # noqa: F824
if not _numpy_logical_type_map:
- _numpy_logical_type_map.update({
+ _numpy_logical_type_map.update({ # type: ignore[reportCallIssue]
np.bool_: 'bool',
np.int8: 'int8',
np.int16: 'int16',
@@ -704,7 +704,7 @@ def get_datetimetz_type(values, dtype, type_):
# If no user type passed, construct a tz-aware timestamp type
tz = dtype.tz
unit = dtype.unit
- type_ = pa.timestamp(unit, tz)
+ type_ = pa.timestamp(unit, tz) # type: ignore[reportArgumentType]
elif type_ is None:
# Trust the NumPy dtype
type_ = pa.from_numpy_dtype(values.dtype)
@@ -743,7 +743,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block=
pandas Block
"""
- import pandas.core.internals as _int
+ import pandas.core.internals as _int # type: ignore[import-not-found]
block_arr = item.get('block', None)
placement = item['placement']
@@ -769,6 +769,8 @@ def _reconstruct_block(item, columns=None, extension_columns=None, return_block=
# create ExtensionBlock
arr = item['py_array']
assert len(placement) == 1
+ assert isinstance(columns, list)
+ assert isinstance(extension_columns, dict)
name = columns[placement[0]]
pandas_dtype = extension_columns[name]
if not hasattr(pandas_dtype, '__from_arrow__'):
@@ -788,7 +790,7 @@ def make_datetimetz(unit, tz):
if _pandas_api.is_v1():
unit = 'ns' # ARROW-3789: Coerce date/timestamp types to datetime64[ns]
tz = pa.lib.string_to_tzinfo(tz)
- return _pandas_api.datetimetz_type(unit, tz=tz)
+ return _pandas_api.datetimetz_type(unit, tz=tz) # type: ignore[reportArgumentType]
def table_to_dataframe(
@@ -822,7 +824,8 @@ def table_to_dataframe(
result = pa.lib.table_to_blocks(options, table, categories,
list(ext_columns_dtypes.keys()))
if _pandas_api.is_ge_v3():
- from pandas.api.internals import create_dataframe_from_blocks
+ from pandas.api.internals import ( # type: ignore[import-not-found]
+ create_dataframe_from_blocks)
blocks = [
_reconstruct_block(
@@ -834,7 +837,8 @@ def table_to_dataframe(
return df
else:
- from pandas.core.internals import BlockManager
+ from pandas.core.internals import ( # type: ignore[reportMissingImports]
+ BlockManager)
from pandas import DataFrame
blocks = [
@@ -844,7 +848,8 @@ def table_to_dataframe(
axes = [columns, index]
mgr = BlockManager(blocks, axes)
if _pandas_api.is_ge_v21():
- df = DataFrame._from_mgr(mgr, mgr.axes)
+ df = DataFrame._from_mgr( # type: ignore[reportAttributeAccessIssue]
+ mgr, mgr.axes)
else:
df = DataFrame(mgr)
@@ -1092,10 +1097,10 @@ def _is_generated_index_name(name):
def get_pandas_logical_type_map():
- global _pandas_logical_type_map
+ global _pandas_logical_type_map # noqa: F824
if not _pandas_logical_type_map:
- _pandas_logical_type_map.update({
+ _pandas_logical_type_map.update({ # type: ignore[reportCallIssue]
'date': 'datetime64[D]',
'datetime': 'datetime64[ns]',
'datetimetz': 'datetime64[ns]',
@@ -1162,12 +1167,14 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
labels = getattr(columns, 'codes', None) or [None]
# Convert each level to the dtype provided in the metadata
- levels_dtypes = [
- (level, col_index.get('pandas_type', str(level.dtype)),
- col_index.get('numpy_type', None))
+ levels_dtypes = [(level, col_index.get(
+ 'pandas_type',
+ str(level.dtype) # type: ignore[reportAttributeAccessIssue]
+ ),
+ col_index.get('numpy_type', None))
for level, col_index in zip_longest(
levels, column_indexes, fillvalue={}
- )
+ )
]
new_levels = []
@@ -1179,7 +1186,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
# bytes into unicode strings when json.loads-ing them. We need to
# convert them back to bytes to preserve metadata.
if dtype == np.bytes_:
- level = level.map(encoder)
+ level = level.map(encoder) # type: ignore[reportAttributeAccessIssue]
# ARROW-13756: if index is timezone aware DataTimeIndex
elif pandas_dtype == "datetimetz":
tz = pa.lib.string_to_tzinfo(
@@ -1188,12 +1195,14 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
if _pandas_api.is_ge_v3():
# with pandas 3+, to_datetime returns a unit depending on the string
# data, so we restore it to the original unit from the metadata
- level = level.as_unit(np.datetime_data(dtype)[0])
+ level = level.as_unit(np.datetime_data(
+ dtype)[0]) # type: ignore[reportArgumentType]
# GH-41503: if the column index was decimal, restore to decimal
elif pandas_dtype == "decimal":
level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level])
elif (
- level.dtype == "str" and numpy_dtype == "object"
+ level.dtype == "str" # type: ignore[reportAttributeAccessIssue]
+ and numpy_dtype == "object"
and ("mixed" in pandas_dtype or pandas_dtype in ["unicode", "string"])
):
# the metadata indicate that the original dataframe used object dtype,
@@ -1206,11 +1215,12 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
# for pandas >= 3 we want to use the default string dtype for .columns
new_levels.append(level)
continue
- elif level.dtype != dtype:
- level = level.astype(dtype)
+ elif level.dtype != dtype: # type: ignore[reportAttributeAccessIssue]
+ level = level.astype(dtype) # type: ignore[reportAttributeAccessIssue]
# ARROW-9096: if original DataFrame was upcast we keep that
if level.dtype != numpy_dtype and pandas_dtype != "datetimetz":
- level = level.astype(numpy_dtype)
+ level = level.astype( # type: ignore[reportAttributeAccessIssue]
+ numpy_dtype)
new_levels.append(level)
diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 676bc445238..a9e7a1984ae 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -45,7 +45,7 @@
FileDecryptionProperties,
SortingColumn)
from pyarrow.fs import (LocalFileSystem, FileType, _resolve_filesystem_and_path,
- _ensure_filesystem)
+ _ensure_filesystem, FileInfo)
from pyarrow.util import guid, _is_path_like, _stringify_path, _deprecate_api
@@ -1413,12 +1413,15 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None,
path_or_paths, filesystem, memory_map=memory_map
)
finfo = filesystem.get_file_info(path_or_paths)
+ assert isinstance(finfo, FileInfo)
if finfo.type == FileType.Directory:
self._base_dir = path_or_paths
else:
single_file = path_or_paths
- parquet_format = ds.ParquetFileFormat(**read_options)
+ parquet_format = ds.ParquetFileFormat(
+ **read_options # type: ignore[invalid-argument-type]
+ )
if single_file is not None:
fragment = parquet_format.make_fragment(single_file, filesystem)
@@ -1573,6 +1576,7 @@ def _get_common_pandas_metadata(self):
for name in ["_common_metadata", "_metadata"]:
metadata_path = os.path.join(str(self._base_dir), name)
finfo = self.filesystem.get_file_info(metadata_path)
+ assert isinstance(finfo, FileInfo)
if finfo.is_file:
pq_meta = read_metadata(
metadata_path, filesystem=self.filesystem)
@@ -1671,6 +1675,7 @@ def files(self):
>>> dataset.files
['dataset_v2_files/year=2019/...-0.parquet', ...
"""
+ assert isinstance(self._dataset, pa.dataset.FileSystemDataset)
return self._dataset.files
@property
@@ -1678,6 +1683,7 @@ def filesystem(self):
"""
The filesystem type of the Dataset source.
"""
+ assert isinstance(self._dataset, pa.dataset.FileSystemDataset)
return self._dataset.filesystem
@property
@@ -1685,6 +1691,7 @@ def partitioning(self):
"""
The partitioning of the Dataset source, if discovered.
"""
+ assert isinstance(self._dataset, pa.dataset.FileSystemDataset)
return self._dataset.partitioning
@@ -1901,14 +1908,16 @@ def read_table(source, *, columns=None, use_threads=True,
filesystem, path = _resolve_filesystem_and_path(source, filesystem)
if filesystem is not None:
- if not filesystem.get_file_info(path).is_file:
+ file_info = filesystem.get_file_info(path)
+ assert isinstance(file_info, FileInfo)
+ if not file_info.is_file:
raise ValueError(
"the 'source' argument should be "
"an existing parquet file and not a directory "
"when the pyarrow.dataset module is not available"
)
- source = filesystem.open_input_file(path)
+ source = filesystem.open_input_file(path) # type: ignore
dataset = ParquetFile(
source, read_dictionary=read_dictionary,
@@ -2081,7 +2090,8 @@ def write_table(table, where, row_group_size=None, version='2.6',
def write_to_dataset(table, root_path, partition_cols=None,
filesystem=None, schema=None, partitioning=None,
basename_template=None, use_threads=None,
- file_visitor=None, existing_data_behavior=None,
+ file_visitor=None, # type: ignore[reportRedeclaration]
+ existing_data_behavior=None,
**kwargs):
"""Wrapper around dataset.write_dataset for writing a Table to
Parquet format by partitions.
@@ -2310,7 +2320,7 @@ def write_metadata(schema, where, metadata_collector=None, filesystem=None,
filesystem, where = _resolve_filesystem_and_path(where, filesystem)
if hasattr(where, "seek"): # file-like
- cursor_position = where.tell()
+ cursor_position = where.tell() # type: ignore[reportAttributeAccessIssue]
writer = ParquetWriter(where, schema, filesystem, **kwargs)
writer.close()
@@ -2319,8 +2329,8 @@ def write_metadata(schema, where, metadata_collector=None, filesystem=None,
# ParquetWriter doesn't expose the metadata until it's written. Write
# it and read it again.
metadata = read_metadata(where, filesystem=filesystem)
- if hasattr(where, "seek"):
- where.seek(cursor_position) # file-like, set cursor back.
+ if hasattr(where, "seek"): # file-like, set cursor back.
+ where.seek(cursor_position) # type: ignore[reportAttributeAccessIssue]
for m in metadata_collector:
metadata.append_row_groups(m)
diff --git a/python/pyarrow/parquet/encryption.py b/python/pyarrow/parquet/encryption.py
index df6eed913fa..1c6835d6acf 100644
--- a/python/pyarrow/parquet/encryption.py
+++ b/python/pyarrow/parquet/encryption.py
@@ -20,4 +20,5 @@
EncryptionConfiguration,
DecryptionConfiguration,
KmsConnectionConfig,
- KmsClient)
+ KmsClient,
+ FileSystemKeyMaterialStore)
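
Re-exporting FileSystemKeyMaterialStore makes external key material reachable through the public pyarrow.parquet.encryption namespace instead of pa._parquet_encryption, which is what the encryption test later in this patch switches to. A hedged usage sketch; the file name is illustrative and must refer to a Parquet file written with external key material:

import pyarrow.parquet.encryption as pe

# Assumes "encrypted.parquet" was written with internal_key_material=False.
store = pe.FileSystemKeyMaterialStore.for_file("encrypted.parquet")
print(store.get_key_id_set())
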
diff --git a/python/pyarrow/py.typed b/python/pyarrow/py.typed
new file mode 100644
index 00000000000..13a83393a91
--- /dev/null
+++ b/python/pyarrow/py.typed
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
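
py.typed is the PEP 561 marker: once it ships inside the installed pyarrow package, mypy and pyright consume the package's stubs and inline annotations instead of treating pyarrow as untyped. A quick way to observe the effect (illustrative script; typing.reveal_type needs Python 3.11+):

# check_typed.py -- run `mypy check_typed.py` or `pyright check_typed.py`
from typing import reveal_type

import pyarrow as pa

tbl = pa.table({"x": [1, 2, 3]})
reveal_type(tbl)  # reported as pyarrow.Table once py.typed and the stubs are present
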
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 83cabcf447d..16fed344e4d 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -1036,7 +1036,7 @@ cdef class StructScalar(Scalar, Mapping):
Parameters
----------
- index : Union[int, str]
+ key : Union[int, str]
Index / position or name of the field.
Returns
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 575444c1cfc..3f227d3101c 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -64,7 +64,8 @@
if os.environ.get('TZDIR', None) is None:
from importlib import resources
try:
- os.environ['TZDIR'] = os.path.join(resources.files('tzdata'), 'zoneinfo')
+ tzdata_path = resources.files('tzdata')
+ os.environ['TZDIR'] = os.path.join(str(tzdata_path), 'zoneinfo')
except ModuleNotFoundError:
print(
'Package "tzdata" not found. Not setting TZDIR environment variable.'
@@ -191,6 +192,7 @@ def decorate(func):
def wrapper(*args, **kwargs):
remaining_attempts = attempts
curr_delay = delay
+ last_exception = None
while remaining_attempts > 0:
try:
return func(*args, **kwargs)
@@ -201,6 +203,9 @@ def wrapper(*args, **kwargs):
if max_delay:
curr_delay = min(curr_delay, max_delay)
time.sleep(curr_delay)
+ # At this point, we've exhausted all attempts and last_exception must be set
+ # (since we must have caught at least one exception to exit the loop)
+ assert last_exception is not None, "No attempts were made"
raise last_exception
return wrapper
return decorate
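
Initialising last_exception to None and asserting before the re-raise makes the retry helper's control flow explicit for type checkers: the name is always bound, and it can only still be None if no attempt ran at all. A condensed, self-contained sketch of that shape (not the conftest code itself):

import time


def retry(attempts: int = 3, delay: float = 0.1):
    def decorate(func):
        def wrapper(*args, **kwargs):
            last_exception = None
            remaining = attempts
            while remaining > 0:
                try:
                    return func(*args, **kwargs)
                except Exception as err:
                    last_exception = err
                    remaining -= 1
                    time.sleep(delay)
            # Reachable only after every attempt failed (or attempts <= 0).
            assert last_exception is not None, "No attempts were made"
            raise last_exception
        return wrapper
    return decorate
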
diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py
index 50da6693aff..62da25f0af3 100644
--- a/python/pyarrow/tests/interchange/test_conversion.py
+++ b/python/pyarrow/tests/interchange/test_conversion.py
@@ -23,7 +23,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow.interchange as pi
from pyarrow.interchange.column import (
@@ -163,8 +163,8 @@ def test_pandas_roundtrip_string():
result = pi.from_dataframe(pandas_df)
assert result["a"].to_pylist() == table["a"].to_pylist()
- assert pa.types.is_string(table["a"].type)
- assert pa.types.is_large_string(result["a"].type)
+ assert pa.types.is_string(table.column("a").type)
+ assert pa.types.is_large_string(result.column("a").type)
table_protocol = table.__dataframe__()
result_protocol = result.__dataframe__()
@@ -193,8 +193,8 @@ def test_pandas_roundtrip_large_string():
result = pi.from_dataframe(pandas_df)
assert result["a_large"].to_pylist() == table["a_large"].to_pylist()
- assert pa.types.is_large_string(table["a_large"].type)
- assert pa.types.is_large_string(result["a_large"].type)
+ assert pa.types.is_large_string(table.column("a_large").type)
+ assert pa.types.is_large_string(result.column("a_large").type)
table_protocol = table.__dataframe__()
result_protocol = result.__dataframe__()
@@ -231,12 +231,12 @@ def test_pandas_roundtrip_string_with_missing():
result = pi.from_dataframe(pandas_df)
assert result["a"].to_pylist() == table["a"].to_pylist()
- assert pa.types.is_string(table["a"].type)
- assert pa.types.is_large_string(result["a"].type)
+ assert pa.types.is_string(table.column("a").type)
+ assert pa.types.is_large_string(result.column("a").type)
assert result["a_large"].to_pylist() == table["a_large"].to_pylist()
- assert pa.types.is_large_string(table["a_large"].type)
- assert pa.types.is_large_string(result["a_large"].type)
+ assert pa.types.is_large_string(table.column("a_large").type)
+ assert pa.types.is_large_string(result.column("a_large").type)
else:
# older versions of pandas do not have bitmask support
# https://github.com/pandas-dev/pandas/issues/49888
@@ -261,12 +261,16 @@ def test_pandas_roundtrip_categorical():
result = pi.from_dataframe(pandas_df)
assert result["weekday"].to_pylist() == table["weekday"].to_pylist()
- assert pa.types.is_dictionary(table["weekday"].type)
- assert pa.types.is_dictionary(result["weekday"].type)
- assert pa.types.is_string(table["weekday"].chunk(0).dictionary.type)
- assert pa.types.is_large_string(result["weekday"].chunk(0).dictionary.type)
- assert pa.types.is_int32(table["weekday"].chunk(0).indices.type)
- assert pa.types.is_int8(result["weekday"].chunk(0).indices.type)
+ assert pa.types.is_dictionary(table.column("weekday").type)
+ assert pa.types.is_dictionary(result.column("weekday").type)
+ table_chunk_0 = table.column("weekday").chunk(0)
+ result_chunk_0 = result.column("weekday").chunk(0)
+ assert isinstance(table_chunk_0, pa.DictionaryArray)
+ assert isinstance(result_chunk_0, pa.DictionaryArray)
+ assert pa.types.is_string(table_chunk_0.dictionary.type)
+ assert pa.types.is_large_string(result_chunk_0.dictionary.type)
+ assert pa.types.is_int32(table_chunk_0.indices.type)
+ assert pa.types.is_int8(result_chunk_0.indices.type)
table_protocol = table.__dataframe__()
result_protocol = result.__dataframe__()
@@ -289,6 +293,7 @@ def test_pandas_roundtrip_categorical():
assert desc_cat_table["is_ordered"] == desc_cat_result["is_ordered"]
assert desc_cat_table["is_dictionary"] == desc_cat_result["is_dictionary"]
+ assert desc_cat_result["categories"] is not None
assert isinstance(desc_cat_result["categories"]._col, pa.Array)
@@ -450,6 +455,7 @@ def test_pyarrow_roundtrip_categorical(offset, length):
assert desc_cat_table["is_ordered"] == desc_cat_result["is_ordered"]
assert desc_cat_table["is_dictionary"] == desc_cat_result["is_dictionary"]
+ assert desc_cat_result["categories"] is not None
assert isinstance(desc_cat_result["categories"]._col, pa.Array)
@@ -464,8 +470,8 @@ def test_pyarrow_roundtrip_large_string():
col = result.__dataframe__().get_column(0)
assert col.size() == 3*1024**2
- assert pa.types.is_large_string(table[0].type)
- assert pa.types.is_large_string(result[0].type)
+ assert pa.types.is_large_string(table.column(0).type)
+ assert pa.types.is_large_string(result.column(0).type)
assert table.equals(result)
diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py
index cea694d1c1e..3208b56c42d 100644
--- a/python/pyarrow/tests/interchange/test_interchange_spec.py
+++ b/python/pyarrow/tests/interchange/test_interchange_spec.py
@@ -23,7 +23,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.tests.strategies as past
diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py
index 5390a24b90d..3cbf5801dfc 100644
--- a/python/pyarrow/tests/parquet/common.py
+++ b/python/pyarrow/tests/parquet/common.py
@@ -16,11 +16,12 @@
# under the License.
import io
+from typing import cast
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
from pyarrow.tests import util
@@ -137,7 +138,7 @@ def make_sample_file(table_or_df):
else:
a_table = pa.Table.from_pandas(table_or_df)
- buf = io.BytesIO()
+ buf = io.BytesIO() # type: ignore[attr-defined]
_write_table(a_table, buf, compression='SNAPPY', version='2.6')
buf.seek(0)
@@ -161,12 +162,9 @@ def alltypes_sample(size=10000, seed=0, categorical=False):
'float32': np.arange(size, dtype=np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0,
- 'datetime_ms': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[ms]'),
- 'datetime_us': np.arange("2016-01-01T00:00:00.000001", size,
- dtype='datetime64[us]'),
- 'datetime_ns': np.arange("2016-01-01T00:00:00.000000001", size,
- dtype='datetime64[ns]'),
+ 'datetime_ms': pd.date_range("2016-01-01T00:00:00.001", periods=size, freq='ms').values,
+ 'datetime_us': pd.date_range("2016-01-01T00:00:00.000001", periods=size, freq='us').values,
+ 'datetime_ns': pd.date_range("2016-01-01T00:00:00.000000001", periods=size, freq='ns').values,
'timedelta': np.arange(0, size, dtype="timedelta64[s]"),
'str': pd.Series([str(x) for x in range(size)]),
'empty_str': [''] * size,
@@ -175,5 +173,6 @@ def alltypes_sample(size=10000, seed=0, categorical=False):
'null_list': [None] * 2 + [[None] * (x % 4) for x in range(size - 2)],
}
if categorical:
- arrays['str_category'] = arrays['str'].astype('category')
+ import pandas as pd
+ arrays['str_category'] = cast(pd.Series, arrays['str']).astype('category')
return pd.DataFrame(arrays)
diff --git a/python/pyarrow/tests/parquet/encryption.py b/python/pyarrow/tests/parquet/encryption.py
index efaee1d08a9..7a6ef3de7bc 100644
--- a/python/pyarrow/tests/parquet/encryption.py
+++ b/python/pyarrow/tests/parquet/encryption.py
@@ -30,7 +30,7 @@ def __init__(self, config):
pe.KmsClient.__init__(self)
self.master_keys_map = config.custom_kms_conf
- def wrap_key(self, key_bytes, master_key_identifier):
+ def wrap_key(self, key_bytes, master_key_identifier): # type: ignore[override]
"""Not a secure cipher - the wrapped key
is just the master key concatenated with key bytes"""
master_key_bytes = self.master_keys_map[master_key_identifier].encode(
@@ -39,7 +39,7 @@ def wrap_key(self, key_bytes, master_key_identifier):
result = base64.b64encode(wrapped_key)
return result
- def unwrap_key(self, wrapped_key, master_key_identifier):
+ def unwrap_key(self, wrapped_key, master_key_identifier): # type: ignore[override]
"""Not a secure cipher - just extract the key from
the wrapped key"""
if master_key_identifier not in self.master_keys_map:
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index 94868741f39..4c0e6d1429b 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -35,7 +35,7 @@
import pyarrow.parquet as pq
from pyarrow.tests.parquet.common import _read_table, _write_table
except ImportError:
- pq = None
+ pass
try:
@@ -45,12 +45,12 @@
from pyarrow.tests.pandas_examples import dataframe_with_lists
from pyarrow.tests.parquet.common import alltypes_sample
except ImportError:
- pd = tm = None
+ pass
try:
import numpy as np
except ImportError:
- np = None
+ pass
# Marks all of the tests in this module
# Ignore these with pytest ... -m 'not parquet'
@@ -162,10 +162,10 @@ def test_invalid_source():
# Test that we provide an helpful error message pointing out
# that None wasn't expected when trying to open a Parquet None file.
with pytest.raises(TypeError, match="None"):
- pq.read_table(None)
+ pq.read_table(None) # type: ignore[arg-type]
with pytest.raises(TypeError, match="None"):
- pq.ParquetFile(None)
+ pq.ParquetFile(None) # type: ignore[arg-type]
def test_read_table_without_dataset(tempdir):
@@ -747,7 +747,7 @@ def test_fastparquet_cross_compatibility(tempdir):
# Arrow -> fastparquet
file_arrow = str(tempdir / "cross_compat_arrow.parquet")
- pq.write_table(table, file_arrow, compression=None)
+ pq.write_table(table, file_arrow, compression=None) # type: ignore[arg-type]
fp_file = fp.ParquetFile(file_arrow)
df_fp = fp_file.to_pandas()
@@ -788,7 +788,7 @@ def test_buffer_contents(
for col in table.columns:
[chunk] = col.chunks
buf = chunk.buffers()[1]
- assert buf.to_pybytes() == buf.size * b"\0"
+ assert buf.to_pybytes() == buf.size * b"\0" # type: ignore[union-attr]
def test_parquet_compression_roundtrip(tempdir):
@@ -798,7 +798,7 @@ def test_parquet_compression_roundtrip(tempdir):
# the stream due to auto-detecting the extension in the filename
table = pa.table([pa.array(range(4))], names=["ints"])
path = tempdir / "arrow-10480.pyarrow.gz"
- pq.write_table(table, path, compression="GZIP")
+ pq.write_table(table, path, compression="GZIP") # type: ignore[arg-type]
result = pq.read_table(path)
assert result.equals(table)
@@ -823,7 +823,7 @@ def test_empty_row_groups(tempdir):
def test_reads_over_batch(tempdir):
data = [None] * (1 << 20)
- data.append([1])
+ data.append([1]) # type: ignore[reportArgumentType]
# Large list with mostly nones and one final
# value. This should force batched reads when
# reading back.
diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py
index 2345855a332..af418812be8 100644
--- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py
+++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py
@@ -24,15 +24,14 @@
from pyarrow.tests.parquet.common import (_read_table,
_check_roundtrip)
except ImportError:
- pq = None
+ pass
try:
import pandas as pd
- import pandas.testing as tm
from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe
except ImportError:
- pd = tm = None
+ pass
# Marks all of the tests in this module
diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py
index c546bc1532a..bd48ffe7155 100644
--- a/python/pyarrow/tests/parquet/test_data_types.py
+++ b/python/pyarrow/tests/parquet/test_data_types.py
@@ -22,7 +22,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import pyarrow as pa
@@ -33,7 +33,7 @@
import pyarrow.parquet as pq
from pyarrow.tests.parquet.common import _read_table, _write_table
except ImportError:
- pq = None
+ pass
try:
@@ -44,7 +44,7 @@
dataframe_with_lists)
from pyarrow.tests.parquet.common import alltypes_sample
except ImportError:
- pd = tm = None
+ pass
# Marks all of the tests in this module
@@ -142,7 +142,7 @@ def test_direct_read_dictionary():
read_dictionary=['f0'])
# Compute dictionary-encoded subfield
- expected = pa.table([table[0].dictionary_encode()], names=['f0'])
+ expected = pa.table([table.column(0).dictionary_encode()], names=['f0'])
assert result.equals(expected)
@@ -174,7 +174,7 @@ def test_direct_read_dictionary_subfield():
expected = pa.table([expected_arr], names=['f0'])
assert result.equals(expected)
- assert result[0].num_chunks == 1
+ assert result.column(0).num_chunks == 1
@pytest.mark.numpy
@@ -260,8 +260,8 @@ def test_single_pylist_column_roundtrip(tempdir, dtype,):
_write_table(table, filename)
table_read = _read_table(filename)
for i in range(table.num_columns):
- col_written = table[i]
- col_read = table_read[i]
+ col_written = table.column(i)
+ col_read = table_read.column(i)
assert table.field(i).name == table_read.field(i).name
assert col_read.num_chunks == 1
data_written = col_written.chunk(0)
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index d3e9cda7301..14253ca7d6b 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -20,35 +20,41 @@
import os
import pathlib
import sys
+from typing import TYPE_CHECKING
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import unittest.mock as mock
import pyarrow as pa
import pyarrow.compute as pc
-from pyarrow.fs import (FileSelector, FileSystem, LocalFileSystem,
+from pyarrow.fs import (FileSelector, FileSystem, LocalFileSystem, FileInfo, FileType,
PyFileSystem, SubTreeFileSystem, FSSpecHandler)
from pyarrow.tests import util
from pyarrow.util import guid
-try:
+if TYPE_CHECKING:
+ import pandas as pd
+ import pandas.testing as tm
import pyarrow.parquet as pq
from pyarrow.tests.parquet.common import (
_read_table, _test_dataframe, _test_table, _write_table)
-except ImportError:
- pq = None
+else:
+ try:
+ import pyarrow.parquet as pq
+ from pyarrow.tests.parquet.common import (
+ _read_table, _test_dataframe, _test_table, _write_table)
+ except ImportError:
+ pass
-
-try:
- import pandas as pd
- import pandas.testing as tm
-
-except ImportError:
- pd = tm = None
+ try:
+ import pandas as pd
+ import pandas.testing as tm
+ except ImportError:
+ pass
# Marks all of the tests in this module
@@ -70,8 +76,8 @@ def test_filesystem_uri(tempdir):
assert result.equals(table)
# filesystem URI
- result = pq.read_table(
- "data_dir/data.parquet", filesystem=util._filesystem_uri(tempdir))
+ result = pq.read_table("data_dir/data.parquet",
+ filesystem=util._filesystem_uri(tempdir))
assert result.equals(table)
@@ -553,7 +559,7 @@ def _generate_partition_directories(fs, base_dir, partition_spec, df):
# ['bar', ['a', 'b', 'c']]
# part_table : a pyarrow.Table to write to each partition
if not isinstance(fs, FileSystem):
- fs = PyFileSystem(FSSpecHandler(fs))
+ fs = PyFileSystem(FSSpecHandler(fs)) # type: ignore[abstract]
DEPTH = len(partition_spec)
@@ -572,15 +578,15 @@ def _visit_level(base_dir, level, part_keys):
if level == DEPTH - 1:
# Generate example data
- from pyarrow.fs import FileType
-
file_path = pathsep.join([level_dir, guid()])
filtered_df = _filter_partition(df, this_part_keys)
part_table = pa.Table.from_pandas(filtered_df)
with fs.open_output_stream(file_path) as f:
_write_table(part_table, f)
- assert fs.get_file_info(file_path).type != FileType.NotFound
- assert fs.get_file_info(file_path).type == FileType.File
+ file_info = fs.get_file_info(file_path)
+ assert isinstance(file_info, FileInfo)
+ assert file_info.type != FileType.NotFound
+ assert file_info.type == FileType.File
file_success = pathsep.join([level_dir, '_SUCCESS'])
with fs.open_output_stream(file_success) as f:
@@ -717,8 +723,8 @@ def test_dataset_read_pandas(tempdir):
paths = []
for i in range(nfiles):
df = _test_dataframe(size, seed=i)
- df.index = np.arange(i * size, (i + 1) * size)
- df.index.name = 'index'
+ df.index = np.arange(i * size, (i + 1) * size) # type: ignore[assignment]
+ df.index.name = 'index' # type: ignore[attr-defined]
path = dirpath / f'{i}.parquet'
@@ -931,8 +937,7 @@ def _test_write_to_dataset_with_partitions(base_path,
'group2': list('eefeffgeee'),
'num': list(range(10)),
'nan': [np.nan] * 10,
- 'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]').astype(
- 'datetime64[ns]')
+ 'date': pd.date_range('2017-01-01', periods=10, freq='D').values.astype('datetime64[ns]')
})
cols = output_df.columns.tolist()
partition_by = ['group1', 'group2']
@@ -965,7 +970,7 @@ def _test_write_to_dataset_with_partitions(base_path,
input_df_cols = input_df.columns.tolist()
assert partition_by == input_df_cols[-1 * len(partition_by):]
- input_df = input_df[cols]
+ input_df = input_df.loc[:, cols]
# Partitioned columns become 'categorical' dtypes
for col in partition_by:
output_df[col] = output_df[col].astype('category')
@@ -974,6 +979,7 @@ def _test_write_to_dataset_with_partitions(base_path,
expected_date_type = schema.field('date').type.to_pandas_dtype()
output_df["date"] = output_df["date"].astype(expected_date_type)
+ assert isinstance(input_df, pd.DataFrame)
tm.assert_frame_equal(output_df, input_df)
@@ -988,8 +994,7 @@ def _test_write_to_dataset_no_partitions(base_path,
'group1': list('aaabbbbccc'),
'group2': list('eefeffgeee'),
'num': list(range(10)),
- 'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]').astype(
- 'datetime64[ns]')
+ 'date': pd.date_range('2017-01-01', periods=10, freq='D').values.astype('datetime64[ns]')
})
cols = output_df.columns.tolist()
output_table = pa.Table.from_pandas(output_df)
@@ -997,7 +1002,7 @@ def _test_write_to_dataset_no_partitions(base_path,
if filesystem is None:
filesystem = LocalFileSystem()
elif not isinstance(filesystem, FileSystem):
- filesystem = PyFileSystem(FSSpecHandler(filesystem))
+ filesystem = PyFileSystem(FSSpecHandler(filesystem)) # type: ignore[abstract]
# Without partitions, append files to root_path
n = 5
@@ -1009,8 +1014,10 @@ def _test_write_to_dataset_no_partitions(base_path,
recursive=True)
infos = filesystem.get_file_info(selector)
- output_files = [info for info in infos if info.path.endswith(".parquet")]
- assert len(output_files) == n
+    assert isinstance(infos, list)
+    assert all(isinstance(info, FileInfo) for info in infos)
+    output_files = [info for info in infos if info.path.endswith(".parquet")]
+    assert len(output_files) == n
# Deduplicated incoming DataFrame should match
# original outgoing Dataframe
@@ -1020,6 +1027,7 @@ def _test_write_to_dataset_no_partitions(base_path,
input_df = input_table.to_pandas()
input_df = input_df.drop_duplicates()
input_df = input_df[cols]
+ assert isinstance(input_df, pd.DataFrame)
tm.assert_frame_equal(output_df, input_df)
@@ -1168,11 +1176,11 @@ def test_dataset_read_dictionary(tempdir):
path, read_dictionary=['f0']).read()
# The order of the chunks is non-deterministic
- ex_chunks = [t1[0].chunk(0).dictionary_encode(),
- t2[0].chunk(0).dictionary_encode()]
+ ex_chunks = [t1.column(0).chunk(0).dictionary_encode(),
+ t2.column(0).chunk(0).dictionary_encode()]
- assert result[0].num_chunks == 2
- c0, c1 = result[0].chunk(0), result[0].chunk(1)
+ assert result.column(0).num_chunks == 2
+ c0, c1 = result.column(0).chunk(0), result.column(0).chunk(1)
if c0.equals(ex_chunks[0]):
assert c1.equals(ex_chunks[1])
else:
diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py
index b89fd97cb91..a7652a01e64 100644
--- a/python/pyarrow/tests/parquet/test_datetime.py
+++ b/python/pyarrow/tests/parquet/test_datetime.py
@@ -22,7 +22,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import pyarrow as pa
@@ -32,7 +32,7 @@
import pyarrow.parquet as pq
from pyarrow.tests.parquet.common import _read_table, _write_table
except ImportError:
- pq = None
+ pass
try:
@@ -41,7 +41,7 @@
from pyarrow.tests.parquet.common import _roundtrip_pandas_dataframe
except ImportError:
- pd = tm = None
+ pass
# Marks all of the tests in this module
@@ -56,7 +56,7 @@ def test_pandas_parquet_datetime_tz():
# coerce to [ns] due to lack of non-[ns] support.
s = pd.Series([datetime.datetime(2017, 9, 6)], dtype='datetime64[us]')
s = s.dt.tz_localize('utc')
- s.index = s
+ s.index = s # type: ignore[assignment]
# Both a column and an index to hit both use cases
df = pd.DataFrame({'tz_aware': s,
@@ -287,7 +287,8 @@ def test_coerce_int96_timestamp_unit(unit):
# For either Parquet version, coercing to nanoseconds is allowed
# if Int96 storage is used
- expected = pa.Table.from_arrays([arrays.get(unit)]*4, names)
+ array_for_unit = arrays.get(unit, a_ns)
+ expected = pa.Table.from_arrays([array_for_unit] * 4, names)
read_table_kwargs = {"coerce_int96_timestamp_unit": unit}
_check_roundtrip(table, expected,
read_table_kwargs=read_table_kwargs,
@@ -323,6 +324,7 @@ def get_table(pq_reader_method, filename, **kwargs):
# with the default resolution of ns, we get wrong values for INT96
# that are out of bounds for nanosecond range
tab_error = get_table(pq_reader_method, filename)
+ assert tab_error is not None
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
"Discarding nonzero nanoseconds in conversion",
@@ -333,6 +335,7 @@ def get_table(pq_reader_method, filename, **kwargs):
tab_correct = get_table(
pq_reader_method, filename, coerce_int96_timestamp_unit="s"
)
+ assert tab_correct is not None
df_correct = tab_correct.to_pandas(timestamp_as_object=True)
df["a"] = df["a"].astype(object)
tm.assert_frame_equal(df, df_correct)
diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py
index 4e2fb069bd0..82b934edf77 100644
--- a/python/pyarrow/tests/parquet/test_encryption.py
+++ b/python/pyarrow/tests/parquet/test_encryption.py
@@ -21,8 +21,7 @@
import pyarrow.parquet as pq
import pyarrow.parquet.encryption as pe
except ImportError:
- pq = None
- pe = None
+ pass
else:
from pyarrow.tests.parquet.encryption import (InMemoryKmsClient,
MockVersioningKmsClient,
@@ -131,7 +130,7 @@ def test_encrypted_parquet_write_read(tempdir, data_table):
encryption_algorithm="AES_GCM_V1",
cache_lifetime=timedelta(minutes=5.0),
data_key_length_bits=256)
- assert encryption_config.uniform_encryption is False
+ assert encryption_config.uniform_encryption is False # type: ignore[attr-defined]
kms_connection_config, crypto_factory = write_encrypted_file(
path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY,
@@ -154,11 +153,11 @@ def test_uniform_encrypted_parquet_write_read(tempdir, data_table):
# Encrypt the footer and all columns with the footer key,
encryption_config = pe.EncryptionConfiguration(
footer_key=FOOTER_KEY_NAME,
- uniform_encryption=True,
+ uniform_encryption=True, # type: ignore[call-arg]
encryption_algorithm="AES_GCM_V1",
cache_lifetime=timedelta(minutes=5.0),
data_key_length_bits=256)
- assert encryption_config.uniform_encryption is True
+ assert encryption_config.uniform_encryption is True # type: ignore[attr-defined]
kms_connection_config, crypto_factory = write_encrypted_file(
path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, b"",
@@ -303,7 +302,7 @@ def test_encrypted_parquet_write_col_key_and_uniform_encryption(tempdir, data_ta
column_keys={
COL_KEY_NAME: ["a", "b"],
},
- uniform_encryption=True)
+ uniform_encryption=True) # type: ignore[call-arg]
with pytest.raises(OSError,
match=r"Cannot set both column_keys and uniform_encryption"):
@@ -415,7 +414,7 @@ def unwrap_key(self, wrapped_key, master_key_identifier):
def kms_factory(kms_connection_configuration):
return WrongTypeKmsClient(kms_connection_configuration)
- crypto_factory = pe.CryptoFactory(kms_factory)
+ crypto_factory = pe.CryptoFactory(kms_factory) # type: ignore[arg-type]
with pytest.raises(TypeError):
# Write with encryption properties
write_encrypted_parquet(path, data_table, encryption_config,
@@ -554,7 +553,7 @@ def test_encrypted_parquet_write_read_external(tempdir, data_table,
result_table = read_encrypted_parquet(
path, decryption_config, kms_connection_config, crypto_factory,
internal_key_material=False)
- store = pa._parquet_encryption.FileSystemKeyMaterialStore.for_file(path)
+ store = pe.FileSystemKeyMaterialStore.for_file(path)
assert len(key_ids := store.get_key_id_set()) == (
len(external_encryption_config.column_keys[COL_KEY_NAME]) + 1)
diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
index 148bfebaa67..646873b3d4f 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -19,11 +19,7 @@
import decimal
from collections import OrderedDict
import io
-
-try:
- import numpy as np
-except ImportError:
- np = None
+from typing import TYPE_CHECKING
import pytest
import pyarrow as pa
@@ -31,20 +27,25 @@
from pyarrow.fs import LocalFileSystem
from pyarrow.tests import util
-try:
- import pyarrow.parquet as pq
- from pyarrow.tests.parquet.common import _write_table
-except ImportError:
- pq = None
-
-
-try:
+if TYPE_CHECKING:
+ import numpy as np
import pandas as pd
- import pandas.testing as tm
-
- from pyarrow.tests.parquet.common import alltypes_sample
-except ImportError:
- pd = tm = None
+ import pyarrow.parquet as pq
+ from pyarrow.tests.parquet.common import alltypes_sample, _write_table
+else:
+ try:
+ import pyarrow.parquet as pq
+ from pyarrow.tests.parquet.common import _write_table, alltypes_sample
+ except ImportError:
+ pass
+ try:
+ import pandas as pd
+ except ImportError:
+ pass
+ try:
+ import numpy as np
+ except ImportError:
+ pass
# Marks all of the tests in this module
@@ -56,7 +57,7 @@
def test_parquet_metadata_api():
df = alltypes_sample(size=10000)
df = df.reindex(columns=sorted(df.columns))
- df.index = np.random.randint(0, 1000000, size=len(df))
+ df.index = np.random.randint(0, 1000000, size=len(df)) # type: ignore[assignment]
fileh = make_sample_file(df)
ncols = len(df.columns)
@@ -80,15 +81,15 @@ def test_parquet_metadata_api():
col = schema[0]
repr(col)
- assert col.name == df.columns[0]
- assert col.max_definition_level == 1
- assert col.max_repetition_level == 0
- assert col.max_repetition_level == 0
- assert col.physical_type == 'BOOLEAN'
- assert col.converted_type == 'NONE'
+ assert col.name == df.columns[0] # type: ignore[attr-defined]
+ assert col.max_definition_level == 1 # type: ignore[attr-defined]
+ assert col.max_repetition_level == 0 # type: ignore[attr-defined]
+ assert col.max_repetition_level == 0 # type: ignore[attr-defined]
+ assert col.physical_type == 'BOOLEAN' # type: ignore[attr-defined]
+ assert col.converted_type == 'NONE' # type: ignore[attr-defined]
col_float16 = schema[5]
- assert col_float16.logical_type.type == 'FLOAT16'
+ assert col_float16.logical_type.type == 'FLOAT16' # type: ignore[attr-defined]
with pytest.raises(IndexError):
schema[ncols + 1] # +1 for index
@@ -210,15 +211,16 @@ def test_parquet_column_statistics_api(data, type, physical_type, min_value,
col_meta = rg_meta.column(0)
stat = col_meta.statistics
- assert stat.has_min_max
- assert _close(type, stat.min, min_value)
- assert _close(type, stat.max, max_value)
- assert stat.null_count == null_count
- assert stat.num_values == num_values
+ assert stat is not None
+ assert stat.has_min_max # type: ignore[attr-defined]
+ assert _close(type, stat.min, min_value) # type: ignore[attr-defined]
+ assert _close(type, stat.max, max_value) # type: ignore[attr-defined]
+ assert stat.null_count == null_count # type: ignore[attr-defined]
+ assert stat.num_values == num_values # type: ignore[attr-defined]
# TODO(kszucs) until parquet-cpp API doesn't expose HasDistinctCount
# method, missing distinct_count is represented as zero instead of None
- assert stat.distinct_count == distinct_count
- assert stat.physical_type == physical_type
+ assert stat.distinct_count == distinct_count # type: ignore[attr-defined]
+ assert stat.physical_type == physical_type # type: ignore[attr-defined]
def _close(type, left, right):
@@ -236,8 +238,10 @@ def test_parquet_raise_on_unset_statistics():
df = pd.DataFrame({"t": pd.Series([pd.NaT], dtype="datetime64[ns]")})
meta = make_sample_file(pa.Table.from_pandas(df)).metadata
- assert not meta.row_group(0).column(0).statistics.has_min_max
- assert meta.row_group(0).column(0).statistics.max is None
+ stat = meta.row_group(0).column(0).statistics
+ assert stat is not None
+ assert not stat.has_min_max
+ assert stat.max is None
def test_statistics_convert_logical_types(tempdir):
@@ -271,8 +275,9 @@ def test_statistics_convert_logical_types(tempdir):
pq.write_table(t, path, version='2.6')
pf = pq.ParquetFile(path)
stats = pf.metadata.row_group(0).column(0).statistics
- assert stats.min == min_val
- assert stats.max == max_val
+ assert stats is not None
+ assert stats.min == min_val # type: ignore[attr-defined]
+ assert stats.max == max_val # type: ignore[attr-defined]
def test_parquet_write_disable_statistics(tempdir):
@@ -429,29 +434,36 @@ def test_field_id_metadata():
pf = pq.ParquetFile(pa.BufferReader(contents))
schema = pf.schema_arrow
- assert schema[0].metadata[field_id] == b'1'
- assert schema[0].metadata[b'other'] == b'abc'
+ assert schema[0].metadata is not None
+ assert schema[0].metadata[field_id] == b'1' # type: ignore[index]
+ assert schema[0].metadata[b'other'] == b'abc' # type: ignore[index]
list_field = schema[1]
- assert list_field.metadata[field_id] == b'11'
+ assert list_field.metadata is not None
+ assert list_field.metadata[field_id] == b'11' # type: ignore[index]
list_item_field = list_field.type.value_field
- assert list_item_field.metadata[field_id] == b'10'
+ assert list_item_field.metadata is not None
+ assert list_item_field.metadata[field_id] == b'10' # type: ignore[index]
struct_field = schema[2]
- assert struct_field.metadata[field_id] == b'102'
+ assert struct_field.metadata is not None
+ assert struct_field.metadata[field_id] == b'102' # type: ignore[index]
struct_middle_field = struct_field.type[0]
- assert struct_middle_field.metadata[field_id] == b'101'
+ assert struct_middle_field.metadata is not None
+ assert struct_middle_field.metadata[field_id] == b'101' # type: ignore[index]
struct_inner_field = struct_middle_field.type[0]
- assert struct_inner_field.metadata[field_id] == b'100'
+ assert struct_inner_field.metadata is not None
+ assert struct_inner_field.metadata[field_id] == b'100' # type: ignore[index]
assert schema[3].metadata is None
# Invalid input is passed through (ok) but does not
# have field_id in parquet (not tested)
- assert schema[4].metadata[field_id] == b'xyz'
- assert schema[5].metadata[field_id] == b'-1000'
+ assert schema[4].metadata is not None
+ assert schema[4].metadata[field_id] == b'xyz' # type: ignore[index]
+ assert schema[5].metadata[field_id] == b'-1000' # type: ignore[index]
def test_parquet_file_page_index():
@@ -495,13 +507,14 @@ def test_multi_dataset_metadata(tempdir):
_meta.append_row_groups(meta[0])
# Write merged metadata-only file
+ assert _meta is not None
with open(metapath, "wb") as f:
- _meta.write_metadata_file(f)
+ _meta.write_metadata_file(f) # type: ignore[union-attr]
# Read back the metadata
meta = pq.read_metadata(metapath)
md = meta.to_dict()
- _md = _meta.to_dict()
+ _md = _meta.to_dict() # type: ignore[union-attr]
for key in _md:
if key != 'serialized_size':
assert _md[key] == md[key]
@@ -695,13 +708,14 @@ def test_metadata_schema_filesystem(tempdir):
assert pq.read_metadata(
file_path, filesystem=LocalFileSystem()).equals(metadata)
assert pq.read_metadata(
- fname, filesystem=f'file:///{tempdir}').equals(metadata)
+ fname, filesystem=f'file:///{tempdir}').equals(metadata) # type: ignore[arg-type]
assert pq.read_schema(file_uri).equals(schema)
assert pq.read_schema(
file_path, filesystem=LocalFileSystem()).equals(schema)
assert pq.read_schema(
- fname, filesystem=f'file:///{tempdir}').equals(schema)
+ fname, filesystem=f'file:///{tempdir}').equals(schema) # type: ignore[arg-type]
with util.change_cwd(tempdir):
# Pass `filesystem` arg
@@ -721,7 +735,7 @@ def test_metadata_equals():
original_metadata = pq.read_metadata(pa.BufferReader(buf))
match = "Argument 'other' has incorrect type"
with pytest.raises(TypeError, match=match):
- original_metadata.equals(None)
+ original_metadata.equals(None) # type: ignore[arg-type]
@pytest.mark.parametrize("t1,t2,expected_error", (
@@ -810,7 +824,7 @@ def msg(c):
pq.ColumnChunkMetaData()
with pytest.raises(TypeError, match=msg("RowGroupMetaData")):
- pq.RowGroupMetaData()
+ pq.RowGroupMetaData() # type: ignore[call-arg]
with pytest.raises(TypeError, match=msg("FileMetaData")):
- pq.FileMetaData()
+ pq.FileMetaData() # type: ignore[call-arg]
diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
index 53864ff15ea..91ae2385734 100644
--- a/python/pyarrow/tests/parquet/test_pandas.py
+++ b/python/pyarrow/tests/parquet/test_pandas.py
@@ -17,11 +17,12 @@
import io
import json
+from typing import TYPE_CHECKING, cast
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import pyarrow as pa
@@ -29,22 +30,29 @@
from pyarrow.util import guid
from pyarrow.vendored.version import Version
-try:
- import pyarrow.parquet as pq
- from pyarrow.tests.parquet.common import (_read_table, _test_dataframe,
- _write_table)
-except ImportError:
- pq = None
-
-
-try:
+if TYPE_CHECKING:
import pandas as pd
import pandas.testing as tm
+ import pyarrow.parquet as pq
+ from pyarrow.tests.parquet.common import (
+ _read_table, _roundtrip_pandas_dataframe, _test_dataframe,
+ _write_table, alltypes_sample
+ )
+else:
+ try:
+ import pyarrow.parquet as pq
+ from pyarrow.tests.parquet.common import (
+ _read_table, _test_dataframe, _write_table, alltypes_sample,
+ _roundtrip_pandas_dataframe
+ )
- from pyarrow.tests.parquet.common import (_roundtrip_pandas_dataframe,
- alltypes_sample)
-except ImportError:
- pd = tm = None
+ except ImportError:
+ pass
+ try:
+ import pandas as pd
+ import pandas.testing as tm
+ except ImportError:
+ pass
# Marks all of the tests in this module
@@ -58,11 +66,14 @@ def test_pandas_parquet_custom_metadata(tempdir):
filename = tempdir / 'pandas_roundtrip.parquet'
arrow_table = pa.Table.from_pandas(df)
+ assert arrow_table.schema.metadata is not None
assert b'pandas' in arrow_table.schema.metadata
_write_table(arrow_table, filename)
- metadata = pq.read_metadata(filename).metadata
+ file_metadata = pq.read_metadata(filename)
+ metadata = file_metadata.metadata
+ assert metadata is not None
assert b'pandas' in metadata
js = json.loads(metadata[b'pandas'].decode('utf8'))
@@ -117,10 +128,13 @@ def test_attributes_metadata_persistence(tempdir):
}
table = pa.Table.from_pandas(df)
+ assert table.schema.metadata is not None
assert b'attributes' in table.schema.metadata[b'pandas']
_write_table(table, filename)
- metadata = pq.read_metadata(filename).metadata
+ file_metadata = pq.read_metadata(filename)
+ metadata = file_metadata.metadata
+ assert metadata is not None
js = json.loads(metadata[b'pandas'].decode('utf8'))
assert 'attributes' in js
assert js['attributes'] == df.attrs
@@ -297,8 +311,8 @@ def test_pandas_parquet_configuration_options(tempdir):
@pytest.mark.pandas
def test_spark_flavor_preserves_pandas_metadata():
df = _test_dataframe(size=100)
- df.index = np.arange(0, 10 * len(df), 10)
- df.index.name = 'foo'
+ df.index = np.arange(0, 10 * len(df), 10) # type: ignore[assignment]
+ df.index.name = 'foo' # type: ignore[attr-defined]
result = _roundtrip_pandas_dataframe(df, {'flavor': 'spark'})
tm.assert_frame_equal(result, df)
@@ -450,7 +464,9 @@ def test_backwards_compatible_column_metadata_handling(datadir):
table = _read_table(
path, columns=['a'])
result = table.to_pandas()
- tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True))
+ expected_df = expected[['a']].reset_index(drop=True)
+ assert isinstance(expected_df, pd.DataFrame)
+ tm.assert_frame_equal(result, expected_df)
@pytest.mark.pandas
@@ -510,7 +526,7 @@ def test_pandas_categorical_roundtrip():
codes = np.array([2, 0, 0, 2, 0, -1, 2], dtype='int32')
categories = ['foo', 'bar', 'baz']
df = pd.DataFrame({'x': pd.Categorical.from_codes(
- codes, categories=categories)})
+ codes, categories=categories)}) # type: ignore[arg-type]
buf = pa.BufferOutputStream()
pq.write_table(pa.table(df), buf)
@@ -555,15 +571,18 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir):
table, str(tempdir / "case1"), partition_cols=['part'],
)
result = pq.read_table(str(tempdir / "case1")).to_pandas()
- tm.assert_frame_equal(result[["col"]], df[["col"]])
+ tm.assert_frame_equal(
+ result[["col"]], df[["col"]])
pq.write_to_dataset(table, str(tempdir / "case2"))
result = pq.read_table(str(tempdir / "case2")).to_pandas()
- tm.assert_frame_equal(result[["col"]], df[["col"]])
+ tm.assert_frame_equal(
+ result[["col"]], df[["col"]])
pq.write_table(table, str(tempdir / "data.parquet"))
result = pq.read_table(str(tempdir / "data.parquet")).to_pandas()
- tm.assert_frame_equal(result[["col"]], df[["col"]])
+ tm.assert_frame_equal(
+ result[["col"]], df[["col"]])
@pytest.mark.pandas
diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py
index a62b5c3298c..3c5182dc56e 100644
--- a/python/pyarrow/tests/parquet/test_parquet_file.py
+++ b/python/pyarrow/tests/parquet/test_parquet_file.py
@@ -30,15 +30,14 @@
import pyarrow.parquet as pq
from pyarrow.tests.parquet.common import _write_table
except ImportError:
- pq = None
+ pass
try:
- import pandas as pd
import pandas.testing as tm
from pyarrow.tests.parquet.common import alltypes_sample
except ImportError:
- pd = tm = None
+ pass
# Marks all of the tests in this module
@@ -172,7 +171,7 @@ def test_scan_contents():
pf = pq.ParquetFile(buf)
assert pf.scan_contents() == 10000
- assert pf.scan_contents(df.columns[:4]) == 10000
+ assert pf.scan_contents(list(df.columns[:4])) == 10000
def test_parquet_file_pass_directory_instead_of_file(tempdir):
@@ -215,7 +214,7 @@ def test_iter_batches_columns_reader(tempdir, batch_size):
chunk_size=chunk_size)
file_ = pq.ParquetFile(filename)
- for columns in [df.columns[:10], df.columns[10:]]:
+ for columns in [list(df.columns[:10]), list(df.columns[10:])]:
batches = file_.iter_batches(batch_size=batch_size, columns=columns)
batch_starts = range(0, total_size+batch_size, batch_size)
for batch, start in zip(batches, batch_starts):
@@ -263,9 +262,10 @@ def get_all_batches(f):
tm.assert_frame_equal(
batches[batch_no].to_pandas().reset_index(drop=True),
- file_.read_row_groups([i]).to_pandas().iloc[900:].reset_index(
- drop=True
- )
+ file_
+ .read_row_groups([i])
+ .to_pandas().iloc[900:]
+ .reset_index(drop=True) # type: ignore[arg-type]
)
batch_no += 1
@@ -346,6 +346,7 @@ def test_read_statistics():
buf.seek(0)
statistics = pq.ParquetFile(buf).read().columns[0].chunks[0].statistics
+ assert statistics is not None
assert statistics.is_null_count_exact is True
assert statistics.null_count == 1
assert statistics.distinct_count is None
@@ -389,7 +390,8 @@ def test_parquet_file_fsspec_support():
def test_parquet_file_fsspec_support_through_filesystem_argument():
try:
- from fsspec.implementations.memory import MemoryFileSystem
+ from fsspec.implementations.memory import ( # type: ignore[import-untyped]
+ MemoryFileSystem)
except ImportError:
pytest.skip("fsspec is not installed, skipping test")
@@ -412,7 +414,7 @@ def test_parquet_file_hugginface_support():
pytest.skip("fsspec is not installed, skipping Hugging Face test")
fake_hf_module = types.ModuleType("huggingface_hub")
- fake_hf_module.HfFileSystem = MemoryFileSystem
+ fake_hf_module.HfFileSystem = MemoryFileSystem # type: ignore[attr-defined]
with mock.patch.dict("sys.modules", {"huggingface_hub": fake_hf_module}):
uri = "hf://datasets/apache/arrow/test.parquet"
table = pa.table({"a": range(10)})
@@ -424,7 +426,7 @@ def test_parquet_file_hugginface_support():
def test_fsspec_uri_raises_if_fsspec_is_not_available():
# sadly cannot patch sys.modules because cython will still be able to import fsspec
try:
- import fsspec # noqa: F401
+ import fsspec # type: ignore[import-untyped] # noqa: F401
except ImportError:
pass
else:
diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py
index a49441f09f4..87787a0f3f0 100644
--- a/python/pyarrow/tests/parquet/test_parquet_writer.py
+++ b/python/pyarrow/tests/parquet/test_parquet_writer.py
@@ -23,9 +23,10 @@
try:
import pyarrow.parquet as pq
from pyarrow.tests.parquet.common import (_read_table, _test_dataframe,
+ # type: ignore[attr-defined]
_test_table, _range_integers)
except ImportError:
- pq = None
+ pass
try:
@@ -33,7 +34,7 @@
import pandas.testing as tm
except ImportError:
- pd = tm = None
+ pass
# Marks all of the tests in this module
@@ -94,10 +95,10 @@ def test_parquet_invalid_writer(tempdir):
# avoid segfaults with invalid construction
with pytest.raises(TypeError):
some_schema = pa.schema([pa.field("x", pa.int32())])
- pq.ParquetWriter(None, some_schema)
+ pq.ParquetWriter(None, some_schema) # type: ignore[arg-type]
with pytest.raises(TypeError):
- pq.ParquetWriter(tempdir / "some_path", None)
+ pq.ParquetWriter(tempdir / "some_path", None) # type: ignore[arg-type]
@pytest.mark.pandas
@@ -335,6 +336,7 @@ def test_parquet_writer_store_schema(tempdir):
writer.write_table(table)
meta = pq.read_metadata(path1)
+ assert meta.metadata is not None
assert b'ARROW:schema' in meta.metadata
assert meta.metadata[b'ARROW:schema']
@@ -357,6 +359,7 @@ def test_parquet_writer_append_key_value_metadata(tempdir):
writer.add_key_value_metadata({'key2': '2', 'key3': '3'})
reader = pq.ParquetFile(path)
metadata = reader.metadata.metadata
+ assert metadata is not None
assert metadata[b'key1'] == b'1'
assert metadata[b'key2'] == b'2'
assert metadata[b'key3'] == b'3'
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index 8319c9ce3e4..434d5efc7d4 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -17,31 +17,32 @@
import datetime
import sys
+from typing import Any
-import pytest
-import hypothesis as h
-import hypothesis.strategies as st
+import pytest # type: ignore[import-not-found]
+import hypothesis as h # type: ignore[import-not-found]
+import hypothesis.strategies as st # type: ignore[import-not-found]
try:
- import hypothesis.extra.numpy as npst
+ import hypothesis.extra.numpy as npst # type: ignore[import-not-found]
except ImportError:
- npst = None
+ npst = None # type: ignore[assignment]
try:
- import hypothesis.extra.pytz as tzst
+ import hypothesis.extra.pytz as tzst # type: ignore[import-not-found]
except ImportError:
- tzst = None
+ tzst = None # type: ignore[assignment]
try:
import zoneinfo
except ImportError:
- zoneinfo = None
+ zoneinfo = None # type: ignore[assignment]
if sys.platform == 'win32':
try:
- import tzdata # noqa:F401
+ import tzdata # type: ignore[import-not-found, import-untyped] # noqa:F401
except ImportError:
- zoneinfo = None
+ zoneinfo = None # type: ignore[assignment]
try:
import numpy as np
except ImportError:
- np = None
+ np = None # type: ignore[assignment]
import pyarrow as pa
@@ -134,12 +135,12 @@
timezones = st.one_of(st.none(), st.timezones())
else:
timezones = st.none()
-timestamp_types = st.builds(
+timestamp_types: Any = st.builds(
pa.timestamp,
unit=st.sampled_from(['s', 'ms', 'us', 'ns']),
tz=timezones
)
-duration_types = st.builds(
+duration_types: Any = st.builds(
pa.duration,
st.sampled_from(['s', 'ms', 'us', 'ns'])
)
@@ -234,13 +235,13 @@ def schemas(type_strategy=primitive_types, max_fields=None):
all_types = st.deferred(
lambda: (
- primitive_types |
- list_types() |
- struct_types() |
- dictionary_types() |
- map_types() |
- list_types(all_types) |
- struct_types(all_types)
+ primitive_types
+ | list_types()
+ | struct_types()
+ | dictionary_types()
+ | map_types()
+ | list_types(all_types) # type: ignore[has-type]
+ | struct_types(all_types) # type: ignore[has-type]
)
)
all_fields = fields(all_types)
@@ -280,6 +281,7 @@ def arrays(draw, type, size=None, nullable=True):
elif not isinstance(size, int):
raise TypeError('Size must be an integer')
+ assert npst is not None
if pa.types.is_null(ty):
h.assume(nullable)
value = st.none()
@@ -292,6 +294,7 @@ def arrays(draw, type, size=None, nullable=True):
values = draw(npst.arrays(ty.to_pandas_dtype(), shape=(size,)))
# Workaround ARROW-4952: no easy way to assert array equality
# in a NaN-tolerant way.
+ assert np is not None
values[np.isnan(values)] = -42.0
return pa.array(values, type=ty)
elif pa.types.is_decimal(ty):
@@ -317,9 +320,11 @@ def arrays(draw, type, size=None, nullable=True):
offset = ty.tz.split(":")
offset_hours = int(offset[0])
offset_min = int(offset[1])
- tz = datetime.timedelta(hours=offset_hours, minutes=offset_min)
+ tz = datetime.timezone(
+ datetime.timedelta(hours=offset_hours, minutes=offset_min)
+ )
except ValueError:
- tz = zoneinfo.ZoneInfo(ty.tz)
+ tz = zoneinfo.ZoneInfo(str(ty.tz))
value = st.datetimes(timezones=st.just(tz), min_value=min_datetime,
max_value=max_datetime)
elif pa.types.is_duration(ty):
@@ -478,7 +483,9 @@ def pandas_compatible_list_types(
dictionary_types(
value_strategy=pandas_compatible_dictionary_value_types
),
- pandas_compatible_list_types(pandas_compatible_types),
- struct_types(pandas_compatible_types)
+ pandas_compatible_list_types(
+ pandas_compatible_types # type: ignore[has-type]
+ ),
+ struct_types(pandas_compatible_types) # type: ignore[has-type]
)
)
diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py
index cb97e3849fd..1285534d08a 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -16,6 +16,7 @@
# under the License.
import pytest
+from typing import Literal, cast
import pyarrow as pa
import pyarrow.compute as pc
@@ -37,9 +38,10 @@
try:
import pyarrow.dataset as ds
- from pyarrow.acero import ScanNodeOptions
+ from pyarrow._dataset import ScanNodeOptions
except ImportError:
- ds = None
+ ds = None # type: ignore[assignment]
+ ScanNodeOptions = None # type: ignore[assignment, misc]
pytestmark = pytest.mark.acero
@@ -53,7 +55,6 @@ def table_source():
def test_declaration():
-
table = pa.table({'a': [1, 2, 3], 'b': [4, 5, 6]})
table_opts = TableSourceNodeOptions(table)
filter_opts = FilterNodeOptions(field('a') > 1)
@@ -89,7 +90,8 @@ def test_declaration_to_reader(table_source):
def test_table_source():
with pytest.raises(TypeError):
- TableSourceNodeOptions(pa.record_batch([pa.array([1, 2, 3])], ["a"]))
+ TableSourceNodeOptions(pa.record_batch(
+ [pa.array([1, 2, 3])], ["a"]))
table_source = TableSourceNodeOptions(None)
decl = Declaration("table_source", table_source)
@@ -110,9 +112,9 @@ def test_filter(table_source):
# requires a pyarrow Expression
with pytest.raises(TypeError):
- FilterNodeOptions(pa.array([True, False, True]))
+ FilterNodeOptions(pa.array([True, False, True])) # type: ignore[arg-type]
with pytest.raises(TypeError):
- FilterNodeOptions(None)
+ FilterNodeOptions(None) # type: ignore[arg-type]
@pytest.mark.parametrize('source', [
@@ -267,19 +269,23 @@ def test_order_by():
table = pa.table({'a': [1, 2, 3, 4], 'b': [1, 3, None, 2]})
table_source = Declaration("table_source", TableSourceNodeOptions(table))
- ord_opts = OrderByNodeOptions([("b", "ascending")])
+ sort_keys = [("b", "ascending")]
+ sort_keys = cast(list[tuple[str, Literal["ascending", "descending"]]], sort_keys)
+ ord_opts = OrderByNodeOptions(sort_keys)
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
result = decl.to_table()
expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
assert result.equals(expected)
- ord_opts = OrderByNodeOptions([(field("b"), "descending")])
+ ord_opts = OrderByNodeOptions(
+ [(field("b"), "descending")]) # type: ignore[arg-type]
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
result = decl.to_table()
expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]})
assert result.equals(expected)
- ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")
+ ord_opts = OrderByNodeOptions(
+ [(1, "descending")], null_placement="at_start") # type: ignore[arg-type]
decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
result = decl.to_table()
expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
@@ -294,10 +300,12 @@ def test_order_by():
_ = decl.to_table()
with pytest.raises(ValueError, match="\"decreasing\" is not a valid sort order"):
- _ = OrderByNodeOptions([("b", "decreasing")])
+ _ = OrderByNodeOptions([("b", "decreasing")]) # type: ignore[arg-type]
with pytest.raises(ValueError, match="\"start\" is not a valid null placement"):
- _ = OrderByNodeOptions([("b", "ascending")], null_placement="start")
+ _ = OrderByNodeOptions(
+ [("b", "ascending")], null_placement="start" # type: ignore[arg-type]
+ )
def test_hash_join():
@@ -382,7 +390,9 @@ def test_hash_join_with_residual_filter():
# test filter expression referencing columns from both side
join_opts = HashJoinNodeOptions(
"left outer", left_keys="key", right_keys="key",
- filter_expression=pc.equal(pc.field("a"), 5) | pc.equal(pc.field("b"), 10)
+ filter_expression=(
+ pc.equal(pc.field("a"), 5)
+ | pc.equal(pc.field("b"), 10)) # type: ignore[reportOperatorIssue]
)
joined = Declaration(
"hashjoin", options=join_opts, inputs=[left_source, right_source])
@@ -462,6 +472,8 @@ def test_asof_join():
@pytest.mark.dataset
def test_scan(tempdir):
+ assert ds is not None
+ assert ScanNodeOptions is not None
table = pa.table({'a': [1, 2, 3], 'b': [4, 5, 6]})
ds.write_dataset(table, tempdir / "dataset", format="parquet")
dataset = ds.dataset(tempdir / "dataset", format="parquet")
@@ -486,11 +498,10 @@ def test_scan(tempdir):
assert decl.to_table().num_rows == 0
# projection scan option
-
scan_opts = ScanNodeOptions(dataset, columns={"a2": pc.multiply(field("a"), 2)})
decl = Declaration("scan", scan_opts)
result = decl.to_table()
# "a" is included in the result (needed later on for the actual projection)
assert result["a"].to_pylist() == [1, 2, 3]
# "b" is still included, but without data as it will be removed by the projection
- assert pc.all(result["b"].is_null()).as_py()
+ assert pc.all(result.column("b").is_null()).as_py()
diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
index 76a766984da..9f61bc7ddfe 100644
--- a/python/pyarrow/tests/test_adhoc_memory_leak.py
+++ b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -20,7 +20,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.tests.util as test_util
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index ec361159c5f..969adcb87b0 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -18,19 +18,23 @@
from collections.abc import Iterable
import datetime
import decimal
-import hypothesis as h
-import hypothesis.strategies as st
+import hypothesis as h # type: ignore[import-not-found]
+import hypothesis.strategies as st # type: ignore[import-not-found]
import itertools
-import pytest
+import pytest # type: ignore[import-not-found]
import struct
import subprocess
import sys
import weakref
+from typing import TYPE_CHECKING
-try:
+if TYPE_CHECKING:
import numpy as np
-except ImportError:
- np = None
+else:
+ try:
+ import numpy as np
+ except ImportError:
+ np = None
import pyarrow as pa
import pyarrow.tests.strategies as past
@@ -71,7 +75,7 @@ def test_constructor_raises():
# This could happen by wrong capitalization.
# ARROW-2638: prevent calling extension class constructors directly
with pytest.raises(TypeError):
- pa.Array([1, 2])
+ pa.Array([1, 2]) # type: ignore[reportCallIssue]
def test_list_format():
@@ -321,11 +325,11 @@ def test_asarray():
arr = pa.array(range(4))
- # The iterator interface gives back an array of Int64Value's
+ # The iterator interface gives back an array of Int64Scalar's
np_arr = np.asarray([_ for _ in arr])
assert np_arr.tolist() == [0, 1, 2, 3]
assert np_arr.dtype == np.dtype('O')
- assert isinstance(np_arr[0], pa.lib.Int64Value)
+ assert isinstance(np_arr[0], pa.lib.Int64Scalar)
# Calling with the arrow array gives back an array with 'int64' dtype
np_arr = np.asarray(arr)
@@ -649,8 +653,8 @@ def test_array_eq():
@pytest.mark.numpy
def test_array_from_buffers():
- values_buf = pa.py_buffer(np.int16([4, 5, 6, 7]))
- nulls_buf = pa.py_buffer(np.uint8([0b00001101]))
+ values_buf = pa.py_buffer(np.array([4, 5, 6, 7], dtype=np.int16))
+ nulls_buf = pa.py_buffer(np.array([0b00001101], dtype=np.uint8))
arr = pa.Array.from_buffers(pa.int16(), 4, [nulls_buf, values_buf])
assert arr.type == pa.int16()
assert arr.to_pylist() == [4, None, 6, 7]
@@ -665,7 +669,9 @@ def test_array_from_buffers():
assert arr.to_pylist() == [None, 6, 7]
with pytest.raises(TypeError):
- pa.Array.from_buffers(pa.int16(), 3, ['', ''], offset=1)
+ pa.Array.from_buffers(
+ pa.int16(), 3, ['', ''], offset=1 # type: ignore[reportArgumentType]
+ )
def test_string_binary_from_buffers():
@@ -859,7 +865,8 @@ def test_struct_array_from_chunked():
chunked_arr = pa.chunked_array([[1, 2, 3], [4, 5, 6]])
with pytest.raises(TypeError, match="Expected Array"):
- pa.StructArray.from_arrays([chunked_arr], ["foo"])
+ pa.StructArray.from_arrays(
+ [chunked_arr], ["foo"]) # type: ignore[reportArgumentType]
@pytest.mark.parametrize("offset", (0, 1))
@@ -1179,24 +1186,24 @@ def test_map_from_arrays():
keys = pa.array(pykeys, type='binary')
items = pa.array(pyitems, type='i4')
- result = pa.MapArray.from_arrays(offsets, keys, items)
+ result = pa.MapArray.from_arrays(offsets, keys, items) # type: ignore[arg-type]
expected = pa.array(pyentries, type=pa.map_(pa.binary(), pa.int32()))
assert result.equals(expected)
# pass in the type explicitly
- result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
- keys.type,
- items.type
- ))
+ result = pa.MapArray.from_arrays(offsets, keys, items, # type: ignore[arg-type]
+ pa.map_(keys.type, items.type))
assert result.equals(expected)
# pass in invalid types
with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'):
- pa.MapArray.from_arrays(offsets, keys, items, pa.string())
+ pa.MapArray.from_arrays(
+ offsets, keys, items, pa.string() # type: ignore[arg-type]
+ )
with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'):
- pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+ pa.MapArray.from_arrays(offsets, keys, items, pa.map_( # type: ignore[arg-type]
keys.type,
# Larger than the original i4
pa.int64()
@@ -1234,7 +1241,7 @@ def test_map_from_arrays():
# error if null bitmap and offsets with nulls passed
msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
with pytest.raises(pa.ArrowInvalid, match=msg1):
- pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+ pa.MapArray.from_arrays(offsets, keys, items, pa.map_( # type: ignore[arg-type]
keys.type,
items.type),
mask=pa.array([False, True, False], type=pa.bool_())
@@ -2642,7 +2649,7 @@ def test_interval_array_from_relativedelta():
assert arr.type == pa.month_day_nano_interval()
expected_list = [
None,
- pa.MonthDayNano([13, 8,
+ pa.MonthDayNano([13, 8, # type: ignore[arg-type]
(datetime.timedelta(seconds=1, microseconds=1,
minutes=1, hours=1) //
datetime.timedelta(microseconds=1)) * 1000])]
@@ -2675,7 +2682,7 @@ def test_interval_array_from_tuple():
assert arr.type == pa.month_day_nano_interval()
expected_list = [
None,
- pa.MonthDayNano([1, 2, -3])]
+ pa.MonthDayNano([1, 2, -3])] # type: ignore[arg-type]
expected = pa.array(expected_list)
assert arr.equals(expected)
assert arr.to_pylist() == expected_list
@@ -2696,8 +2703,8 @@ def test_interval_array_from_dateoffset():
assert arr.type == pa.month_day_nano_interval()
expected_list = [
None,
- pa.MonthDayNano([13, 8, 3661000001001]),
- pa.MonthDayNano([0, 0, 0])]
+ pa.MonthDayNano([13, 8, 3661000001001]), # type: ignore[arg-type]
+ pa.MonthDayNano([0, 0, 0])] # type: ignore[arg-type]
expected = pa.array(expected_list)
assert arr.equals(expected)
expected_from_pandas = [
@@ -2861,7 +2868,7 @@ def test_buffers_primitive():
# Slicing does not affect the buffers but the offset
a_sliced = a[1:]
buffers = a_sliced.buffers()
- a_sliced.offset == 1
+ assert a_sliced.offset == 1
assert len(buffers) == 2
null_bitmap = buffers[0].to_pybytes()
assert 1 <= len(null_bitmap) <= 64 # XXX this is varying
@@ -2869,7 +2876,7 @@ def test_buffers_primitive():
assert struct.unpack('hhxxh', buffers[1].to_pybytes()) == (1, 2, 4)
- a = pa.array(np.int8([4, 5, 6]))
+ a = pa.array(np.array([4, 5, 6], dtype=np.int8))
buffers = a.buffers()
assert len(buffers) == 2
# No null bitmap from Numpy int array
@@ -2955,7 +2962,7 @@ def test_nbytes_size():
def test_invalid_tensor_constructor_repr():
# ARROW-2638: prevent calling extension class constructors directly
with pytest.raises(TypeError):
- repr(pa.Tensor([1]))
+ repr(pa.Tensor([1])) # type: ignore[reportCallIssue]
def test_invalid_tensor_construction():
@@ -3473,7 +3480,7 @@ def test_array_supported_masks():
with pytest.raises(pa.ArrowTypeError):
arr = pa.array([4, None, 4, 3],
- mask=[1.0, 2.0, 3.0, 4.0])
+ mask=[1.0, 2.0, 3.0, 4.0]) # type: ignore[reportArgumentType]
with pytest.raises(pa.ArrowTypeError):
arr = pa.array([4, None, 4, 3],
@@ -3760,11 +3767,11 @@ def test_concat_array_invalid_type():
# ARROW-9920 - do not segfault on non-array input
with pytest.raises(TypeError, match="should contain Array objects"):
- pa.concat_arrays([None])
+ pa.concat_arrays([None]) # type: ignore[reportArgumentType]
arr = pa.chunked_array([[0, 1], [3, 4]])
with pytest.raises(TypeError, match="should contain Array objects"):
- pa.concat_arrays(arr)
+ pa.concat_arrays(arr) # type: ignore[reportArgumentType]
@pytest.mark.pandas
@@ -4293,7 +4300,7 @@ def test_non_cpu_array():
with pytest.raises(NotImplementedError):
[i for i in iter(arr)]
with pytest.raises(NotImplementedError):
- arr == arr2
+ _ = arr == arr2
with pytest.raises(NotImplementedError):
arr.is_null()
with pytest.raises(NotImplementedError):
diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py
index 481c387d533..f8abec90269 100644
--- a/python/pyarrow/tests/test_cffi.py
+++ b/python/pyarrow/tests/test_cffi.py
@@ -24,7 +24,7 @@
try:
from pyarrow.cffi import ffi
except ImportError:
- ffi = None
+ pass
import pytest
@@ -32,7 +32,7 @@
import pandas as pd
import pandas.testing as tm
except ImportError:
- pd = tm = None
+ pd = None # type: ignore[assignment]
needs_cffi = pytest.mark.skipif(ffi is None,
@@ -148,7 +148,7 @@ def test_export_import_type():
# Invalid format string
pa.int32()._export_to_c(ptr_schema)
bad_format = ffi.new("char[]", b"zzz")
- c_schema.format = bad_format
+ c_schema.format = bad_format # type: ignore[attr-defined]
with pytest.raises(ValueError,
match="Invalid or unsupported format string"):
pa.DataType._import_from_c(ptr_schema)
@@ -248,9 +248,9 @@ def test_export_import_device_array():
arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
arr._export_to_c_device(ptr_array)
- assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1
- assert c_array.device_id == -1
- assert c_array.array.length == 2
+ assert c_array.device_type == 1 # type: ignore[attr-defined] # ARROW_DEVICE_CPU 1
+ assert c_array.device_id == -1 # type: ignore[attr-defined]
+ assert c_array.array.length == 2 # type: ignore[attr-defined]
def check_export_import_schema(schema_factory, expected_schema_factory=None):
@@ -310,9 +310,10 @@ def test_export_import_schema_float_pointer():
match = "Passing a pointer value as a float is unsafe"
with pytest.warns(UserWarning, match=match):
- make_schema()._export_to_c(float(ptr_schema))
+ make_schema()._export_to_c(float(ptr_schema)) # type: ignore[arg-type]
with pytest.warns(UserWarning, match=match):
- schema_new = pa.Schema._import_from_c(float(ptr_schema))
+ schema_new = pa.Schema._import_from_c(
+ float(ptr_schema)) # type: ignore[arg-type]
assert schema_new == make_schema()
@@ -405,9 +406,9 @@ def test_export_import_device_batch():
ptr_array = int(ffi.cast("uintptr_t", c_array))
batch = make_batch()
batch._export_to_c_device(ptr_array)
- assert c_array.device_type == 1 # ARROW_DEVICE_CPU 1
- assert c_array.device_id == -1
- assert c_array.array.length == 2
+ assert c_array.device_type == 1 # type: ignore[attr-defined] # ARROW_DEVICE_CPU 1
+ assert c_array.device_id == -1 # type: ignore[attr-defined]
+ assert c_array.array.length == 2 # type: ignore[attr-defined]
def _export_import_batch_reader(ptr_stream, reader_factory):
@@ -764,7 +765,7 @@ def test_import_device_no_cuda():
# patch the device type of the struct, this results in an invalid ArrowDeviceArray
# but this is just to test we raise am error before actually importing buffers
- c_array.device_type = 2 # ARROW_DEVICE_CUDA
+ c_array.device_type = 2 # type: ignore[attr-defined] # ARROW_DEVICE_CUDA
with pytest.raises(ImportError, match="Trying to import data on a CUDA device"):
pa.Array._import_from_c_device(ptr_array, arr.type)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index fe810a6dc90..0ea2590a9f5 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -31,12 +31,12 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
try:
import pandas as pd
except ImportError:
- pd = None
+ pass
import pyarrow as pa
import pyarrow.compute as pc
@@ -45,7 +45,7 @@
try:
import pyarrow.substrait as pas
except ImportError:
- pas = None
+ pas = None # type: ignore[assignment]
exported_functions = [
func for (name, func) in sorted(pc.__dict__.items())
@@ -329,9 +329,11 @@ def test_function_attributes():
def test_input_type_conversion():
# Automatic array conversion from Python
arr = pc.add([1, 2], [4, None])
+ assert isinstance(arr, pa.Array)
assert arr.to_pylist() == [5, None]
# Automatic scalar conversion from Python
arr = pc.add([1, 2], 4)
+ assert isinstance(arr, pa.Array)
assert arr.to_pylist() == [5, 6]
# Other scalar type
assert pc.equal(["foo", "bar", None],
@@ -779,9 +781,11 @@ def test_min_max():
assert s.as_py() == {'min': 1, 'max': 6}
s = pc.min_max(data, options=pc.ScalarAggregateOptions())
assert s.as_py() == {'min': 1, 'max': 6}
- s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=True))
+ s = pc.min_max(data, options=pc.ScalarAggregateOptions(
+ skip_nulls=True))
assert s.as_py() == {'min': 1, 'max': 6}
- s = pc.min_max(data, options=pc.ScalarAggregateOptions(skip_nulls=False))
+ s = pc.min_max(data, options=pc.ScalarAggregateOptions(
+ skip_nulls=False))
assert s.as_py() == {'min': None, 'max': None}
# Options as dict of kwargs
@@ -799,11 +803,11 @@ def test_min_max():
# Wrong options type
options = pc.TakeOptions()
with pytest.raises(TypeError):
- s = pc.min_max(data, options=options)
+ s = pc.min_max(data, options=options) # type: ignore[arg-type]
# Missing argument
with pytest.raises(TypeError, match="min_max takes 1 positional"):
- s = pc.min_max()
+ s = pc.min_max() # type: ignore[call-arg]
def test_any():
@@ -844,12 +848,12 @@ def test_all():
assert pc.all(a, options=options).as_py() is None
a = pa.chunked_array([[True], [True, None]])
- assert pc.all(a).as_py() is True
- assert pc.all(a, options=options).as_py() is None
+ assert pc.all(a).as_py() is True # type: ignore[arg-type]
+ assert pc.all(a, options=options).as_py() is None # type: ignore[arg-type]
a = pa.chunked_array([[True], [False]])
- assert pc.all(a).as_py() is False
- assert pc.all(a, options=options).as_py() is False
+ assert pc.all(a).as_py() is False # type: ignore[arg-type]
+ assert pc.all(a, options=options).as_py() is False # type: ignore[arg-type]
def test_is_valid():
@@ -858,7 +862,7 @@ def test_is_valid():
assert pc.is_valid(data).to_pylist() == [True, True, False]
with pytest.raises(TypeError):
- pc.is_valid(data, options=None)
+ pc.is_valid(data, options=None) # type: ignore[call-arg]
def test_generated_docstrings():
@@ -1037,21 +1041,6 @@ def find_new_unicode_codepoints():
0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c,
0x2c7c, 0x2c7d, 0xa69c, 0xa69d, 0xa770, 0xa7f8,
0xa7f9, 0xab5c, 0xab5d, 0xab5e, 0xab5f, }
-# utf8proc does not store if a codepoint is numeric
-numeric_info_missing = {
- 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03,
- 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96,
- 0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70,
- 0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341,
- 0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2,
- 0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a,
- 0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10,
- 0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e,
- 0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621,
- 0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973,
- 0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, 0x10fc5,
- 0x10fc6, 0x10fc7, 0x10fc8, 0x10fc9, 0x10fca,
- 0x10fcb, }
# utf8proc has no no digit/numeric information
digit_info_missing = {
0xb2, 0xb3, 0xb9, 0x1369, 0x136a, 0x136b, 0x136c,
@@ -1070,6 +1059,7 @@ def find_new_unicode_codepoints():
0x278f, 0x2790, 0x2791, 0x2792, 0x10a40, 0x10a41,
0x10a42, 0x10a43, 0x10e60, 0x10e61, 0x10e62, 0x10e63,
0x10e64, 0x10e65, 0x10e66, 0x10e67, 0x10e68, }
+# utf8proc does not store if a codepoint is numeric
numeric_info_missing = {
0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03,
0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96,
@@ -1104,7 +1094,7 @@ def test_string_py_compat_boolean(function_name, variant):
py_name = function_name.replace('_', '')
ignore = codepoints_ignore.get(function_name, set()) | \
find_new_unicode_codepoints()
- for i in range(128 if ascii else 0x11000):
+ for i in range(128 if ascii else 0x11000): # type: ignore[truthy-function]
if i in range(0xD800, 0xE000):
continue # bug? pyarrow doesn't allow utf16 surrogates
# the issues we know of, we skip
@@ -1593,10 +1583,10 @@ def test_filter_null_type():
@pytest.mark.parametrize("typ", ["array", "chunked_array"])
def test_compare_array(typ):
if typ == "array":
- def con(values):
+ def con(values): # type: ignore[no-redef]
return pa.array(values)
else:
- def con(values):
+ def con(values): # type: ignore[no-redef]
return pa.chunked_array([values])
arr1 = con([1, 2, 3, 4, None])
@@ -1624,10 +1614,10 @@ def con(values):
@pytest.mark.parametrize("typ", ["array", "chunked_array"])
def test_compare_string_scalar(typ):
if typ == "array":
- def con(values):
+ def con(values): # type: ignore[no-redef]
return pa.array(values)
else:
- def con(values):
+ def con(values): # type: ignore[no-redef]
return pa.chunked_array([values])
arr = con(['a', 'b', 'c', None])
@@ -1661,10 +1651,10 @@ def con(values):
@pytest.mark.parametrize("typ", ["array", "chunked_array"])
def test_compare_scalar(typ):
if typ == "array":
- def con(values):
+ def con(values): # type: ignore[no-redef]
return pa.array(values)
else:
- def con(values):
+ def con(values): # type: ignore[no-redef]
return pa.chunked_array([values])
arr = con([1, 2, 3, None])
@@ -1757,8 +1747,9 @@ def test_round_to_integer(ty):
"half_to_odd": [3, 3, 4, 5, -3, -3, -4, None],
}
for round_mode, expected in rmode_and_expected.items():
- options = RoundOptions(round_mode=round_mode)
- result = round(values, options=options)
+ options = RoundOptions(
+ round_mode=round_mode) # type: ignore[arg-type]
+ result = round(values, options=options) # type: ignore[arg-type]
expected_array = pa.array(expected, type=pa.float64())
assert expected_array.equals(result)
@@ -1776,7 +1767,9 @@ def test_round():
for ndigits, expected in ndigits_and_expected.items():
options = pc.RoundOptions(ndigits, "half_towards_infinity")
result = pc.round(values, options=options)
- np.testing.assert_allclose(result, pa.array(expected), equal_nan=True)
+ assert isinstance(result, pa.Array)
+ np.testing.assert_allclose(
+ result, pa.array(expected), equal_nan=True)
assert pc.round(values, ndigits,
round_mode="half_towards_infinity") == result
assert pc.round(values, ndigits, "half_towards_infinity") == result
@@ -1796,6 +1789,7 @@ def test_round_to_multiple():
for multiple, expected in multiple_and_expected.items():
options = pc.RoundToMultipleOptions(multiple, "half_towards_infinity")
result = pc.round_to_multiple(values, options=options)
+ assert isinstance(result, pa.Array)
np.testing.assert_allclose(result, pa.array(expected), equal_nan=True)
assert pc.round_to_multiple(values, multiple,
"half_towards_infinity") == result
@@ -1803,11 +1797,11 @@ def test_round_to_multiple():
for multiple in [0, -2, pa.scalar(-10.4)]:
with pytest.raises(pa.ArrowInvalid,
match="Rounding multiple must be positive"):
- pc.round_to_multiple(values, multiple=multiple)
+ pc.round_to_multiple(values, multiple=multiple) # type: ignore[arg-type]
for multiple in [object, 99999999999999999999999]:
with pytest.raises(TypeError, match="is not a valid multiple type"):
- pc.round_to_multiple(values, multiple=multiple)
+ pc.round_to_multiple(values, multiple=multiple) # type: ignore[arg-type]
def test_round_binary():
@@ -1992,7 +1986,8 @@ def test_logical():
def test_dictionary_decode():
array = pa.array(["a", "a", "b", "c", "b"])
dictionary_array = array.dictionary_encode()
- dictionary_array_decode = pc.dictionary_decode(dictionary_array)
+ dictionary_array_decode = pc.dictionary_decode(
+ dictionary_array)
assert array != dictionary_array
@@ -2172,7 +2167,7 @@ def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx,
# Round `expected` to `scale` digits after the decimal point
expected = expected.quantize(decimal.Decimal(1).scaleb(-decimal_ty.scale))
s = pa.scalar(float_val, type=float_ty)
- actual = pc.cast(s, decimal_ty).as_py()
+ actual = pc.cast(s, decimal_ty).as_py() # type: ignore[union-attr]
if actual != expected:
# Allow the last digit to vary. The tolerance is higher for
# very high precisions as rounding errors can accumulate in
@@ -2264,8 +2259,9 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits):
expected = decimal.Decimal(mantissa) / 2**-float_exp
expected_as_int = round(expected.scaleb(scale))
actual = pc.cast(
- pa.scalar(float_val, type=float_ty), decimal_ty).as_py()
- actual_as_int = round(actual.scaleb(scale))
+ pa.scalar(float_val, type=float_ty), decimal_ty
+ ).as_py() # type: ignore[union-attr]
+ actual_as_int = round(actual.scaleb(scale)) # type: ignore[union-attr]
# We allow for a minor rounding error between expected and actual
assert abs(actual_as_int - expected_as_int) <= 1
@@ -2301,7 +2297,7 @@ def test_strptime():
@pytest.mark.pandas
@pytest.mark.timezone_data
def test_strftime():
- times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
+ times: list[str | None] = ["2018-03-10 09:00", "2038-01-31 12:23", None]
timezones = ["CET", "UTC", "Europe/Ljubljana"]
formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I",
@@ -2311,14 +2307,15 @@ def test_strftime():
formats.extend(["%c", "%x", "%X"])
for timezone in timezones:
- ts = pd.to_datetime(times).tz_localize(timezone)
+ ts = pd.to_datetime(times).tz_localize(timezone) # type: ignore[no-matching-overload]
for unit in ["s", "ms", "us", "ns"]:
tsa = pa.array(ts, type=pa.timestamp(unit, timezone))
for fmt in formats:
options = pc.StrftimeOptions(fmt)
result = pc.strftime(tsa, options=options)
+ st = ts.strftime(fmt) # type: ignore[call-non-callable]
# cast to the same type as result to ignore string vs large_string
- expected = pa.array(ts.strftime(fmt)).cast(result.type)
+ expected = pa.array(st).cast(result.type)
assert result.equals(expected)
fmt = "%Y-%m-%dT%H:%M:%S"
@@ -2326,42 +2323,48 @@ def test_strftime():
# Default format
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
result = pc.strftime(tsa, options=pc.StrftimeOptions())
- expected = pa.array(ts.strftime(fmt)).cast(result.type)
+ st = ts.strftime(fmt) # type: ignore[call-non-callable]
+ expected = pa.array(st).cast(result.type)
assert result.equals(expected)
# Default format plus timezone
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z"))
- expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type)
+ st = ts.strftime(fmt + "%Z") # type: ignore[call-non-callable]
+ expected = pa.array(st).cast(result.type)
assert result.equals(expected)
# Pandas %S is equivalent to %S in arrow for unit="s"
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
options = pc.StrftimeOptions("%S")
result = pc.strftime(tsa, options=options)
- expected = pa.array(ts.strftime("%S")).cast(result.type)
+ st = ts.strftime("%S") # type: ignore[call-non-callable]
+ expected = pa.array(st).cast(result.type)
assert result.equals(expected)
# Pandas %S.%f is equivalent to %S in arrow for unit="us"
tsa = pa.array(ts, type=pa.timestamp("us", timezone))
options = pc.StrftimeOptions("%S")
result = pc.strftime(tsa, options=options)
- expected = pa.array(ts.strftime("%S.%f")).cast(result.type)
+ st = ts.strftime("%S.%f") # type: ignore[call-non-callable]
+ expected = pa.array(st).cast(result.type)
assert result.equals(expected)
# Test setting locale
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
options = pc.StrftimeOptions(fmt, locale="C")
result = pc.strftime(tsa, options=options)
- expected = pa.array(ts.strftime(fmt)).cast(result.type)
+ st = ts.strftime(fmt) # type: ignore[call-non-callable]
+ expected = pa.array(st).cast(result.type)
assert result.equals(expected)
# Test timestamps without timezone
fmt = "%Y-%m-%dT%H:%M:%S"
- ts = pd.to_datetime(times)
+ ts = pd.to_datetime(times) # type: ignore[no-matching-overload]
tsa = pa.array(ts, type=pa.timestamp("s"))
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt))
- expected = pa.array(ts.strftime(fmt)).cast(result.type)
+ st = ts.strftime(fmt) # type: ignore[call-non-callable]
+ expected = pa.array(st).cast(result.type)
# Positional format
assert pc.strftime(tsa, fmt) == result
@@ -2490,10 +2493,11 @@ def test_extract_datetime_components(request):
def test_offset_timezone():
- arr = pc.strptime(["2012-12-12T12:12:12"], format="%Y-%m-%dT%H:%M:%S", unit="s")
+ arr = pc.strptime(pa.array(["2012-12-12T12:12:12"]),
+ format="%Y-%m-%dT%H:%M:%S", unit="s")
zoned_arr = arr.cast(pa.timestamp("s", tz="+05:30"))
- assert pc.hour(zoned_arr)[0].as_py() == 17
- assert pc.minute(zoned_arr)[0].as_py() == 42
+ assert pc.hour(zoned_arr)[0].as_py() == 17 # type: ignore[index,arg-type]
+ assert pc.minute(zoned_arr)[0].as_py() == 42 # type: ignore[index,arg-type]
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@@ -2590,12 +2594,14 @@ def test_assume_timezone():
f"timezone '{timezone}'"):
pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise)
- expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True])
+ expected = ambiguous.tz_localize(
+ timezone, ambiguous=np.array([True, True, True]))
result = pc.assume_timezone(
ambiguous_array, options=options_ambiguous_earliest)
result.equals(pa.array(expected))
- expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False])
+ expected = ambiguous.tz_localize(
+ timezone, ambiguous=np.array([False, False, False]))
result = pc.assume_timezone(
ambiguous_array, options=options_ambiguous_latest)
result.equals(pa.array(expected))
@@ -2684,7 +2690,9 @@ def _check_temporal_rounding(ts, values, unit):
expected = np.where(
expected == ts,
- expected + pd.Timedelta(value, unit_shorthand[unit]),
+ expected + pd.Timedelta(
+ value, unit_shorthand[unit] # type: ignore[arg-type]
+ ),
expected)
np.testing.assert_array_equal(result, expected)
@@ -2746,7 +2754,7 @@ def test_count():
with pytest.raises(ValueError,
match='"something else" is not a valid count mode'):
- pc.count(arr, 'something else')
+ pc.count(arr, 'something else') # type: ignore[arg-type]
def test_index():
@@ -2796,7 +2804,7 @@ def test_partition_nth():
with pytest.raises(
ValueError,
match="'partition_nth_indices' cannot be called without options"):
- pc.partition_nth_indices(data)
+ pc.partition_nth_indices(data) # type: ignore[call-arg]
def test_partition_nth_null_placement():
@@ -2918,7 +2926,7 @@ def test_array_sort_indices():
assert result.to_pylist() == [2, 1, 0, 3]
with pytest.raises(ValueError, match="not a valid sort order"):
- pc.array_sort_indices(arr, order="nonscending")
+ pc.array_sort_indices(arr, order="nonscending") # type: ignore[arg-type]
def test_sort_indices_array():
@@ -2981,23 +2989,29 @@ def test_sort_indices_table():
pc.sort_indices(table, sort_keys=[("unknown", "ascending")])
with pytest.raises(ValueError, match="not a valid sort order"):
- pc.sort_indices(table, sort_keys=[("a", "nonscending")])
+ pc.sort_indices(
+ table, sort_keys=[("a", "nonscending")] # type: ignore[list-item]
+ )
def test_is_in():
arr = pa.array([1, 2, None, 1, 2, 3])
result = pc.is_in(arr, value_set=pa.array([1, 3, None]))
- assert result.to_pylist() == [True, False, True, True, False, True]
+ assert result.to_pylist() == [True, False, True, True,
+ False, True]
result = pc.is_in(arr, value_set=pa.array([1, 3, None]), skip_nulls=True)
- assert result.to_pylist() == [True, False, False, True, False, True]
+ assert result.to_pylist() == [True, False, False, True,
+ False, True]
result = pc.is_in(arr, value_set=pa.array([1, 3]))
- assert result.to_pylist() == [True, False, False, True, False, True]
+ assert result.to_pylist() == [True, False, False, True,
+ False, True]
result = pc.is_in(arr, value_set=pa.array([1, 3]), skip_nulls=True)
- assert result.to_pylist() == [True, False, False, True, False, True]
+ assert result.to_pylist() == [True, False, False, True,
+ False, True]
def test_index_in():
@@ -3061,7 +3075,7 @@ def test_quantile():
with pytest.raises(ValueError, match="Quantile must be between 0 and 1"):
pc.quantile(arr, q=1.1)
with pytest.raises(ValueError, match="not a valid quantile interpolation"):
- pc.quantile(arr, interpolation='zzz')
+ pc.quantile(arr, interpolation='zzz') # type: ignore[arg-type]
def test_tdigest():
@@ -3170,12 +3184,13 @@ def test_cumulative_sum(start, skip_nulls):
# Add `start` offset to expected array before comparing
expected = pc.add(expected_arrays[i], strt if strt is not None
else 0)
+ assert isinstance(expected, pa.Array)
np.testing.assert_array_almost_equal(result.to_numpy(
zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
for strt in ['a', pa.scalar('arrow'), 1.1]:
with pytest.raises(pa.ArrowInvalid):
- pc.cumulative_sum([1, 2, 3], start=strt)
+ pc.cumulative_sum([1, 2, 3], start=strt) # type: ignore[arg-type]
@pytest.mark.numpy
@@ -3225,6 +3240,7 @@ def test_cumulative_prod(start, skip_nulls):
# Multiply `start` offset to expected array before comparing
expected = pc.multiply(expected_arrays[i], strt if strt is not None
else 1)
+ assert isinstance(expected, pa.Array)
np.testing.assert_array_almost_equal(result.to_numpy(
zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
@@ -3283,8 +3299,10 @@ def test_cumulative_max(start, skip_nulls):
expected = pc.max_element_wise(
expected_arrays[i], strt if strt is not None else -1e9,
skip_nulls=False)
- np.testing.assert_array_almost_equal(result.to_numpy(
- zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
+ np.testing.assert_array_almost_equal(
+ result.to_numpy(zero_copy_only=False),
+ expected.to_numpy(zero_copy_only=False)
+ )
for strt in ['a', pa.scalar('arrow'), 1.1]:
with pytest.raises(pa.ArrowInvalid):
@@ -3341,8 +3359,10 @@ def test_cumulative_min(start, skip_nulls):
expected = pc.min_element_wise(
expected_arrays[i], strt if strt is not None else 1e9,
skip_nulls=False)
- np.testing.assert_array_almost_equal(result.to_numpy(
- zero_copy_only=False), expected.to_numpy(zero_copy_only=False))
+ np.testing.assert_array_almost_equal(
+ result.to_numpy(zero_copy_only=False),
+ expected.to_numpy(zero_copy_only=False)
+ )
for strt in ['a', pa.scalar('arrow'), 1.1]:
with pytest.raises(pa.ArrowInvalid):
@@ -3420,7 +3440,7 @@ def test_struct_fields_options():
pc.struct_field(arr, '.a.foo')
with pytest.raises(pa.ArrowInvalid, match="cannot be called without options"):
- pc.struct_field(arr)
+ pc.struct_field(arr) # type: ignore[call-arg]
def test_case_when():
@@ -3472,7 +3492,7 @@ def test_utf8_normalize():
with pytest.raises(
ValueError,
match='"NFZ" is not a valid Unicode normalization form'):
- pc.utf8_normalize(arr, form="NFZ")
+ pc.utf8_normalize(arr, form="NFZ") # type: ignore[arg-type]
def test_random():
@@ -3499,7 +3519,7 @@ def test_random():
with pytest.raises(TypeError,
match=r"initializer should be 'system', an integer, "
r"or a hashable object; got \[\]"):
- pc.random(100, initializer=[])
+ pc.random(100, initializer=[]) # type: ignore[arg-type]
@pytest.mark.parametrize(
@@ -3549,7 +3569,7 @@ def test_rank_options():
match=r'"NonExisting" is not a valid tiebreaker'):
pc.RankOptions(sort_keys="descending",
null_placement="at_end",
- tiebreaker="NonExisting")
+ tiebreaker="NonExisting") # type: ignore[arg-type]
def test_rank_quantile_options():
@@ -3579,7 +3599,7 @@ def test_rank_quantile_options():
assert result.equals(expected_descending)
with pytest.raises(ValueError, match="not a valid sort order"):
- pc.rank_quantile(arr, sort_keys="XXX")
+ pc.rank_quantile(arr, sort_keys="XXX") # type: ignore[arg-type]
def test_rank_normal_options():
@@ -3765,21 +3785,21 @@ def test_expression_construction():
nested_field = pc.field(("nested", "field"))
nested_field2 = pc.field("nested", "field")
- zero | one == string
- ~true == false
+ _ = zero | one == string
+ _ = ~true == false
for typ in ("bool", pa.bool_()):
- field.cast(typ) == true
+ _ = field.cast(typ) == true
- field.isin([1, 2])
- nested_mixed_types.isin(["foo", "bar"])
+ _ = field.isin([1, 2])
+ _ = nested_mixed_types.isin(["foo", "bar"])
nested_field.isin(["foo", "bar"])
nested_field2.isin(["foo", "bar"])
with pytest.raises(TypeError):
- field.isin(1)
+ field.isin(1) # type: ignore[arg-type]
with pytest.raises(pa.ArrowInvalid):
- field != object()
+ _ = field != object()
def test_expression_boolean_operators():
@@ -3788,16 +3808,16 @@ def test_expression_boolean_operators():
false = pc.scalar(False)
with pytest.raises(ValueError, match="cannot be evaluated to python True"):
- true and false
+ _ = true and false
with pytest.raises(ValueError, match="cannot be evaluated to python True"):
- true or false
+ _ = true or false
with pytest.raises(ValueError, match="cannot be evaluated to python True"):
bool(true)
with pytest.raises(ValueError, match="cannot be evaluated to python True"):
- not true
+ _ = not true
def test_expression_call_function():
@@ -3826,7 +3846,7 @@ def test_cast_table_raises():
table = pa.table({'a': [1, 2]})
with pytest.raises(pa.lib.ArrowTypeError):
- pc.cast(table, pa.int64())
+ pc.cast(table, pa.int64()) # type: ignore[arg-type]
@pytest.mark.parametrize("start,stop,expected", (
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 07286125c4c..b5a472e3225 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -21,13 +21,18 @@
import itertools
import math
import re
+from typing import TYPE_CHECKING, cast
import hypothesis as h
import pytest
-try:
+
+if TYPE_CHECKING:
import numpy as np
-except ImportError:
- np = None
+else:
+ try:
+ import numpy as np
+ except ImportError:
+ np = None
from pyarrow.pandas_compat import _pandas_api # noqa
import pyarrow as pa
@@ -66,7 +71,7 @@ def __int__(self):
class MyBrokenInt:
def __int__(self):
- 1/0 # MARKER
+ _ = 1/0 # MARKER
def check_struct_type(ty, expected):
@@ -145,7 +150,7 @@ def test_object_with_getitem():
# https://github.com/apache/arrow/issues/34944
# considered as sequence because of __getitem__, but has no length
with pytest.raises(TypeError, match="has no len()"):
- pa.array(ObjectWithOnlyGetitem())
+ pa.array(ObjectWithOnlyGetitem()) # type: ignore[arg-type]
def _as_list(xs):
@@ -853,7 +858,7 @@ def test_large_binary_value(ty):
assert isinstance(arr, pa.Array)
assert arr.type == ty
assert len(arr) == 4
- buf = arr[1].as_buffer()
+ buf = cast(pa.FixedSizeBinaryScalar, arr[1]).as_buffer()
assert len(buf) == len(s) * nrepeats
@@ -1099,11 +1104,11 @@ def expected_datetime_value(dt):
),
]
utcdata = [
- pytz.utc.localize(data[0]),
+ pytz.utc.localize(cast(datetime.datetime, data[0])),
data[1],
None,
- data[3].astimezone(pytz.utc),
- data[4].astimezone(pytz.utc),
+ cast(datetime.datetime, data[3]).astimezone(pytz.utc),
+ cast(datetime.datetime, data[4]).astimezone(pytz.utc),
]
ty = pa.timestamp(unit, tz=timezone)
@@ -1231,9 +1236,9 @@ def test_sequence_timestamp_from_mixed_builtin_and_pandas_datetimes():
None,
]
utcdata = [
- data[0].astimezone(pytz.utc),
- pytz.utc.localize(data[1]),
- data[2].astimezone(pytz.utc),
+ cast(datetime.datetime, data[0]).astimezone(pytz.utc),
+ pytz.utc.localize(cast(datetime.datetime, data[1])),
+ cast(datetime.datetime, data[2]).astimezone(pytz.utc),
None,
]
@@ -2062,8 +2067,8 @@ def test_map_from_dicts():
assert arr.to_pylist() == expected
# With omitted values
- data[1] = None
- expected[1] = None
+ data[1] = None # type: ignore[call-overload]
+ expected[1] = None # type: ignore[call-overload]
arr = pa.array(expected, type=pa.map_(pa.binary(), pa.int32()))
@@ -2388,6 +2393,7 @@ def test_nested_auto_chunking(ty, char):
}
+@pytest.mark.numpy
@pytest.mark.large_memory
def test_array_from_pylist_data_overflow():
# Regression test for ARROW-12983
@@ -2410,6 +2416,7 @@ def test_array_from_pylist_data_overflow():
assert len(arr.chunks) > 1
+@pytest.mark.numpy
@pytest.mark.slow
@pytest.mark.large_memory
def test_array_from_pylist_offset_overflow():
@@ -2434,6 +2441,7 @@ def test_array_from_pylist_offset_overflow():
assert len(arr.chunks) > 1
+@pytest.mark.numpy
@parametrize_with_collections_types
@pytest.mark.parametrize(('data', 'scalar_data', 'value_type'), [
([True, False, None], [pa.scalar(True), pa.scalar(False), None], pa.bool_()),
@@ -2471,8 +2479,10 @@ def test_array_from_pylist_offset_overflow():
pa.timestamp('us')
),
(
- [pa.MonthDayNano([1, -1, -10100])],
- [pa.scalar(pa.MonthDayNano([1, -1, -10100]))],
+ [pa.MonthDayNano([1, -1, -10100])], # type: ignore[call-arg, arg-type]
+ [pa.scalar(
+ pa.MonthDayNano([1, -1, -10100]) # type: ignore[call-arg, arg-type]
+ )],
pa.month_day_nano_interval()
),
(["a", "b"], [pa.scalar("a"), pa.scalar("b")], pa.string()),
diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py
index 7508d8f0b98..7d652acf62f 100644
--- a/python/pyarrow/tests/test_cpp_internals.py
+++ b/python/pyarrow/tests/test_cpp_internals.py
@@ -20,7 +20,8 @@
import pytest
-from pyarrow._pyarrow_cpp_tests import get_cpp_tests
+from pyarrow._pyarrow_cpp_tests import ( # type: ignore[import-not-found, import-untyped] # noqa: E501
+ get_cpp_tests)
def inject_cpp_tests(ns):
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index f510c6dbe23..530332b2124 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -178,6 +178,7 @@ def test_read_options(pickle_module):
encoding='utf16',
skip_rows_after_names=27)
+ assert opts.block_size is not None
assert opts.block_size > 0
opts.block_size = 12345
assert opts.block_size == 12345
@@ -302,6 +303,7 @@ def test_convert_options(pickle_module):
with pytest.raises(ValueError):
opts.decimal_point = '..'
+ assert opts.auto_dict_max_cardinality is not None
assert opts.auto_dict_max_cardinality > 0
opts.auto_dict_max_cardinality = 99999
assert opts.auto_dict_max_cardinality == 99999
@@ -323,7 +325,7 @@ def test_convert_options(pickle_module):
with pytest.raises(TypeError, match='DataType expected'):
opts.column_types = {'a': None}
with pytest.raises(TypeError):
- opts.column_types = 0
+ opts.column_types = 0 # type: ignore[reportAttributeAccessIssue]
assert isinstance(opts.null_values, list)
assert '' in opts.null_values
@@ -1158,10 +1160,14 @@ def test_auto_dict_encode(self):
table = self.read_bytes(rows, convert_options=opts,
validate_full=False)
assert table.schema == schema
- dict_values = table['a'].chunk(0).dictionary
+ column_chunk = table.column('a').chunk(0)
+ assert isinstance(column_chunk, pa.DictionaryArray)
+ dict_values = column_chunk.dictionary
assert len(dict_values) == 2
assert dict_values[0].as_py() == "ab"
- assert dict_values[1].as_buffer() == b"cd\xff"
+ dict_value = dict_values[1]
+ assert isinstance(dict_value, pa.StringScalar)
+ assert dict_value.as_buffer() == b"cd\xff"
# With invalid UTF8, checked
opts.check_utf8 = True
@@ -1502,7 +1508,7 @@ def signal_from_thread():
# Interruption should have arrived timely
assert last_duration <= 2.0
- e = exc_info.__context__
+ e = exc_info.__context__ # type: ignore[possibly-missing-attribute, misc]
assert isinstance(e, pa.ArrowCancelled)
assert e.signum == signal.SIGINT
@@ -1866,6 +1872,9 @@ def use_threads(self):
class BaseTestCompressedCSVRead:
+ def write_file(self, path, contents):
+ pass
+ csv_filename = ""
def setUp(self):
self.tmpdir = tempfile.mkdtemp(prefix='arrow-csv-test-')
@@ -1997,7 +2006,7 @@ def test_write_quoting_style():
except Exception as e:
# This will trigger when we try to write a comma (,)
# without quotes, which is invalid
- assert isinstance(e, res)
+ assert isinstance(e, res) # type: ignore[invalid-argument-type]
break
assert buf.getvalue() == res
buf.seek(0)
diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py
index e06f479987c..9d03a3bbff2 100644
--- a/python/pyarrow/tests/test_cuda.py
+++ b/python/pyarrow/tests/test_cuda.py
@@ -103,6 +103,7 @@ def make_random_buffer(size, target='host'):
assert size >= 0
buf = pa.allocate_buffer(size)
assert buf.size == size
+ assert isinstance(buf, pa.Buffer)
arr = np.frombuffer(buf, dtype=np.uint8)
assert arr.size == size
arr[:] = np.random.randint(low=1, high=255, size=size, dtype=np.uint8)
@@ -194,12 +195,14 @@ def test_context_device_buffer(size):
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
# Creating a device buffer from a slice of an array
- cudabuf = global_context.buffer_from_data(arr, offset=soffset, size=ssize)
+ cudabuf = global_context.buffer_from_data(
+ arr, offset=soffset, size=ssize)
assert cudabuf.size == ssize
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
- cudabuf = global_context.buffer_from_data(arr[soffset:soffset+ssize])
+ cudabuf = global_context.buffer_from_data(
+ arr[soffset:soffset+ssize])
assert cudabuf.size == ssize
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
@@ -235,7 +238,8 @@ def test_context_device_buffer(size):
# Creating device buffer from HostBuffer slice
- cudabuf = global_context.buffer_from_data(buf, offset=soffset, size=ssize)
+ cudabuf = global_context.buffer_from_data(
+ buf, offset=soffset, size=ssize)
assert cudabuf.size == ssize
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
np.testing.assert_equal(arr[soffset:soffset+ssize], arr2)
@@ -384,7 +388,8 @@ def test_copy_from_to_host(size):
device_buffer.copy_from_host(buf, position=0, nbytes=nbytes)
# Copy back to host and compare contents
- buf2 = device_buffer.copy_to_host(position=0, nbytes=nbytes)
+ buf2 = device_buffer.copy_to_host(
+ position=0, nbytes=nbytes)
arr2 = np.frombuffer(buf2, dtype=dt)
np.testing.assert_equal(arr, arr2)
@@ -395,7 +400,8 @@ def test_copy_to_host(size):
buf = dbuf.copy_to_host()
assert buf.is_cpu
- np.testing.assert_equal(arr, np.frombuffer(buf, dtype=np.uint8))
+ np.testing.assert_equal(arr, np.frombuffer(
+ buf, dtype=np.uint8))
buf = dbuf.copy_to_host(position=size//4)
assert buf.is_cpu
@@ -437,11 +443,13 @@ def test_copy_to_host(size):
np.frombuffer(buf, dtype=np.uint8))
dbuf.copy_to_host(buf=buf, nbytes=12)
- np.testing.assert_equal(arr[:12], np.frombuffer(buf, dtype=np.uint8)[:12])
+ np.testing.assert_equal(arr[:12], np.frombuffer(
+ buf, dtype=np.uint8)[:12])
dbuf.copy_to_host(buf=buf, nbytes=12, position=6)
- np.testing.assert_equal(arr[6:6+12],
- np.frombuffer(buf, dtype=np.uint8)[:12])
+ np.testing.assert_equal(
+ arr[6:6+12], np.frombuffer(buf, dtype=np.uint8)[:12]
+ )
for (position, nbytes) in [
(0, size+10), (10, size-5),
@@ -450,7 +458,8 @@ def test_copy_to_host(size):
with pytest.raises(ValueError,
match=('requested copy does not '
'fit into host buffer')):
- dbuf.copy_to_host(buf=buf, position=position, nbytes=nbytes)
+ dbuf.copy_to_host(
+ buf=buf, position=position, nbytes=nbytes)
@pytest.mark.parametrize("dest_ctx", ['same', 'another'])
@@ -460,7 +469,9 @@ def test_copy_from_device(dest_ctx, size):
lst = arr.tolist()
if dest_ctx == 'another':
dest_ctx = global_context1
- if buf.context.device_number == dest_ctx.device_number:
+ if (
+ buf.context.device_number == dest_ctx.device_number
+ ):
pytest.skip("not a multi-GPU system")
else:
dest_ctx = buf.context
@@ -563,7 +574,10 @@ def test_buffer_device():
_, buf = make_random_buffer(size=10, target='device')
assert buf.device_type == pa.DeviceAllocationType.CUDA
assert isinstance(buf.device, pa.Device)
- assert buf.device == global_context.memory_manager.device
+ assert (
+ buf.device ==
+ global_context.memory_manager.device
+ )
assert isinstance(buf.memory_manager, pa.MemoryManager)
assert not buf.is_cpu
assert not buf.device.is_cpu
@@ -807,8 +821,9 @@ def test_create_table_with_device_buffers():
def other_process_for_test_IPC(handle_buffer, expected_arr):
- other_context = pa.cuda.Context(0)
- ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer)
+ other_context = cuda.Context(0)
+ ipc_handle = cuda.IpcMemHandle.from_buffer(
+ handle_buffer)
ipc_buf = other_context.open_ipc_buffer(ipc_handle)
ipc_buf.context.synchronize()
buf = ipc_buf.copy_to_host()
@@ -848,7 +863,8 @@ def test_copy_to():
batch = pa.record_batch({"col": arr})
batch_cuda = batch.copy_to(dest)
- buf_cuda = batch_cuda["col"].buffers()[1]
+ buf_cuda = batch_cuda.column("col").buffers()[1]
+ assert buf_cuda is not None
assert not buf_cuda.is_cpu
assert buf_cuda.device_type == pa.DeviceAllocationType.CUDA
assert buf_cuda.device == mm_cuda.device
@@ -949,7 +965,8 @@ def test_device_interface_batch_array():
cbatch._export_to_c_device(ptr_array, ptr_schema)
# Delete and recreate C++ objects from exported pointers
del cbatch
- cbatch_new = pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)
+ cbatch_new = pa.RecordBatch._import_from_c_device(
+ ptr_array, ptr_schema)
assert cbatch_new.schema == schema
batch_new = cbatch_new.copy_to(pa.default_cpu_memory_manager())
assert batch_new.equals(batch)
@@ -957,13 +974,15 @@ def test_device_interface_batch_array():
del cbatch_new
# Now released
with pytest.raises(ValueError, match="Cannot import released ArrowSchema"):
- pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)
+ pa.RecordBatch._import_from_c_device(
+ ptr_array, ptr_schema)
# Not a struct type
pa.int32()._export_to_c(ptr_schema)
with pytest.raises(ValueError,
match="ArrowSchema describes non-struct type"):
- pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)
+ pa.RecordBatch._import_from_c_device(
+ ptr_array, ptr_schema)
def test_print_array():
diff --git a/python/pyarrow/tests/test_cuda_numba_interop.py b/python/pyarrow/tests/test_cuda_numba_interop.py
index 876f3c7f761..4a5bc797533 100644
--- a/python/pyarrow/tests/test_cuda_numba_interop.py
+++ b/python/pyarrow/tests/test_cuda_numba_interop.py
@@ -28,7 +28,6 @@
from numba.cuda.cudadrv.devicearray import DeviceNDArray # noqa: E402
-
context_choices = None
context_choice_ids = ['pyarrow.cuda', 'numba.cuda']
@@ -62,17 +61,19 @@ def test_context(c):
def make_random_buffer(size, target='host', dtype='uint8', ctx=None):
"""Return a host or device buffer with random data.
"""
- dtype = np.dtype(dtype)
+ assert np is not None
+ dtype_obj = np.dtype(dtype)
if target == 'host':
assert size >= 0
- buf = pa.allocate_buffer(size*dtype.itemsize)
- arr = np.frombuffer(buf, dtype=dtype)
+ buf = pa.allocate_buffer(size*dtype_obj.itemsize)
+ arr = np.frombuffer(buf, dtype=dtype_obj)
arr[:] = np.random.randint(low=0, high=255, size=size,
dtype=np.uint8)
return arr, buf
elif target == 'device':
arr, buf = make_random_buffer(size, target='host', dtype=dtype)
- dbuf = ctx.new_buffer(size * dtype.itemsize)
+ assert ctx is not None
+ dbuf = ctx.new_buffer(size * dtype_obj.itemsize)
dbuf.copy_from_host(buf, position=0, nbytes=buf.size)
return arr, dbuf
raise ValueError('invalid target value')
@@ -161,8 +162,8 @@ def __cuda_array_interface__(self):
ids=context_choice_ids)
@pytest.mark.parametrize("dtype", dtypes, ids=dtypes)
def test_numba_memalloc(c, dtype):
+ assert np is not None
ctx, nb_ctx = context_choices[c]
- dtype = np.dtype(dtype)
# Allocate memory using numba context
# Warning: this will not be reflected in pyarrow context manager
# (e.g bytes_allocated does not change)
@@ -198,6 +199,7 @@ def test_pyarrow_memalloc(c, dtype):
ids=context_choice_ids)
@pytest.mark.parametrize("dtype", dtypes, ids=dtypes)
def test_numba_context(c, dtype):
+ assert np is not None
ctx, nb_ctx = context_choices[c]
size = 10
with nb_cuda.gpus[0]:
@@ -209,7 +211,10 @@ def test_numba_context(c, dtype):
np.testing.assert_equal(darr.copy_to_host(), arr)
darr[0] = 99
cbuf.context.synchronize()
- arr2 = np.frombuffer(cbuf.copy_to_host(), dtype=dtype)
+ arr2 = np.frombuffer(
+ cbuf.copy_to_host(),
+ dtype=np.dtype(dtype)
+ )
assert arr2[0] == 99
@@ -217,6 +222,7 @@ def test_numba_context(c, dtype):
ids=context_choice_ids)
@pytest.mark.parametrize("dtype", dtypes, ids=dtypes)
def test_pyarrow_jit(c, dtype):
+ assert np is not None
ctx, nb_ctx = context_choices[c]
@nb_cuda.jit
@@ -234,5 +240,8 @@ def increment_by_one(an_array):
darr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=mem)
increment_by_one[blockspergrid, threadsperblock](darr)
cbuf.context.synchronize()
- arr1 = np.frombuffer(cbuf.copy_to_host(), dtype=arr.dtype)
+ arr1 = np.frombuffer(
+ cbuf.copy_to_host(),
+ dtype=arr.dtype
+ )
np.testing.assert_equal(arr1, arr + 1)
diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
index a142e66db56..11ef01412a6 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -89,7 +89,7 @@ def test_cython_api(tmpdir):
Basic test for the Cython API.
"""
# Fail early if cython is not found
- import cython # noqa
+ import cython # type: ignore[import-untyped, import-not-found] # noqa
with tmpdir.as_cwd():
# Set up temporary workspace
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index d00c0c4b3eb..ce913612bad 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -32,7 +32,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import pyarrow as pa
@@ -40,6 +40,7 @@
import pyarrow.csv
import pyarrow.feather
import pyarrow.fs as fs
+from pyarrow.fs import FileInfo
import pyarrow.json
from pyarrow.lib import is_threading_enabled
from pyarrow.tests.util import (FSProtocolClass, ProxyHandler,
@@ -49,17 +50,17 @@
try:
import pandas as pd
except ImportError:
- pd = None
+ pass
try:
import pyarrow.dataset as ds
except ImportError:
- ds = None
+ pass
try:
import pyarrow.parquet as pq
except ImportError:
- pq = None
+ pass
# Marks all of the tests in this module
# Ignore these with pytest ... -m 'not dataset'
@@ -395,14 +396,16 @@ def test_filesystem_dataset(mockfs):
# validation of required arguments
with pytest.raises(TypeError, match="incorrect type"):
- ds.FileSystemDataset(fragments, file_format, schema)
+ ds.FileSystemDataset(fragments, file_format, schema) # type: ignore[arg-type]
# validation of root_partition
with pytest.raises(TypeError, match="incorrect type"):
- ds.FileSystemDataset(fragments, schema=schema,
- format=file_format, root_partition=1)
+ ds.FileSystemDataset(
+ fragments, schema=schema, format=file_format,
+ root_partition=1) # type: ignore[arg-type]
# missing required argument in from_paths
with pytest.raises(TypeError, match="incorrect type"):
- ds.FileSystemDataset.from_paths(fragments, format=file_format)
+ ds.FileSystemDataset.from_paths(
+ fragments, format=file_format) # type: ignore[arg-type]
def test_filesystem_dataset_no_filesystem_interaction(dataset_reader):
@@ -827,7 +830,8 @@ def test_partitioning():
load_back = None
with pytest.raises(ValueError,
match="Expected Partitioning or PartitioningFactory"):
- load_back = ds.dataset(tempdir, format='ipc', partitioning=int(0))
+ load_back = ds.dataset(
+ tempdir, format='ipc', partitioning=int(0)) # type: ignore[arg-type]
assert load_back is None
@@ -859,8 +863,8 @@ def test_partitioning_pickling(pickle_module):
)
def test_dataset_partitioning_format(
flavor: str,
- expected_defined_partition: tuple,
- expected_undefined_partition: tuple,
+ expected_defined_partition: tuple[str],
+ expected_undefined_partition: tuple[str],
):
partitioning_schema = pa.schema([("foo", pa.string()), ("bar", pa.string())])
@@ -1215,6 +1219,7 @@ def test_make_fragment(multisourcefs):
parquet_format = ds.ParquetFileFormat()
dataset = ds.dataset('/plain', filesystem=multisourcefs,
format=parquet_format)
+ assert isinstance(dataset, ds.FileSystemDataset)
for path in dataset.files:
fragment = parquet_format.make_fragment(path, multisourcefs)
@@ -1252,7 +1257,9 @@ def test_make_fragment_with_size(s3_example_simple):
assert tbl.equals(table)
# true sizes -> works
- sizes_true = [dataset.filesystem.get_file_info(x).size for x in dataset.files]
+ dataset_file_info = [dataset.filesystem.get_file_info(x) for x in dataset.files]
+ sizes_true = [x.size if isinstance(
+ x, FileInfo) else None for x in dataset_file_info]
fragments_with_size = [file_format.make_fragment(path, fs, file_size=size)
for path, size in zip(paths, sizes_true)]
dataset_with_size = ds.FileSystemDataset(
@@ -1943,6 +1950,7 @@ def test_fragments_repr(tempdir, dataset):
# single-file parquet dataset (no partition information in repr)
table, path = _create_single_file(tempdir)
dataset = ds.dataset(path, format="parquet")
+ assert isinstance(dataset, ds.FileSystemDataset)
fragment = list(dataset.get_fragments())[0]
assert (
repr(fragment) ==
@@ -1954,6 +1962,7 @@ def test_fragments_repr(tempdir, dataset):
path = tempdir / "data.feather"
pa.feather.write_feather(table, path)
dataset = ds.dataset(path, format="feather")
+ assert isinstance(dataset, ds.FileSystemDataset)
fragment = list(dataset.get_fragments())[0]
assert (
repr(fragment) ==
@@ -2065,7 +2074,7 @@ def test_partitioning_factory_segment_encoding(pickled, pickle_module):
actual = factory.finish().to_table(columns={
"date_int": ds.field("date").cast(pa.int64()),
})
- assert actual[0][0].as_py() == 1620086400
+ assert actual.column(0).chunk(0)[0].as_py() == 1620086400
partitioning_factory = ds.DirectoryPartitioning.discover(
["date", "string"], segment_encoding="none")
@@ -2105,7 +2114,7 @@ def test_partitioning_factory_segment_encoding(pickled, pickle_module):
actual = factory.finish().to_table(columns={
"date_int": ds.field("date").cast(pa.int64()),
})
- assert actual[0][0].as_py() == 1620086400
+ assert actual.column(0).chunk(0)[0].as_py() == 1620086400
partitioning_factory = ds.HivePartitioning.discover(
segment_encoding="none")
@@ -2173,7 +2182,7 @@ def test_partitioning_factory_hive_segment_encoding_key_encoded(pickled, pickle_
actual = factory.finish().to_table(columns={
"date_int": ds.field("test'; date").cast(pa.int64()),
})
- assert actual[0][0].as_py() == 1620086400
+ assert actual.column(0).chunk(0)[0].as_py() == 1620086400
partitioning_factory = ds.HivePartitioning.discover(
segment_encoding="uri")
@@ -2231,7 +2240,7 @@ def test_dictionary_partitioning_outer_nulls_raises(tempdir):
def test_positional_keywords_raises(tempdir):
table = pa.table({'a': ['x', 'y', None], 'b': ['x', 'y', 'z']})
with pytest.raises(TypeError):
- ds.write_dataset(table, tempdir, "basename-{i}.arrow")
+ ds.write_dataset(table, tempdir, "basename-{i}.arrow") # type: ignore[arg-type]
@pytest.mark.parquet
@@ -2245,20 +2254,20 @@ def test_read_partition_keys_only(tempdir):
'key': pa.repeat(0, BATCH_SIZE + 1),
'value': np.arange(BATCH_SIZE + 1)})
pq.write_to_dataset(
- table[:BATCH_SIZE],
+ table[:BATCH_SIZE], # type: ignore[arg-type]
tempdir / 'one', partition_cols=['key'])
pq.write_to_dataset(
- table[:BATCH_SIZE + 1],
+ table[:BATCH_SIZE + 1], # type: ignore[arg-type]
tempdir / 'two', partition_cols=['key'])
table = pq.read_table(tempdir / 'one', columns=['key'])
- assert table['key'].num_chunks == 1
+ assert table.column('key').num_chunks == 1
table = pq.read_table(tempdir / 'two', columns=['key', 'value'])
- assert table['key'].num_chunks == 2
+ assert table.column('key').num_chunks == 2
table = pq.read_table(tempdir / 'two', columns=['key'])
- assert table['key'].num_chunks == 2
+ assert table.column('key').num_chunks == 2
def _has_subdirs(basedir):
@@ -2319,9 +2328,9 @@ def test_partitioning_function():
with pytest.raises(ValueError):
ds.partitioning()
with pytest.raises(ValueError, match="Expected list"):
- ds.partitioning(field_names=schema)
+ ds.partitioning(field_names=schema) # type: ignore[arg-type]
with pytest.raises(ValueError, match="Cannot specify both"):
- ds.partitioning(schema, field_names=schema)
+ ds.partitioning(schema, field_names=schema) # type: ignore[call-overload]
# Hive partitioning
part = ds.partitioning(schema, flavor="hive")
@@ -2332,13 +2341,13 @@ def test_partitioning_function():
assert isinstance(part, ds.PartitioningFactory)
# cannot pass list of names
with pytest.raises(ValueError):
- ds.partitioning(names, flavor="hive")
+ ds.partitioning(names, flavor="hive") # type: ignore[arg-type]
with pytest.raises(ValueError, match="Cannot specify 'field_names'"):
ds.partitioning(field_names=names, flavor="hive")
# unsupported flavor
with pytest.raises(ValueError):
- ds.partitioning(schema, flavor="unsupported")
+ ds.partitioning(schema, flavor="unsupported") # type: ignore[arg-type]
@pytest.mark.parquet
@@ -2353,6 +2362,8 @@ def test_directory_partitioning_dictionary_key(mockfs):
dataset = ds.dataset(
"subdir", format="parquet", filesystem=mockfs, partitioning=part
)
+ assert isinstance(dataset, ds.FileSystemDataset)
+ assert dataset.partitioning is not None
assert dataset.partitioning.schema == schema
table = dataset.to_table()
@@ -2373,6 +2384,8 @@ def test_hive_partitioning_dictionary_key(multisourcefs):
dataset = ds.dataset(
"hive", format="parquet", filesystem=multisourcefs, partitioning=part
)
+ assert isinstance(dataset, ds.FileSystemDataset)
+ assert dataset.partitioning is not None
assert dataset.partitioning.schema == schema
table = dataset.to_table()
@@ -2380,11 +2393,13 @@ def test_hive_partitioning_dictionary_key(multisourcefs):
month_dictionary = list(range(1, 13))
assert table.column('year').type.equals(schema.types[0])
for chunk in table.column('year').chunks:
+ assert isinstance(chunk, pa.DictionaryArray)
actual = chunk.dictionary.to_pylist()
actual.sort()
assert actual == year_dictionary
assert table.column('month').type.equals(schema.types[1])
for chunk in table.column('month').chunks:
+ assert isinstance(chunk, pa.DictionaryArray)
actual = chunk.dictionary.to_pylist()
actual.sort()
assert actual == month_dictionary
@@ -2574,6 +2589,8 @@ def test_construct_from_mixed_child_datasets(mockfs):
'subdir/2/yyy/file1.parquet'], filesystem=mockfs)
b = ds.dataset('subdir', filesystem=mockfs)
+ assert isinstance(a, ds.FileSystemDataset)
+ assert isinstance(b, ds.FileSystemDataset)
dataset = ds.dataset([a, b])
assert isinstance(dataset, ds.UnionDataset)
@@ -2585,8 +2602,8 @@ def test_construct_from_mixed_child_datasets(mockfs):
assert len(dataset.children) == 2
for child in dataset.children:
- assert child.files == ['subdir/1/xxx/file0.parquet',
- 'subdir/2/yyy/file1.parquet']
+ assert child.files == [ # type: ignore[attr-defined]
+ 'subdir/1/xxx/file0.parquet', 'subdir/2/yyy/file1.parquet']
def test_construct_empty_dataset():
@@ -2620,7 +2637,7 @@ def test_construct_from_invalid_sources_raise(multisourcefs):
batch2 = pa.RecordBatch.from_arrays([pa.array(range(10))], names=["b"])
with pytest.raises(TypeError, match='Expected.*FileSystemDatasetFactory'):
- ds.dataset([child1, child2])
+ ds.dataset([child1, child2]) # type: ignore[arg-type]
expected = (
"Expected a list of path-like or dataset objects, or a list "
@@ -2628,14 +2645,14 @@ def test_construct_from_invalid_sources_raise(multisourcefs):
"types: int"
)
with pytest.raises(TypeError, match=expected):
- ds.dataset([1, 2, 3])
+ ds.dataset([1, 2, 3]) # type: ignore[arg-type]
expected = (
"Expected a path-like, list of path-likes or a list of Datasets "
"instead of the given type: NoneType"
)
with pytest.raises(TypeError, match=expected):
- ds.dataset(None)
+ ds.dataset(None) # type: ignore[arg-type]
expected = (
"Expected a path-like, list of path-likes or a list of Datasets "
@@ -2662,7 +2679,7 @@ def test_construct_from_invalid_sources_raise(multisourcefs):
"batches or tables. The given list contains the following types:"
)
with pytest.raises(TypeError, match=expected):
- ds.dataset([batch1, 0])
+ ds.dataset([batch1, 0]) # type: ignore[arg-type]
expected = (
"Expected a list of tables or batches. The given list contains a int"
@@ -2752,7 +2769,7 @@ def test_open_dataset_partitioned_directory(tempdir, dataset_reader, pickle_modu
dataset = ds.dataset(
str(path),
partitioning=ds.partitioning(
- pa.schema([("part", pa.int8())]), flavor="hive"))
+ schema=pa.schema([("part", pa.int8())]), flavor="hive"))
expected_schema = table.schema.append(pa.field("part", pa.int8()))
assert dataset.schema.equals(expected_schema)
@@ -2797,7 +2814,7 @@ def test_open_union_dataset(tempdir, dataset_reader, pickle_module):
_, path = _create_single_file(tempdir)
dataset = ds.dataset(path)
- union = ds.dataset([dataset, dataset])
+ union = ds.dataset([dataset, dataset]) # type: ignore[arg-type]
assert isinstance(union, ds.UnionDataset)
pickled = pickle_module.loads(pickle_module.dumps(union))
@@ -2807,7 +2824,7 @@ def test_open_union_dataset(tempdir, dataset_reader, pickle_module):
def test_open_union_dataset_with_additional_kwargs(multisourcefs):
child = ds.dataset('/plain', filesystem=multisourcefs, format='parquet')
with pytest.raises(ValueError, match="cannot pass any additional"):
- ds.dataset([child], format="parquet")
+ ds.dataset([child], format="parquet") # type: ignore[arg-type]
def test_open_dataset_non_existing_file():
@@ -2894,7 +2911,7 @@ def expected_type(key):
def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module):
# https://issues.apache.org/jira/browse/ARROW-11400
table = pa.table({'part': np.repeat(['A', 'B'], 5), 'col': range(10)})
- part = ds.partitioning(table.select(['part']).schema, flavor="hive")
+ part = ds.partitioning(schema=table.select(['part']).schema, flavor="hive")
ds.write_dataset(table, tempdir, partitioning=part, format="feather")
dataset = ds.dataset(
@@ -2902,7 +2919,7 @@ def test_dataset_partitioned_dictionary_type_reconstruct(tempdir, pickle_module)
partitioning=ds.HivePartitioning.discover(infer_dictionary=True)
)
expected = pa.table(
- {'col': table['col'], 'part': table['part'].dictionary_encode()}
+ {'col': table.column('col'), 'part': table.column('part').dictionary_encode()}
)
assert dataset.to_table().equals(expected)
fragment = list(dataset.get_fragments())[0]
@@ -2987,7 +3004,7 @@ def test_open_dataset_from_uri_s3_fsspec(s3_example_simple):
assert dataset.to_table().equals(table)
# directly passing the fsspec-handler
- fs = PyFileSystem(FSSpecHandler(fs))
+ fs = PyFileSystem(FSSpecHandler(fs)) # type: ignore[abstract]
dataset = ds.dataset(path, format="parquet", filesystem=fs)
assert dataset.to_table().equals(table)
@@ -3089,7 +3106,7 @@ def test_file_format_inspect_fsspec(tempdir):
format = ds.ParquetFileFormat()
# manually creating a PyFileSystem instead of using fs._ensure_filesystem
# which would convert an fsspec local filesystem to a native one
- filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs))
+ filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs)) # type: ignore[abstract]
schema = format.inspect(path, filesystem)
assert schema.equals(table.schema)
@@ -3107,11 +3124,11 @@ def test_filter_timestamp(tempdir, dataset_reader):
"id": range(10)})
# write dataset partitioned on dates (as strings)
- part = ds.partitioning(table.select(['dates']).schema, flavor="hive")
+ part = ds.partitioning(schema=table.select(['dates']).schema, flavor="hive")
ds.write_dataset(table, path, partitioning=part, format="feather")
# read dataset partitioned on dates (as timestamps)
- part = ds.partitioning(pa.schema([("dates", pa.timestamp("s"))]),
+ part = ds.partitioning(schema=pa.schema([("dates", pa.timestamp("s"))]),
flavor="hive")
dataset = ds.dataset(path, format="feather", partitioning=part)
@@ -3162,7 +3179,7 @@ def test_filter_compute_expression(tempdir, dataset_reader):
filter_ = pc.is_in(ds.field('A'), pa.array(["a", "b"]))
assert dataset_reader.to_table(dataset, filter=filter_).num_rows == 3
- filter_ = pc.hour(ds.field('B')) >= 3
+ filter_ = pc.hour(ds.field('B')) >= 3 # type: ignore[operator]
assert dataset_reader.to_table(dataset, filter=filter_).num_rows == 2
days = pc.days_between(ds.field('B'), ds.field("C"))
@@ -3194,12 +3211,12 @@ def test_union_dataset_from_other_datasets(tempdir, multisourcefs):
assert child1.schema != child2.schema != child3.schema
- assembled = ds.dataset([child1, child2, child3])
+ assembled = ds.dataset([child1, child2, child3]) # type: ignore[arg-type]
assert isinstance(assembled, ds.UnionDataset)
msg = 'cannot pass any additional arguments'
with pytest.raises(ValueError, match=msg):
- ds.dataset([child1, child2], filesystem=multisourcefs)
+ ds.dataset([child1, child2], filesystem=multisourcefs) # type: ignore[arg-type]
expected_schema = pa.schema([
('date', pa.date32()),
@@ -3213,7 +3230,7 @@ def test_union_dataset_from_other_datasets(tempdir, multisourcefs):
assert assembled.schema.equals(expected_schema)
assert assembled.to_table().schema.equals(expected_schema)
- assembled = ds.dataset([child1, child3])
+ assembled = ds.dataset([child1, child3]) # type: ignore[arg-type]
expected_schema = pa.schema([
('date', pa.date32()),
('index', pa.int64()),
@@ -3230,6 +3247,7 @@ def test_union_dataset_from_other_datasets(tempdir, multisourcefs):
('color', pa.string()),
('date', pa.date32()),
])
- assembled = ds.dataset([child1, child3], schema=expected_schema)
+ assembled = ds.dataset(
+ [child1, child3], schema=expected_schema) # type: ignore[arg-type]
assert assembled.to_table().schema.equals(expected_schema)
@@ -3238,6 +3256,7 @@ def test_union_dataset_from_other_datasets(tempdir, multisourcefs):
('color', pa.string()),
('unknown', pa.string()) # fill with nulls
])
- assembled = ds.dataset([child1, child3], schema=expected_schema)
+ assembled = ds.dataset(
+ [child1, child3], schema=expected_schema) # type: ignore[arg-type]
assert assembled.to_table().schema.equals(expected_schema)
@@ -3248,7 +3267,7 @@ def test_union_dataset_from_other_datasets(tempdir, multisourcefs):
child4 = ds.dataset(path)
with pytest.raises(pa.ArrowTypeError, match='Unable to merge'):
- ds.dataset([child1, child4])
+ ds.dataset([child1, child4]) # type: ignore[arg-type]
def test_dataset_from_a_list_of_local_directories_raises(multisourcefs):
@@ -3259,7 +3278,7 @@ def test_dataset_from_a_list_of_local_directories_raises(multisourcefs):
def test_union_dataset_filesystem_datasets(multisourcefs):
# without partitioning
- dataset = ds.dataset([
+ dataset = ds.dataset([ # type: ignore[arg-type]
ds.dataset('/plain', filesystem=multisourcefs),
ds.dataset('/schema', filesystem=multisourcefs),
ds.dataset('/hive', filesystem=multisourcefs),
@@ -3273,7 +3292,7 @@ def test_union_dataset_filesystem_datasets(multisourcefs):
assert dataset.schema.equals(expected_schema)
# with hive partitioning for two hive sources
- dataset = ds.dataset([
+ dataset = ds.dataset([ # type: ignore[arg-type]
ds.dataset('/plain', filesystem=multisourcefs),
ds.dataset('/schema', filesystem=multisourcefs),
ds.dataset('/hive', filesystem=multisourcefs, partitioning='hive')
@@ -3333,7 +3352,7 @@ def _check_dataset(schema, expected, expected_schema=None):
# Specifying with differing field types
schema = pa.schema([('a', 'int32'), ('b', 'float64')])
dataset = ds.dataset(str(tempdir / "data.parquet"), schema=schema)
- expected = pa.table([table['a'].cast('int32'),
+ expected = pa.table([table['a'].cast('int32'), # type: ignore[arg-type]
table['b']],
names=['a', 'b'])
_check_dataset(schema, expected)
@@ -3834,7 +3853,7 @@ def test_parquet_dataset_factory_fsspec(tempdir):
fsspec_fs = fsspec.filesystem("file")
# manually creating a PyFileSystem, because passing the local fsspec
# filesystem would internally be converted to native LocalFileSystem
- filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs))
+ filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs)) # type: ignore[abstract]
dataset = ds.parquet_dataset(metadata_path, filesystem=filesystem)
assert dataset.schema.equals(table.schema)
assert len(dataset.files) == 4
@@ -4042,12 +4061,14 @@ def test_filter_mismatching_schema(tempdir, dataset_reader):
# filtering on a column with such type mismatch should implicitly
# cast the column
filtered = dataset_reader.to_table(dataset, filter=ds.field("col") > 2)
- assert filtered["col"].equals(table["col"].cast('int64').slice(2))
+ assert filtered["col"].equals(table["col"].cast(
+ 'int64').slice(2)) # type: ignore[arg-type]
fragment = list(dataset.get_fragments())[0]
filtered = dataset_reader.to_table(
fragment, filter=ds.field("col") > 2, schema=schema)
- assert filtered["col"].equals(table["col"].cast('int64').slice(2))
+ assert filtered["col"].equals(table["col"].cast(
+ 'int64').slice(2)) # type: ignore[arg-type]
@pytest.mark.parquet
@@ -4112,6 +4133,7 @@ def test_dataset_preserved_partitioning(tempdir):
# through discovery, but without partitioning
_, path = _create_single_file(tempdir)
dataset = ds.dataset(path)
+ assert isinstance(dataset, ds.FileSystemDataset)
assert isinstance(dataset.partitioning, ds.DirectoryPartitioning)
# TODO(GH-34884) partitioning attribute not preserved in pickling
# dataset_ = ds.dataset(path)
@@ -4121,10 +4143,12 @@ def test_dataset_preserved_partitioning(tempdir):
# through discovery, with hive partitioning but not specified
full_table, path = _create_partitioned_dataset(tempdir)
dataset = ds.dataset(path)
+ assert isinstance(dataset, ds.FileSystemDataset)
assert isinstance(dataset.partitioning, ds.DirectoryPartitioning)
# through discovery, with hive partitioning (from a partitioning factory)
dataset = ds.dataset(path, partitioning="hive")
+ assert isinstance(dataset, ds.FileSystemDataset)
part = dataset.partitioning
assert part is not None
assert isinstance(part, ds.HivePartitioning)
@@ -4133,11 +4157,12 @@ def test_dataset_preserved_partitioning(tempdir):
assert part.dictionaries[0] == pa.array([0, 1, 2], pa.int32())
# through discovery, with hive partitioning (from a partitioning object)
- part = ds.partitioning(pa.schema([("part", pa.int32())]), flavor="hive")
+ part = ds.partitioning(schema=pa.schema([("part", pa.int32())]), flavor="hive")
assert isinstance(part, ds.HivePartitioning) # not a factory
assert len(part.dictionaries) == 1
assert all(x is None for x in part.dictionaries)
dataset = ds.dataset(path, partitioning=part)
+ assert isinstance(dataset, ds.FileSystemDataset)
part = dataset.partitioning
assert isinstance(part, ds.HivePartitioning)
assert part.schema == pa.schema([("part", pa.int32())])
@@ -4147,6 +4172,7 @@ def test_dataset_preserved_partitioning(tempdir):
# through manual creation -> not available
dataset = ds.dataset(path, partitioning="hive")
+ assert isinstance(dataset, ds.FileSystemDataset)
dataset2 = ds.FileSystemDataset(
list(dataset.get_fragments()), schema=dataset.schema,
format=dataset.format, filesystem=dataset.filesystem
@@ -4192,7 +4218,7 @@ def _sort_table(tab, sort_col):
import pyarrow.compute as pc
sorted_indices = pc.sort_indices(
tab, options=pc.SortOptions([(sort_col, 'ascending')]))
- return pc.take(tab, sorted_indices)
+ return pc.take(tab, sorted_indices) # type: ignore[arg-type]
def _check_dataset_roundtrip(dataset, base_dir, expected_files, sort_col,
@@ -4265,7 +4291,7 @@ def test_write_dataset_partitioned(tempdir):
target / "part=b", target / "part=b" / "part-0.arrow"
]
partitioning_schema = ds.partitioning(
- pa.schema([("part", pa.string())]), flavor="hive")
+ schema=pa.schema([("part", pa.string())]), flavor="hive")
_check_dataset_roundtrip(
dataset, str(target), expected_paths, 'f1', target,
partitioning=partitioning_schema)
@@ -4277,7 +4303,7 @@ def test_write_dataset_partitioned(tempdir):
target / "b", target / "b" / "part-0.arrow"
]
partitioning_schema = ds.partitioning(
- pa.schema([("part", pa.string())]))
+ schema=pa.schema([("part", pa.string())]))
_check_dataset_roundtrip(
dataset, str(target), expected_paths, 'f1', target,
partitioning=partitioning_schema)
@@ -4290,6 +4316,7 @@ def test_write_dataset_with_field_names(tempdir):
partitioning=["b"])
load_back = ds.dataset(tempdir, format='ipc', partitioning=["b"])
+ assert isinstance(load_back, ds.FileSystemDataset)
files = load_back.files
partitioning_dirs = {
str(pathlib.Path(f).relative_to(tempdir).parent) for f in files
@@ -4307,6 +4334,7 @@ def test_write_dataset_with_field_names_hive(tempdir):
partitioning=["b"], partitioning_flavor="hive")
load_back = ds.dataset(tempdir, format='ipc', partitioning="hive")
+ assert isinstance(load_back, ds.FileSystemDataset)
files = load_back.files
partitioning_dirs = {
str(pathlib.Path(f).relative_to(tempdir).parent) for f in files
@@ -4624,7 +4652,7 @@ def test_write_dataset_max_open_files(tempdir):
record_batch_3, record_batch_4])
partitioning = ds.partitioning(
- pa.schema([(column_names[partition_column_id], pa.string())]),
+ schema=pa.schema([(column_names[partition_column_id], pa.string())]),
flavor="hive")
data_source_1 = directory / "default"
@@ -4638,7 +4666,8 @@ def test_write_dataset_max_open_files(tempdir):
def _get_compare_pair(data_source, record_batch, file_format, col_id):
num_of_files_generated = _get_num_of_files_generated(
base_directory=data_source, file_format=file_format)
- number_of_partitions = len(pa.compute.unique(record_batch[col_id]))
+ unique_vals = pa.compute.unique(record_batch[col_id])
+ number_of_partitions = len(unique_vals) # type: ignore[arg-type]
return num_of_files_generated, number_of_partitions
# CASE 1: when max_open_files=default & max_open_files >= num_of_partitions
@@ -4685,7 +4714,7 @@ def test_write_dataset_partitioned_dict(tempdir):
target / "a", target / "a" / "part-0.arrow",
target / "b", target / "b" / "part-0.arrow"
]
- partitioning = ds.partitioning(pa.schema([
+ partitioning = ds.partitioning(schema=pa.schema([
dataset.schema.field('part')]),
dictionaries={'part': pa.array(['a', 'b'])})
# NB: dictionaries required here since we use partitioning to parse
@@ -4704,7 +4733,7 @@ def test_write_dataset_use_threads(tempdir):
dataset = ds.dataset(directory, partitioning="hive")
partitioning = ds.partitioning(
- pa.schema([("part", pa.string())]), flavor="hive")
+ schema=pa.schema([("part", pa.string())]), flavor="hive")
target1 = tempdir / 'partitioned1'
paths_written = []
@@ -4744,7 +4773,7 @@ def test_write_dataset_use_threads_preserve_order(tempdir):
batches = table.to_batches(max_chunksize=2)
ds.write_dataset(batches, tempdir, format="parquet",
use_threads=True, preserve_order=True)
- seq = ds.dataset(tempdir).to_table(use_threads=False)['a'].to_numpy()
+ seq = ds.dataset(tempdir).to_table(use_threads=False).column('a').to_numpy()
prev = -1
for item in seq:
curr = int(item)
@@ -4784,7 +4813,7 @@ def file_visitor(written_file):
visited_sizes.append(written_file.size)
partitioning = ds.partitioning(
- pa.schema([("part", pa.string())]), flavor="hive")
+ schema=pa.schema([("part", pa.string())]), flavor="hive")
ds.write_dataset(table, base_dir, format="feather",
basename_template='dat_{i}.arrow',
partitioning=partitioning, file_visitor=file_visitor)
@@ -4896,7 +4925,7 @@ def test_write_table_partitioned_dict(tempdir):
pa.array(['a'] * 10 + ['b'] * 10).dictionary_encode(),
], names=['col', 'part'])
- partitioning = ds.partitioning(table.select(["part"]).schema)
+ partitioning = ds.partitioning(schema=table.select(["part"]).schema)
base_dir = tempdir / "dataset"
ds.write_dataset(
@@ -4917,8 +4946,7 @@ def test_write_table_partitioned_dict(tempdir):
def test_write_dataset_parquet(tempdir):
table = pa.table([
pa.array(range(20), type="uint32"),
- pa.array(np.arange("2012-01-01", 20, dtype="datetime64[D]").astype(
- "datetime64[ns]")),
+ pa.array(pd.date_range("2012-01-01", periods=20, freq='D').values.astype(
+ "datetime64[ns]")),
pa.array(np.repeat(['a', 'b'], 10))
], names=["f1", "f2", "part"])
@@ -5014,7 +5042,7 @@ def test_partition_dataset_parquet_file_visitor(tempdir):
root_path = tempdir / 'partitioned'
partitioning = ds.partitioning(
- pa.schema([("part", pa.string())]), flavor="hive")
+ schema=pa.schema([("part", pa.string())]), flavor="hive")
paths_written = []
@@ -5047,11 +5075,11 @@ def test_write_dataset_arrow_schema_metadata(tempdir):
# ensure we serialize ARROW schema in the parquet metadata, to have a
# correct roundtrip (e.g. preserve non-UTC timezone)
table = pa.table({"a": [pd.Timestamp("2012-01-01", tz="Europe/Brussels")]})
- assert table["a"].type.tz == "Europe/Brussels"
+ assert table.column("a").type.tz == "Europe/Brussels"
ds.write_dataset(table, tempdir, format="parquet")
result = pq.read_table(tempdir / "part-0.parquet")
- assert result["a"].type.tz == "Europe/Brussels"
+ assert result.column("a").type.tz == "Europe/Brussels"
def test_write_dataset_schema_metadata(tempdir):
@@ -5092,7 +5120,7 @@ def test_write_dataset_s3(s3_example_simple):
pa.array(['a'] * 10 + ['b'] * 10)],
names=["f1", "f2", "part"]
)
- part = ds.partitioning(pa.schema([("part", pa.string())]), flavor="hive")
+ part = ds.partitioning(schema=pa.schema([("part", pa.string())]), flavor="hive")
# writing with filesystem object
ds.write_dataset(
@@ -5171,7 +5199,7 @@ def test_write_dataset_s3_put_only(s3_server):
pa.array(['a']*10 + ['b'] * 10)],
names=["f1", "f2", "part"]
)
- part = ds.partitioning(pa.schema([("part", pa.string())]), flavor="hive")
+ part = ds.partitioning(schema=pa.schema([("part", pa.string())]), flavor="hive")
# writing with filesystem object with create_dir flag set to false
ds.write_dataset(
@@ -5549,7 +5577,7 @@ def test_union_dataset_filter(tempdir, dstype):
else:
raise NotImplementedError
- filtered_union_ds = ds.dataset((ds1, ds2)).filter(
+ filtered_union_ds = ds.dataset((ds1, ds2)).filter( # type: ignore[arg-type]
(pc.field("colA") < 3) | (pc.field("colA") == 9)
)
assert filtered_union_ds.to_table() == pa.table({
@@ -5571,7 +5599,7 @@ def test_union_dataset_filter(tempdir, dstype):
filtered_ds2 = ds2.filter(pc.field("colA") < 10)
with pytest.raises(ValueError, match="currently not supported"):
- ds.dataset((filtered_ds1, filtered_ds2))
+ ds.dataset((filtered_ds1, filtered_ds2)) # type: ignore[arg-type]
def test_parquet_dataset_filter(tempdir):
@@ -5672,8 +5700,9 @@ def test_dataset_partition_with_slash(tmpdir):
assert dt_table == read_table.sort_by("exp_id")
exp_meta = dt_table.column(1).to_pylist()
- exp_meta = sorted(set(exp_meta)) # take unique
- encoded_paths = ["exp_meta=" + quote(path, safe='') for path in exp_meta]
+ exp_meta = sorted(set(exp_meta), key=lambda x: (
+ x is None, x)) # take unique, handle None
+ encoded_paths = ["exp_meta=" + quote(str(path), safe='') for path in exp_meta]
file_paths = sorted(os.listdir(path))
assert encoded_paths == file_paths
@@ -5756,6 +5785,7 @@ def test_write_dataset_write_page_index(tempdir):
)
ds1 = ds.dataset(base_dir, format="parquet")
+ assert isinstance(ds1, ds.FileSystemDataset)
for file in ds1.files:
# Can retrieve sorting columns from metadata
metadata = pq.read_metadata(file)
@@ -5898,13 +5928,13 @@ def test_make_write_options_error():
"'pyarrow._dataset_parquet.ParquetFileFormat' objects "
"doesn't apply to a 'int'")
with pytest.raises(TypeError) as excinfo:
- pa.dataset.ParquetFileFormat.make_write_options(43)
+ pa.dataset.ParquetFileFormat.make_write_options(43) # type: ignore
assert msg_1 in str(excinfo.value) or msg_2 in str(excinfo.value)
pformat = pa.dataset.ParquetFileFormat()
msg = "make_write_options\\(\\) takes exactly 0 positional arguments"
with pytest.raises(TypeError, match=msg):
- pformat.make_write_options(43)
+ pformat.make_write_options(43) # type: ignore
def test_scanner_from_substrait(dataset):
diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py
index 0ef3931a4cf..3d658352372 100644
--- a/python/pyarrow/tests/test_dataset_encryption.py
+++ b/python/pyarrow/tests/test_dataset_encryption.py
@@ -30,8 +30,8 @@
import pyarrow.parquet as pq
import pyarrow.dataset as ds
except ImportError:
- pq = None
- ds = None
+ pq = None # type: ignore[assignment]
+ ds = None # type: ignore[assignment]
try:
from pyarrow.tests.parquet.encryption import InMemoryKmsClient
@@ -85,7 +85,7 @@ def create_encryption_config(footer_key=FOOTER_KEY_NAME, column_keys=COLUMN_KEYS
def create_decryption_config():
- return pe.DecryptionConfiguration(cache_lifetime=300)
+ return pe.DecryptionConfiguration(cache_lifetime=timedelta(seconds=300))
def create_kms_connection_config(keys=KEYS):
@@ -135,6 +135,8 @@ def assert_decrypts(
encrypt_kms_connection_config = create_kms_connection_config(write_keys)
decrypt_kms_connection_config = create_kms_connection_config(read_keys)
+ assert ds is not None
+ assert pe is not None
crypto_factory = pe.CryptoFactory(kms_factory)
parquet_encryption_cfg = ds.ParquetEncryptionConfig(
crypto_factory, encrypt_kms_connection_config, encryption_config
@@ -370,11 +372,12 @@ def test_large_row_encryption_decryption():
"""Test encryption and decryption of a large number of rows."""
class NoOpKmsClient(pe.KmsClient):
- def wrap_key(self, key_bytes: bytes, _: str) -> bytes:
+ def wrap_key(self, key_bytes: bytes, _: str) -> bytes: # type: ignore[override]
b = base64.b64encode(key_bytes)
return b
- def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes:
+ def unwrap_key(self, wrapped_key: bytes, _: str # type: ignore[override]
+ ) -> bytes:
b = base64.b64decode(wrapped_key)
return b
@@ -395,6 +398,9 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes:
plaintext_footer=False,
data_key_length_bits=128,
)
+ assert ds is not None
+ assert pe is not None
+ assert pq is not None
pqe_config = ds.ParquetEncryptionConfig(
crypto_factory, kms_config, encryption_config
)
@@ -429,6 +435,9 @@ def unwrap_key(self, wrapped_key: bytes, _: str) -> bytes:
encryption_unavailable, reason="Parquet Encryption is not currently enabled"
)
def test_dataset_encryption_with_selected_column_statistics():
+ assert ds is not None
+ assert pq is not None
+
table = create_sample_table()
encryption_config = create_encryption_config()
@@ -472,7 +481,7 @@ def test_dataset_encryption_with_selected_column_statistics():
for fragment in dataset.get_fragments():
decryption_properties = crypto_factory.file_decryption_properties(
- kms_connection_config, decryption_config, fragment.path, mockfs)
+ kms_connection_config, decryption_config,
+ fragment.path, mockfs) # type: ignore[call-arg]
with pq.ParquetFile(
fragment.path,
decryption_properties=decryption_properties,
@@ -481,12 +490,14 @@ def test_dataset_encryption_with_selected_column_statistics():
for rg_idx in range(parquet_file.metadata.num_row_groups):
row_group = parquet_file.metadata.row_group(rg_idx)
- assert row_group.column(0).statistics is not None
- assert row_group.column(0).statistics.min == 2019
- assert row_group.column(0).statistics.max == 2022
+ stats0 = row_group.column(0).statistics
+ assert stats0 is not None
+ assert stats0.min == 2019
+ assert stats0.max == 2022
- assert row_group.column(1).statistics is not None
- assert row_group.column(1).statistics.min == 2
- assert row_group.column(1).statistics.max == 100
+ stats1 = row_group.column(1).statistics
+ assert stats1 is not None
+ assert stats1.min == 2
+ assert stats1.max == 100
assert row_group.column(2).statistics is None
diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py
index dc1a51e6d00..00f8bbf720d 100644
--- a/python/pyarrow/tests/test_device.py
+++ b/python/pyarrow/tests/test_device.py
@@ -59,11 +59,15 @@ def test_copy_to():
batch_copied = batch.copy_to(dest)
assert batch_copied.equals(batch)
- assert batch_copied["col"].buffers()[1].device == mm.device
- assert batch_copied["col"].buffers()[1].address != arr.buffers()[1].address
+ buffer = batch_copied.column("col").buffers()[1]
+ assert buffer is not None
+ assert buffer.device == mm.device
+ buffer_orig = arr.buffers()[1]
+ assert buffer_orig is not None
+ assert buffer.address != buffer_orig.address
with pytest.raises(TypeError, match="Argument 'destination' has incorrect type"):
- arr.copy_to(mm.device.device_type)
+ arr.copy_to(mm.device.device_type) # type: ignore[arg-type]
with pytest.raises(TypeError, match="Argument 'destination' has incorrect type"):
- batch.copy_to(mm.device.device_type)
+ batch.copy_to(mm.device.device_type) # type: ignore[arg-type]
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index ebac37e862b..941e73c8167 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -22,12 +22,13 @@
import weakref
from uuid import uuid4, UUID
import sys
+from typing import cast
import pytest
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
from pyarrow.vendored.version import Version
@@ -79,12 +80,14 @@ def __init__(self):
def __arrow_ext_serialize__(self):
# XXX pa.BaseExtensionType should expose C++ serialization method
+ assert isinstance(self.storage_type, IntegerType)
return self.storage_type.__arrow_ext_serialize__()
@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
+ assert isinstance(storage_type, IntegerType)
deserialized_storage_type = storage_type.__arrow_ext_deserialize__(
- serialized)
+ storage_type, serialized)
assert deserialized_storage_type == storage_type
return cls()
@@ -160,7 +163,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
class MyStructType(pa.ExtensionType):
- storage_type = pa.struct([('left', pa.int64()),
+ storage_type = pa.struct([('left', pa.int64()), # type: ignore[assignment]
('right', pa.int64())])
def __init__(self):
@@ -221,7 +224,7 @@ def __arrow_ext_serialize__(self):
@classmethod
def __arrow_ext_deserialize__(cls, storage_type, serialized):
assert serialized == b''
- return cls(storage_type)
+ return cls(storage_type, annotation=None)
def ipc_write_batch(batch):
@@ -432,8 +435,8 @@ def test_ext_array_wrap_array():
arr.validate(full=True)
assert isinstance(arr, pa.ChunkedArray)
assert arr.type == ty
- assert arr.chunk(0).storage == storage.chunk(0)
- assert arr.chunk(1).storage == storage.chunk(1)
+ assert arr.chunk(0).storage == storage.chunk(0) # type: ignore[union-attr]
+ assert arr.chunk(1).storage == storage.chunk(1) # type: ignore[union-attr]
# Wrong storage type
storage = pa.array([b"foo", b"bar", None])
@@ -442,7 +445,7 @@ def test_ext_array_wrap_array():
# Not an array or chunked array
with pytest.raises(TypeError, match="Expected array or chunked array"):
- ty.wrap_array(None)
+ ty.wrap_array(None) # type: ignore[arg-type]
def test_ext_scalar_from_array():
@@ -876,7 +879,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
def __eq__(self, other):
if isinstance(other, pa.BaseExtensionType):
return (isinstance(self, type(other)) and
- self.freq == other.freq)
+ self.freq == other.freq) # type: ignore[attr-defined]
else:
return NotImplemented
@@ -902,7 +905,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):
storage_type, serialized).freq
return PeriodTypeWithToPandasDtype(freq)
- def to_pandas_dtype(self):
+ def to_pandas_dtype(self): # type: ignore[override]
import pandas as pd
return pd.PeriodDtype(freq=self.freq)
@@ -1033,7 +1036,7 @@ def test_generic_ext_array_pickling(registered_period_type, pickle_module):
def test_generic_ext_type_register(registered_period_type):
# test that trying to register other type does not segfault
with pytest.raises(TypeError):
- pa.register_extension_type(pa.string())
+ pa.register_extension_type(pa.string()) # type: ignore[arg-type]
# register second time raises KeyError
period_type = PeriodType('D')
@@ -1058,11 +1061,13 @@ def test_parquet_period(tmpdir, registered_period_type):
# in the serialized arrow schema
meta = pq.read_metadata(filename)
assert meta.schema.column(0).physical_type == "INT64"
+ assert meta.metadata is not None
assert b"ARROW:schema" in meta.metadata
import base64
decoded_schema = base64.b64decode(meta.metadata[b"ARROW:schema"])
- schema = pa.ipc.read_schema(pa.BufferReader(decoded_schema))
+ schema = pa.ipc.read_schema(pa.BufferReader(
+ decoded_schema))
# Since the type could be reconstructed, the extension type metadata is
# absent.
assert schema.field("ext").metadata == {}
@@ -1434,6 +1439,7 @@ def test_tensor_class_methods(np_type_str):
storage = pa.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
pa.list_(arrow_type, 6))
arr = pa.ExtensionArray.from_storage(tensor_type, storage)
+ arr = cast(pa.FixedShapeTensorArray, arr)
expected = np.array(
[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]],
dtype=np.dtype(np_type_str)
@@ -1442,7 +1448,7 @@ def test_tensor_class_methods(np_type_str):
np.testing.assert_array_equal(arr.to_numpy_ndarray(), expected)
expected = np.array([[[7, 8, 9], [10, 11, 12]]], dtype=np.dtype(np_type_str))
- result = arr[1:].to_numpy_ndarray()
+ result = arr[1:].to_numpy_ndarray() # type: ignore[union-attr]
np.testing.assert_array_equal(result, expected)
values = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]
@@ -1452,35 +1458,43 @@ def test_tensor_class_methods(np_type_str):
tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 1, 2])
result = pa.ExtensionArray.from_storage(tensor_type, storage)
+ result = cast(pa.FixedShapeTensorArray, result)
expected = np.array(
[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]],
dtype=np.dtype(np_type_str)
)
np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
- result = flat_arr.reshape(1, 2, 3, 2)
+ result_reshaped = flat_arr.reshape(1, 2, 3, 2)
expected = np.array(
[[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]]],
dtype=np.dtype(np_type_str)
)
- np.testing.assert_array_equal(result, expected)
+ np.testing.assert_array_equal(result_reshaped, expected)
tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[0, 2, 1])
result = pa.ExtensionArray.from_storage(tensor_type, storage)
+ result = cast(pa.FixedShapeTensorArray, result)
expected = as_strided(flat_arr, shape=(1, 2, 3, 2),
strides=(bw * 12, bw * 6, bw, bw * 3))
np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
tensor_type = pa.fixed_shape_tensor(arrow_type, [2, 2, 3], permutation=[2, 0, 1])
- result = pa.ExtensionArray.from_storage(tensor_type, storage)
+ result = pa.ExtensionArray.from_storage(
+ tensor_type, storage) # type: ignore[assignment]
expected = as_strided(flat_arr, shape=(1, 3, 2, 2),
strides=(bw * 12, bw, bw * 6, bw * 2))
- np.testing.assert_array_equal(result.to_numpy_ndarray(), expected)
-
- assert result.type.permutation == [2, 0, 1]
- assert result.type.shape == [2, 2, 3]
+ np.testing.assert_array_equal(
+ result.to_numpy_ndarray(), expected) # type: ignore[union-attr]
+
+ result_type = result.type
+ assert isinstance(result, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.permutation == [2, 0, 1]
+ assert result_type.shape == [2, 2, 3]
assert result.to_tensor().shape == (1, 3, 2, 2)
- assert result.to_tensor().strides == (12 * bw, 1 * bw, 6 * bw, 2 * bw)
+ assert result.to_tensor().strides == (12 * bw, 1 * bw, 6 * bw,
+ 2 * bw)
@pytest.mark.numpy
@@ -1508,17 +1522,23 @@ def test_tensor_array_from_numpy(np_type_str):
arr = flat_arr.reshape(1, 3, 4)
tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
- assert tensor_array_from_numpy.type.shape == [3, 4]
- assert tensor_array_from_numpy.type.permutation == [0, 1]
- assert tensor_array_from_numpy.type.dim_names is None
+ result_type = tensor_array_from_numpy.type
+ assert isinstance(tensor_array_from_numpy, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.shape == [3, 4]
+ assert result_type.permutation == [0, 1]
+ assert result_type.dim_names is None
assert tensor_array_from_numpy.to_tensor() == pa.Tensor.from_numpy(arr)
arr = as_strided(flat_arr, shape=(1, 2, 3, 2),
strides=(bw * 12, bw * 6, bw, bw * 3))
tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)
- assert tensor_array_from_numpy.type.shape == [2, 2, 3]
- assert tensor_array_from_numpy.type.permutation == [0, 2, 1]
- assert tensor_array_from_numpy.type.dim_names is None
+ result_type = tensor_array_from_numpy.type
+ assert isinstance(tensor_array_from_numpy, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.shape == [2, 2, 3]
+ assert result_type.permutation == [0, 2, 1]
+ assert result_type.dim_names is None
assert tensor_array_from_numpy.to_tensor() == pa.Tensor.from_numpy(arr)
arr = flat_arr.reshape(1, 2, 3, 2)
@@ -1532,7 +1552,8 @@ def test_tensor_array_from_numpy(np_type_str):
arr = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
dtype=np.dtype(np_type_str))
expected = arr[1:]
- result = pa.FixedShapeTensorArray.from_numpy_ndarray(arr)[1:].to_numpy_ndarray()
+ result = cast(pa.FixedShapeTensorArray, pa.FixedShapeTensorArray.from_numpy_ndarray(
+ arr)[1:]).to_numpy_ndarray()
np.testing.assert_array_equal(result, expected)
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=np.dtype(np_type_str))
@@ -1559,22 +1580,27 @@ def test_tensor_array_from_numpy(np_type_str):
dim_names = ["a", "b"]
tensor_array_from_numpy = pa.FixedShapeTensorArray.from_numpy_ndarray(
arr, dim_names=dim_names)
- assert tensor_array_from_numpy.type.value_type == arrow_type
- assert tensor_array_from_numpy.type.shape == [2, 3]
- assert tensor_array_from_numpy.type.dim_names == dim_names
+ result_type = tensor_array_from_numpy.type
+ assert isinstance(tensor_array_from_numpy, pa.FixedShapeTensorArray)
+ assert isinstance(result_type, pa.FixedShapeTensorType)
+ assert result_type.value_type == arrow_type
+ assert result_type.shape == [2, 3]
+ assert result_type.dim_names == dim_names
with pytest.raises(ValueError, match="The length of dim_names"):
pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=['only_one'])
with pytest.raises(TypeError, match="dim_names must be a tuple or list"):
- pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=123)
+ pa.FixedShapeTensorArray.from_numpy_ndarray(
+ arr, dim_names=123) # type: ignore[arg-type]
with pytest.raises(TypeError, match="dim_names must be a tuple or list"):
pa.FixedShapeTensorArray.from_numpy_ndarray(
- arr, dim_names=(x for x in range(2)))
+ arr, dim_names=(x for x in range(2))) # type: ignore[arg-type]
with pytest.raises(TypeError, match="Each element of dim_names must be a string"):
- pa.FixedShapeTensorArray.from_numpy_ndarray(arr, dim_names=[0, 1])
+ pa.FixedShapeTensorArray.from_numpy_ndarray(
+ arr, dim_names=[0, 1]) # type: ignore[arg-type]
@pytest.mark.numpy
@@ -1845,14 +1871,18 @@ def test_bool8_to_numpy_conversion():
assert np.array_equal(arr_to_np, np_arr_no_nulls)
# same underlying buffer
- assert arr_to_np.ctypes.data == arr_no_nulls.buffers()[1].address
+ buffer = arr_no_nulls.buffers()[1]
+ assert buffer is not None
+ assert arr_to_np.ctypes.data == buffer.address
# if the user requests a writable array, a copy should be performed
arr_to_np_writable = arr_no_nulls.to_numpy(zero_copy_only=False, writable=True)
assert np.array_equal(arr_to_np_writable, np_arr_no_nulls)
# different underlying buffer
- assert arr_to_np_writable.ctypes.data != arr_no_nulls.buffers()[1].address
+ buffer = arr_no_nulls.buffers()[1]
+ assert buffer is not None
+ assert arr_to_np_writable.ctypes.data != buffer.address
@pytest.mark.numpy
@@ -1867,7 +1897,9 @@ def test_bool8_from_numpy_conversion():
assert arr_from_np == canonical_bool8_arr_no_nulls
# same underlying buffer
- assert arr_from_np.buffers()[1].address == np_arr_no_nulls.ctypes.data
+ buffer = arr_from_np.buffers()[1]
+ assert buffer is not None
+ assert buffer.address == np_arr_no_nulls.ctypes.data
# conversion only valid for 1-D arrays
with pytest.raises(
@@ -1882,7 +1914,7 @@ def test_bool8_from_numpy_conversion():
ValueError,
match="Cannot convert 0-D array to bool8 array",
):
- pa.Bool8Array.from_numpy(np.bool_())
+ pa.Bool8Array.from_numpy(np.bool_(False)) # type: ignore[arg-type]
# must use compatible storage type
with pytest.raises(
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 054bf920b26..a84b343b3dd 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -26,7 +26,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.tests.strategies as past
@@ -47,7 +47,7 @@ def datadir(base_datadir):
def random_path(prefix='feather_'):
- return tempfile.mktemp(prefix=prefix)
+ return tempfile.mktemp(prefix=prefix) # type: ignore[deprecated]
@pytest.fixture(scope="module", params=[1, 2])
@@ -63,7 +63,7 @@ def compression(request):
yield request.param
-TEST_FILES = None
+TEST_FILES: list[str] | None = None
def setup_module(module):
@@ -72,7 +72,7 @@ def setup_module(module):
def teardown_module(module):
- for path in TEST_FILES:
+ for path in TEST_FILES: # type: ignore[union-attr]
try:
os.remove(path)
except os.error:
@@ -95,6 +95,7 @@ def _check_pandas_roundtrip(df, expected=None, path=None,
if version is None:
version = 2
+ assert TEST_FILES is not None
TEST_FILES.append(path)
write_feather(df, path, compression=compression,
compression_level=compression_level, version=version)
@@ -114,6 +115,7 @@ def _check_arrow_roundtrip(table, path=None, compression=None):
if path is None:
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
write_feather(table, path, compression=compression)
if not os.path.exists(path):
@@ -126,10 +128,12 @@ def _check_arrow_roundtrip(table, path=None, compression=None):
def _assert_error_on_write(df, exc, path=None, version=2):
# check that we are raising the exception
# on writing
+ assert version in (1, 2)
if path is None:
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
def f():
@@ -149,6 +153,7 @@ def test_dataset(version):
}
table = pa.table(data)
+ assert TEST_FILES is not None
TEST_FILES.extend(paths)
for index, path in enumerate(paths):
rows = (
@@ -156,7 +161,8 @@ def test_dataset(version):
(index + 1) * (num_values[0] // num_files),
)
- write_feather(table[rows[0]: rows[1]], path, version=version)
+ write_feather(table[rows[0]: rows[1]], path,
+ version=version) # type: ignore[arg-type]
data = FeatherDataset(paths).read_table()
assert data.equals(table)
@@ -181,6 +187,7 @@ def test_read_table(version):
num_values = (100, 100)
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
values = np.random.randint(0, 100, size=num_values)
@@ -206,6 +213,7 @@ def test_use_threads(version):
num_values = (10, 10)
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
values = np.random.randint(0, 10, size=num_values)
@@ -231,6 +239,7 @@ def test_float_nulls(version):
num_values = 100
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
null_mask = np.random.randint(0, 10, size=num_values) < 3
@@ -292,6 +301,7 @@ def test_platform_numpy_integers(version):
def test_integer_with_nulls(version):
# pandas requires upcast to float dtype
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
@@ -330,6 +340,7 @@ def test_boolean_no_nulls(version):
def test_boolean_nulls(version):
# pandas requires upcast to object dtype
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
num_values = 100
@@ -348,6 +359,7 @@ def test_boolean_nulls(version):
def test_buffer_bounds_error(version):
# ARROW-1676
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
for i in range(16, 256):
@@ -360,6 +372,7 @@ def test_buffer_bounds_error(version):
@pytest.mark.numpy
def test_boolean_object_nulls(version):
+ assert np is not None
repeats = 100
table = pa.Table.from_arrays(
[np.array([False, None, True] * repeats, dtype=object)],
@@ -426,7 +439,8 @@ def test_empty_strings(version):
@pytest.mark.pandas
def test_all_none(version):
df = pd.DataFrame({'all_none': [None] * 10})
- if version == 1 and pa.pandas_compat._pandas_api.uses_string_dtype():
+ if (version == 1 and pa.pandas_compat # type: ignore[attr-defined]
+ ._pandas_api.uses_string_dtype()):
expected = df.astype("str")
else:
expected = df
@@ -552,6 +566,7 @@ def test_read_columns(version):
@pytest.mark.numpy
def test_overwritten_file(version):
path = random_path()
+ assert TEST_FILES is not None
TEST_FILES.append(path)
num_values = 100
@@ -585,12 +600,12 @@ def test_filelike_objects(version):
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_sparse_dataframe(version):
- if not pa.pandas_compat._pandas_api.has_sparse:
+ if not pa.pandas_compat._pandas_api.has_sparse: # type: ignore[attr-defined]
pytest.skip("version of pandas does not support SparseDataFrame")
# GH #221
data = {'A': [0, 1, 2],
'B': [1, 0, 1]}
- df = pd.DataFrame(data).to_sparse(fill_value=1)
+ df = pd.DataFrame(data).to_sparse(fill_value=1) # type: ignore[attr-defined]
expected = df.to_dense()
_check_pandas_roundtrip(df, expected, version=version)
@@ -692,8 +707,9 @@ def test_v2_lz4_default_compression():
if not pa.Codec.is_available('lz4_frame'):
pytest.skip("LZ4 compression support is not built in C++")
+ assert np is not None
# some highly compressible data
- t = pa.table([np.repeat(0, 100000)], names=['f0'])
+ t = pa.table([np.repeat(0, 100000)], names=['f0']) # type: ignore[arg-type]
buf = io.BytesIO()
write_feather(t, buf)
diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py
index 9e7bb312398..1294e681be4 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -28,19 +28,21 @@
import traceback
import json
from datetime import datetime
+from typing import Any
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import pyarrow as pa
from pyarrow.lib import IpcReadOptions, ReadStats, tobytes
from pyarrow.util import find_free_port
from pyarrow.tests import util
+from typing import TYPE_CHECKING
-try:
+if TYPE_CHECKING:
from pyarrow import flight
from pyarrow.flight import (
FlightClient, FlightServerBase,
@@ -49,13 +51,26 @@
ClientMiddleware, ClientMiddlewareFactory,
FlightCallOptions,
)
-except ImportError:
- flight = None
- FlightClient, FlightServerBase = object, object
- ServerAuthHandler, ClientAuthHandler = object, object
- ServerMiddleware, ServerMiddlewareFactory = object, object
- ClientMiddleware, ClientMiddlewareFactory = object, object
- FlightCallOptions = object
+else:
+ try:
+ from pyarrow import flight
+ from pyarrow.flight import (
+ FlightClient, FlightServerBase,
+ ServerAuthHandler, ClientAuthHandler,
+ ServerMiddleware, ServerMiddlewareFactory,
+ ClientMiddleware, ClientMiddlewareFactory,
+ FlightCallOptions,
+ )
+ except ImportError:
+ flight = None # type: ignore[assignment]
+ FlightClient, FlightServerBase = object, object
+ ServerAuthHandler, ClientAuthHandler = ( # type: ignore[misc]
+ object, object) # type: ignore[assignment]
+ ServerMiddleware, ServerMiddlewareFactory = ( # type: ignore[misc]
+ object, object) # type: ignore[assignment]
+ ClientMiddleware, ClientMiddlewareFactory = ( # type: ignore[misc]
+ object, object) # type: ignore[assignment]
+        FlightCallOptions = object  # type: ignore[assignment, misc]
# Marks all of the tests in this module
# Ignore these with pytest ... -m 'not flight'
@@ -196,7 +211,7 @@ def do_put(self, context, descriptor, reader, writer):
assert buf is not None
client_counter, = struct.unpack(' 0
key = 'arrow-datasets/nyc-taxi/year=2019/month=6/part-0.parquet'
with fs.open_input_stream(key) as f:
@@ -1931,6 +1940,8 @@ def test_s3_real_aws_region_selection():
# Taken from a registry of open S3-hosted datasets
# at https://github.com/awslabs/open-data-registry
fs, path = FileSystem.from_uri('s3://mf-nwp-models/README.txt')
+ from pyarrow.fs import S3FileSystem
+ assert isinstance(fs, S3FileSystem)
assert fs.region == 'eu-west-1'
with fs.open_input_stream(path) as f:
assert b"Meteo-France Atmospheric models on AWS" in f.read(50)
@@ -1938,6 +1949,8 @@ def test_s3_real_aws_region_selection():
# Passing an explicit region disables auto-selection
fs, path = FileSystem.from_uri(
's3://mf-nwp-models/README.txt?region=us-east-2')
+ from pyarrow.fs import S3FileSystem
+ assert isinstance(fs, S3FileSystem)
assert fs.region == 'us-east-2'
# Reading from the wrong region may still work for public buckets...
@@ -1948,6 +1961,8 @@ def test_s3_real_aws_region_selection():
with pytest.raises(IOError, match="Bucket '.*' not found"):
FileSystem.from_uri('s3://x-arrow-nonexistent-bucket')
fs, path = FileSystem.from_uri('s3://x-arrow-nonexistent-bucket?region=us-east-3')
+ from pyarrow.fs import S3FileSystem
+ assert isinstance(fs, S3FileSystem)
assert fs.region == 'us-east-3'
# allow_delayed_open has a side-effect of delaying errors until I/O is performed.
@@ -2188,13 +2203,16 @@ def test_uwsgi_integration():
def test_fsspec_filesystem_from_uri():
try:
- from fsspec.implementations.local import LocalFileSystem
- from fsspec.implementations.memory import MemoryFileSystem
+ from fsspec.implementations.local import ( # type: ignore[import-untyped]
+ LocalFileSystem)
+ from fsspec.implementations.memory import ( # type: ignore[import-untyped]
+ MemoryFileSystem)
except ImportError:
pytest.skip("fsspec not installed")
fs, path = FileSystem.from_uri("fsspec+memory://path/to/data.parquet")
- expected_fs = PyFileSystem(FSSpecHandler(MemoryFileSystem()))
+ expected_fs = PyFileSystem(FSSpecHandler(
+ MemoryFileSystem())) # type: ignore[abstract]
assert fs == expected_fs
assert path == "/path/to/data.parquet"
@@ -2202,7 +2220,8 @@ def test_fsspec_filesystem_from_uri():
# arrow local filesystem
uri = "file:///tmp/my.file"
fs, _ = FileSystem.from_uri(f"fsspec+{uri}")
- expected_fs = PyFileSystem(FSSpecHandler(LocalFileSystem()))
+ expected_fs = PyFileSystem(FSSpecHandler(
+ LocalFileSystem())) # type: ignore[abstract]
assert fs == expected_fs
@@ -2212,7 +2231,7 @@ def test_fsspec_delete_root_dir_contents():
except ImportError:
pytest.skip("fsspec not installed")
- fs = FSSpecHandler(MemoryFileSystem())
+ fs = FSSpecHandler(MemoryFileSystem()) # type: ignore[abstract]
# Create some files and directories
fs.create_dir("test_dir", recursive=True)
@@ -2226,7 +2245,7 @@ def test_fsspec_delete_root_dir_contents():
# Verify files exist before deletion
def get_type(path):
- return fs.get_file_info([path])[0].type
+ return cast(list[FileInfo], fs.get_file_info([path]))[0].type
assert get_type("test_file.txt") == FileType.File
assert get_type("test_dir") == FileType.Directory
@@ -2244,13 +2263,13 @@ def get_type(path):
def test_huggingface_filesystem_from_uri():
pytest.importorskip("fsspec")
try:
- from huggingface_hub import HfFileSystem
+ from huggingface_hub import HfFileSystem # type: ignore[import-not-found]
except ImportError:
pytest.skip("huggingface_hub not installed")
fs, path = FileSystem.from_uri(
"hf://datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet"
)
- expected_fs = PyFileSystem(FSSpecHandler(HfFileSystem()))
+ expected_fs = PyFileSystem(FSSpecHandler(HfFileSystem())) # type: ignore[abstract]
assert fs == expected_fs
assert path == "datasets/stanfordnlp/imdb/plain_text/train-00000-of-00001.parquet"
diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py
index 80d119a4853..01fc6f032d5 100644
--- a/python/pyarrow/tests/test_gandiva.py
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -174,9 +174,12 @@ def test_in_expr_todo():
assert result.to_array().equals(pa.array([1, 2], type=pa.uint32()))
# timestamp
- datetime_1 = datetime.datetime.utcfromtimestamp(1542238951.621877)
- datetime_2 = datetime.datetime.utcfromtimestamp(1542238911.621877)
- datetime_3 = datetime.datetime.utcfromtimestamp(1542238051.621877)
+ datetime_1 = datetime.datetime.fromtimestamp(
+ 1542238951.621877, tz=datetime.timezone.utc)
+ datetime_2 = datetime.datetime.fromtimestamp(
+ 1542238911.621877, tz=datetime.timezone.utc)
+ datetime_3 = datetime.datetime.fromtimestamp(
+ 1542238051.621877, tz=datetime.timezone.utc)
arr = pa.array([datetime_1, datetime_2, datetime_3])
table = pa.Table.from_arrays([arr], ["a"])
diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py
index 912953ae60d..50d81b686ac 100644
--- a/python/pyarrow/tests/test_gdb.py
+++ b/python/pyarrow/tests/test_gdb.py
@@ -101,6 +101,8 @@ def wait_until_ready(self):
Record output until the gdb prompt displays. Return recorded output.
"""
# TODO: add timeout?
+ assert self.proc is not None
+ assert self.proc.stdout is not None
while (not self.last_stdout_line.startswith(b"(gdb) ") and
self.proc.poll() is None):
block = self.proc.stdout.read(4096)
@@ -125,6 +127,8 @@ def wait_until_ready(self):
return out
def issue_command(self, line):
+ assert self.proc is not None
+ assert self.proc.stdin is not None
line = line.encode('utf-8') + b"\n"
if self.verbose:
sys.stdout.buffer.write(line)
@@ -158,6 +162,7 @@ def select_frame(self, func_name):
m = re.search(pat, out)
if m is None:
pytest.fail(f"Could not select frame for function {func_name}")
+ return # Never reached, but helps type checker
frame_num = int(m[1])
out = self.run_command(f"frame {frame_num}")
@@ -165,6 +170,8 @@ def select_frame(self, func_name):
def join(self):
if self.proc is not None:
+ assert self.proc.stdin is not None
+ assert self.proc.stdout is not None
self.proc.stdin.close()
self.proc.stdout.close() # avoid ResourceWarning
self.proc.kill()
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index a6d3546e57c..3837b553b8b 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -24,16 +24,17 @@
import math
import os
import pathlib
-import pytest
+import pytest # type: ignore[import-not-found]
import random
import sys
import tempfile
+from typing import cast
import weakref
try:
import numpy as np
except ImportError:
- np = None
+ pass
from pyarrow.util import guid
from pyarrow import Codec
@@ -44,7 +45,7 @@ def check_large_seeks(file_factory, expected_error=None):
if sys.platform in ('win32', 'darwin', 'emscripten'):
pytest.skip("need sparse file support")
try:
- filename = tempfile.mktemp(prefix='test_io')
+    fd, filename = tempfile.mkstemp(prefix='test_io')
+    os.close(fd)  # close the descriptor from mkstemp; only the path is needed
with open(filename, 'wb') as f:
f.truncate(2 ** 32 + 10)
f.seek(2 ** 32 + 5)
@@ -234,7 +235,7 @@ def read_buffer(self, nbytes):
return memoryview(dst_buf)[:nbytes]
duck_reader = DuckReader()
- with pa.PythonFile(duck_reader, mode='r') as f:
+ with pa.PythonFile(duck_reader, mode='r') as f: # type: ignore[arg-type]
buf = f.read_buffer(length)
assert len(buf) == length
assert memoryview(buf).tobytes() == dst_buf[:length]
@@ -474,7 +475,7 @@ def test_buffer_to_numpy():
byte_array = bytearray(20)
byte_array[0] = 42
buf = pa.py_buffer(byte_array)
- array = np.frombuffer(buf, dtype="uint8")
+ array = np.frombuffer(buf, dtype="uint8") # type: ignore[arg-type]
assert array[0] == byte_array[0]
byte_array[0] += 1
assert array[0] == byte_array[0]
@@ -557,7 +558,7 @@ def test_buffer_eq_bytes():
assert buf != b'some dat1'
with pytest.raises(TypeError):
- buf == 'some data'
+ _ = buf == 'some data'
def test_buffer_getitem():
@@ -598,22 +599,22 @@ def test_buffer_slicing():
with pytest.raises(IndexError):
buf.slice(len(buf) + 1)
- assert buf[11:].to_pybytes() == b""
+ assert cast(pa.Buffer, buf[11:]).to_pybytes() == b""
# Slice stop exceeds buffer length
with pytest.raises(IndexError):
buf.slice(1, len(buf))
- assert buf[1:11].to_pybytes() == buf.to_pybytes()[1:]
+ assert cast(pa.Buffer, buf[1:11]).to_pybytes() == buf.to_pybytes()[1:]
# Negative length
with pytest.raises(IndexError):
buf.slice(1, -1)
# Test slice notation
- assert buf[2:].equals(buf.slice(2))
- assert buf[2:5].equals(buf.slice(2, 3))
- assert buf[-5:].equals(buf.slice(len(buf) - 5))
- assert buf[-5:-2].equals(buf.slice(len(buf) - 5, 3))
+ assert cast(pa.Buffer, buf[2:]).equals(buf.slice(2))
+ assert cast(pa.Buffer, buf[2:5]).equals(buf.slice(2, 3))
+ assert cast(pa.Buffer, buf[-5:]).equals(buf.slice(len(buf) - 5))
+ assert cast(pa.Buffer, buf[-5:-2]).equals(buf.slice(len(buf) - 5, 3))
with pytest.raises(IndexError):
buf[::-1]
@@ -623,7 +624,8 @@ def test_buffer_slicing():
n = len(buf)
for start in range(-n * 2, n * 2):
for stop in range(-n * 2, n * 2):
- assert buf[start:stop].to_pybytes() == buf.to_pybytes()[start:stop]
+            assert (cast(pa.Buffer, buf[start:stop]).to_pybytes()
+                    == buf.to_pybytes()[start:stop])
def test_buffer_hashing():
@@ -640,7 +642,7 @@ def test_buffer_protocol_respects_immutability():
# immutable
a = b'12345'
arrow_ref = pa.py_buffer(a)
- numpy_ref = np.frombuffer(arrow_ref, dtype=np.uint8)
+ numpy_ref = np.frombuffer(arrow_ref, dtype=np.uint8) # type: ignore[arg-type]
assert not numpy_ref.flags.writeable
@@ -652,7 +654,8 @@ def test_foreign_buffer():
buf = pa.foreign_buffer(addr, size, obj)
wr = weakref.ref(obj)
del obj
- assert np.frombuffer(buf, dtype=np.int32).tolist() == [1, 2]
+ assert (np.frombuffer(buf, dtype=np.int32).tolist() # type: ignore[arg-type]
+ == [1, 2])
assert wr() is not None
del buf
assert wr() is None
@@ -688,6 +691,7 @@ def test_non_cpu_buffer(pickle_module):
cuda_buf = ctx.buffer_from_data(data)
arr = pa.FixedSizeBinaryArray.from_buffers(pa.binary(7), 1, [None, cuda_buf])
buf_on_gpu = arr.buffers()[1]
+ assert buf_on_gpu is not None
assert buf_on_gpu.size == cuda_buf.size
assert buf_on_gpu.address == cuda_buf.address
@@ -708,7 +712,7 @@ def test_non_cpu_buffer(pickle_module):
assert cuda_sliced.to_pybytes() == b'st'
# Sliced buffers with same address
- assert buf_on_gpu_sliced.equals(cuda_buf[2:4])
+ assert cast(pa.Buffer, buf_on_gpu_sliced).equals(cuda_buf[2:4])
# Buffers on different devices
msg_device = "Device on which the data resides differs between buffers"
@@ -720,13 +724,14 @@ def test_non_cpu_buffer(pickle_module):
arr_short = np.array([b'sting'])
cuda_buf_short = ctx.buffer_from_data(arr_short)
with pytest.raises(NotImplementedError, match=msg):
- buf_on_gpu_sliced.equals(cuda_buf_short)
+ cast(pa.Buffer, buf_on_gpu_sliced).equals(cuda_buf_short)
arr_short = pa.FixedSizeBinaryArray.from_buffers(
pa.binary(5), 1, [None, cuda_buf_short]
)
buf_on_gpu_short = arr_short.buffers()[1]
+ assert buf_on_gpu_short is not None
with pytest.raises(NotImplementedError, match=msg):
- buf_on_gpu_sliced.equals(buf_on_gpu_short)
+ cast(pa.Buffer, buf_on_gpu_sliced).equals(buf_on_gpu_short)
with pytest.raises(NotImplementedError, match=msg):
buf_on_gpu.hex()
@@ -811,8 +816,9 @@ def test_cache_options_pickling(pickle_module):
@pytest.mark.numpy
@pytest.mark.parametrize("compression", [
- pytest.param(
- "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+ pytest.param("bz2", marks=pytest.mark.xfail(
+ raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined]
+ )
),
"brotli",
"gzip",
@@ -843,6 +849,7 @@ def test_compress_decompress(compression):
assert isinstance(decompressed_bytes, bytes)
+ assert isinstance(decompressed_buf, pa.Buffer)
assert decompressed_buf.equals(test_buf)
assert decompressed_bytes == test_data
@@ -852,8 +859,9 @@ def test_compress_decompress(compression):
@pytest.mark.numpy
@pytest.mark.parametrize("compression", [
- pytest.param(
- "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+ pytest.param("bz2", marks=pytest.mark.xfail(
+ raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined]
+ )
),
"brotli",
"gzip",
@@ -910,6 +918,7 @@ def test_compression_level(compression):
assert isinstance(decompressed_bytes, bytes)
+ assert isinstance(decompressed_buf, pa.Buffer)
assert decompressed_buf.equals(test_buf)
assert decompressed_bytes == test_data
@@ -951,12 +960,12 @@ def test_buffer_memoryview_is_immutable():
assert result.readonly
with pytest.raises(TypeError) as exc:
- result[0] = b'h'
+ result[0] = b'h' # type: ignore[index]
assert 'cannot modify read-only' in str(exc.value)
b = bytes(buf)
with pytest.raises(TypeError) as exc:
- b[0] = b'h'
+ b[0] = b'h' # type: ignore[index]
assert 'cannot modify read-only' in str(exc.value)
@@ -1748,9 +1757,9 @@ def test_unknown_compression_raises():
"gzip",
"lz4",
"zstd",
- pytest.param(
- "snappy",
- marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
+ pytest.param("snappy", marks=pytest.mark.xfail(
+ raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined]
+ )
)
])
def test_compressed_roundtrip(compression):
@@ -2021,7 +2030,7 @@ def test_input_stream_native_file():
def test_input_stream_errors(tmpdir):
buf = memoryview(b"")
with pytest.raises(ValueError):
- pa.input_stream(buf, compression="foo")
+ pa.input_stream(buf, compression="foo") # type: ignore[reportArgumentType]
for arg in [bytearray(), StringIO()]:
with pytest.raises(TypeError):
@@ -2198,7 +2207,7 @@ def check_data(data, **kwargs):
def test_output_stream_errors(tmpdir):
buf = memoryview(bytearray())
with pytest.raises(ValueError):
- pa.output_stream(buf, compression="foo")
+ pa.output_stream(buf, compression="foo") # type: ignore[reportArgumentType]
for arg in [bytearray(), StringIO()]:
with pytest.raises(TypeError):
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index b4db9cd0875..0a096041bae 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -24,23 +24,27 @@
import socket
import threading
import weakref
+from typing import TYPE_CHECKING, cast
-try:
+if TYPE_CHECKING:
import numpy as np
-except ImportError:
- np = None
+ import pandas as pd
+ from pandas.testing import assert_frame_equal
+else:
+ try:
+ import numpy as np
+ except ImportError:
+ pass
+ try:
+ from pandas.testing import assert_frame_equal
+ import pandas as pd
+ except ImportError:
+ pass
import pyarrow as pa
from pyarrow.tests.util import changed_environ, invoke_script
-try:
- from pandas.testing import assert_frame_equal
- import pandas as pd
-except ImportError:
- pass
-
-
class IpcFixture:
write_stats = None
@@ -48,6 +52,9 @@ def __init__(self, sink_factory=lambda: io.BytesIO()):
self._sink_factory = sink_factory
self.sink = self.get_sink()
+ def _get_writer(self, sink, schema):
+ ... # Implemented in subclasses
+
def get_sink(self):
return self._sink_factory()
@@ -59,6 +66,7 @@ def write_batches(self, num_batches=5, as_table=False):
schema = pa.schema([('one', pa.float64()), ('two', pa.utf8())])
writer = self._get_writer(self.sink, schema)
+ assert writer is not None
batches = []
for i in range(num_batches):
@@ -385,7 +393,8 @@ def test_stream_write_table_batches(stream_fixture):
'one': np.random.randn(20),
})
- b1 = pa.RecordBatch.from_pandas(df[:10], preserve_index=False)
+ b1 = pa.RecordBatch.from_pandas(
+ df[:10], preserve_index=False) # type: ignore[arg-type]
b2 = pa.RecordBatch.from_pandas(df, preserve_index=False)
table = pa.Table.from_batches([b1, b2, b1])
@@ -929,7 +938,7 @@ def test_ipc_file_stream_has_eos():
buffer = sink.getvalue()
# skip the file magic
- reader = pa.ipc.open_stream(buffer[8:])
+ reader = pa.ipc.open_stream(cast(pa.Buffer, buffer[8:]))
# will fail if encounters footer data instead of eos
rdf = reader.read_pandas()
@@ -968,7 +977,8 @@ def test_batches_with_custom_metadata_roundtrip(ipc_type):
with file_factory(sink, batch.schema) as writer:
for i in range(batch_count):
- writer.write_batch(batch, custom_metadata={"batch_id": str(i)})
+ writer.write_batch(batch, custom_metadata={ # type: ignore[arg-type]
+ "batch_id": str(i)})
# write a batch without custom metadata
writer.write_batch(batch)
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index c3f9fe333bd..c0b6b8ecd0d 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -23,11 +23,16 @@
import json
import string
import unittest
+from typing import TYPE_CHECKING
-try:
+if TYPE_CHECKING:
import numpy as np
-except ImportError:
- np = None
+else:
+ try:
+ import numpy as np
+ except ImportError:
+ pass
+
import pytest
import pyarrow as pa
@@ -317,6 +322,9 @@ def test_stress_block_sizes(self):
class BaseTestJSONRead(BaseTestJSON):
+ def read_json(self, *args, **kwargs) -> pa.Table: # type: ignore[empty-body]
+ ... # Implemented in subclasses
+
def read_bytes(self, b, **kwargs):
return self.read_json(pa.py_buffer(b), **kwargs)
@@ -352,6 +360,8 @@ def test_reconcile_across_blocks(self):
class BaseTestStreamingJSONRead(BaseTestJSON):
+ use_threads: bool = False # Set by subclasses
+
def open_json(self, json, *args, **kwargs):
"""
Reads the JSON file into memory using pyarrow's open_json
diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py
index d2ba780efc7..b5d4e74f126 100644
--- a/python/pyarrow/tests/test_jvm.py
+++ b/python/pyarrow/tests/test_jvm.py
@@ -38,11 +38,13 @@ def root_allocator():
arrow_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..')
pom_path = os.path.join(arrow_dir, 'java', 'pom.xml')
tree = ET.parse(pom_path)
- version = tree.getroot().find(
+ version_element = tree.getroot().find(
'POM:version',
namespaces={
'POM': 'http://maven.apache.org/POM/4.0.0'
- }).text
+ })
+ assert version_element is not None
+ version = version_element.text
jar_path = os.path.join(
arrow_dir, 'java', 'tools', 'target',
f'arrow-tools-{version}-jar-with-dependencies.jar')
@@ -76,8 +78,8 @@ def test_jvm_buffer(root_allocator):
def test_jvm_buffer_released(root_allocator):
- import jpype.imports # noqa
- from java.lang import IllegalArgumentException
+ import jpype.imports # type: ignore[import-untyped, import-not-found] # noqa
+ from java.lang import IllegalArgumentException # type: ignore[import-not-found]
jvm_buffer = root_allocator.buffer(8)
jvm_buffer.release()
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index 27154a6f34f..d0e61d758cb 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -77,7 +77,7 @@ def fix_example_values(actual_cols, expected_cols):
if not pd.isnull(v):
exp = d.as_tuple().exponent
factor = 10 ** -exp
- converted_decimals[i] = (
+ converted_decimals[i] = ( # type: ignore[call-overload,assignment]
decimal.Decimal(round(v * factor)).scaleb(exp))
expected = pd.Series(converted_decimals)
@@ -314,7 +314,7 @@ def test_buffer_readwrite():
# deprecated keyword order
buffer_output_stream = pa.BufferOutputStream()
with pytest.warns(FutureWarning):
- orc.write_table(buffer_output_stream, table)
+ orc.write_table(buffer_output_stream, table) # type: ignore[arg-type]
buffer_reader = pa.BufferReader(buffer_output_stream.getvalue())
orc_file = orc.ORCFile(buffer_reader)
output_table = orc_file.read()
@@ -356,8 +356,8 @@ def test_buffer_readwrite_with_writeoptions():
buffer_output_stream = pa.BufferOutputStream()
with pytest.warns(FutureWarning):
orc.write_table(
- buffer_output_stream,
- table,
+ buffer_output_stream, # type: ignore[reportArgumentType]
+ table, # type: ignore[reportArgumentType]
compression='uncompressed',
file_version='0.11',
row_index_stride=20000,
@@ -444,20 +444,20 @@ def test_buffer_readwrite_with_bad_writeoptions():
orc.write_table(
table,
buffer_output_stream,
- compression=0,
+ compression=0, # type: ignore[reportArgumentType]
)
with pytest.raises(ValueError):
orc.write_table(
table,
buffer_output_stream,
- compression='none',
+ compression='none', # type: ignore[reportArgumentType]
)
with pytest.raises(ValueError):
orc.write_table(
table,
buffer_output_stream,
- compression='zlid',
+ compression='zlid', # type: ignore[reportArgumentType]
)
# compression_block_size must be a positive integer
@@ -487,20 +487,20 @@ def test_buffer_readwrite_with_bad_writeoptions():
orc.write_table(
table,
buffer_output_stream,
- compression_strategy=0,
+ compression_strategy=0, # type: ignore[reportArgumentType]
)
with pytest.raises(ValueError):
orc.write_table(
table,
buffer_output_stream,
- compression_strategy='no',
+ compression_strategy='no', # type: ignore[reportArgumentType]
)
with pytest.raises(ValueError):
orc.write_table(
table,
buffer_output_stream,
- compression_strategy='large',
+ compression_strategy='large', # type: ignore[reportArgumentType]
)
# row_index_stride must be a positive integer
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 7f9b04eaabd..b151ef4a80b 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -28,37 +28,34 @@
import hypothesis as h
import hypothesis.strategies as st
import pytest
-try:
- import numpy as np
- import numpy.testing as npt
- try:
- _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning
- except AttributeError:
- from numpy.exceptions import (
- VisibleDeprecationWarning as _np_VisibleDeprecationWarning
- )
-except ImportError:
- np = None
+import pyarrow as pa
from pyarrow.pandas_compat import get_logical_type, _pandas_api
from pyarrow.tests.util import invoke_script, random_ascii, rands
import pyarrow.tests.strategies as past
import pyarrow.tests.util as test_util
from pyarrow.vendored.version import Version
-import pyarrow as pa
try:
from pyarrow import parquet as pq
except ImportError:
pass
-try:
- import pandas as pd
- import pandas.testing as tm
- from .pandas_examples import dataframe_with_arrays, dataframe_with_lists
-except ImportError:
- pass
+pd = pytest.importorskip("pandas")
+np = pytest.importorskip("numpy")
+
+import numpy.testing as npt # noqa: E402
+import pandas.testing as tm # noqa: E402
+from .pandas_examples import dataframe_with_arrays, dataframe_with_lists # noqa: E402
+try:
+ _np_VisibleDeprecationWarning = (
+ np.VisibleDeprecationWarning # type: ignore[attr-defined]
+ )
+except AttributeError:
+ from numpy.exceptions import (
+ VisibleDeprecationWarning as _np_VisibleDeprecationWarning
+ )
# Marks all of the tests in this module
pytestmark = pytest.mark.pandas
@@ -77,14 +74,10 @@ def _alltypes_example(size=100):
'float32': np.arange(size, dtype=np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0,
- 'datetime[s]': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[s]'),
- 'datetime[ms]': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[ms]'),
- 'datetime[us]': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[us]'),
- 'datetime[ns]': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[ns]'),
+        'datetime[s]': pd.date_range(
+            "2016-01-01T00:00:00.001", periods=size, freq='s').values,
+        'datetime[ms]': pd.date_range(
+            "2016-01-01T00:00:00.001", periods=size, freq='ms').values,
+        'datetime[us]': pd.date_range(
+            "2016-01-01T00:00:00.001", periods=size, freq='us').values,
+        'datetime[ns]': pd.date_range(
+            "2016-01-01T00:00:00.001", periods=size, freq='ns').values,
'timedelta64[s]': np.arange(0, size, dtype='timedelta64[s]'),
'timedelta64[ms]': np.arange(0, size, dtype='timedelta64[ms]'),
'timedelta64[us]': np.arange(0, size, dtype='timedelta64[us]'),
@@ -98,7 +91,7 @@ def _alltypes_example(size=100):
def _check_pandas_roundtrip(df, expected=None, use_threads=False,
expected_schema=None,
check_dtype=True, schema=None,
- preserve_index=False,
+ preserve_index: bool | None = False,
as_batch=False):
klass = pa.RecordBatch if as_batch else pa.Table
table = klass.from_pandas(df, schema=schema,
@@ -714,7 +707,7 @@ def test_mismatch_metadata_schema(self):
# OPTION 1: casting after conversion
table = pa.Table.from_pandas(df)
# cast the "datetime" column to be tz-aware
- new_col = table["datetime"].cast(pa.timestamp('ns', tz="UTC"))
+ new_col = table.column(0).cast(pa.timestamp('ns', tz="UTC"))
new_table1 = table.set_column(
0, pa.field("datetime", new_col.type), new_col
)
@@ -982,7 +975,7 @@ def test_float_with_null_as_integer(self):
schema = pa.schema([pa.field('has_nulls', ty)])
result = pa.Table.from_pandas(df, schema=schema,
preserve_index=False)
- assert result[0].chunk(0).equals(expected)
+ assert result.column(0).chunk(0).equals(expected)
def test_int_object_nulls(self):
arr = np.array([None, 1, np.int64(3)] * 5, dtype=object)
@@ -1144,7 +1137,7 @@ def test_python_datetime(self):
})
table = pa.Table.from_pandas(df)
- assert isinstance(table[0].chunk(0), pa.TimestampArray)
+ assert isinstance(table.column(0).chunk(0), pa.TimestampArray)
result = table.to_pandas()
# Pandas v2 defaults to [ns], but Arrow defaults to [us] time units
@@ -1201,7 +1194,7 @@ class MyDatetime(datetime):
df = pd.DataFrame({"datetime": pd.Series(date_array, dtype=object)})
table = pa.Table.from_pandas(df)
- assert isinstance(table[0].chunk(0), pa.TimestampArray)
+ assert isinstance(table.column(0).chunk(0), pa.TimestampArray)
result = table.to_pandas()
@@ -1225,7 +1218,7 @@ class MyDate(date):
df = pd.DataFrame({"date": pd.Series(date_array, dtype=object)})
table = pa.Table.from_pandas(df)
- assert isinstance(table[0].chunk(0), pa.Date32Array)
+ assert isinstance(table.column(0).chunk(0), pa.Date32Array)
result = table.to_pandas()
expected_df = pd.DataFrame(
@@ -1737,7 +1730,7 @@ def test_bytes_to_binary(self):
df = pd.DataFrame({'strings': values})
table = pa.Table.from_pandas(df)
- assert table[0].type == pa.binary()
+ assert table.column(0).type == pa.binary()
values2 = [b'qux', b'foo', None, b'barz', b'qux', None]
expected = pd.DataFrame({'strings': values2})
@@ -1758,7 +1751,7 @@ def test_bytes_exceed_2gb(self):
arr = None
table = pa.Table.from_pandas(df)
- assert table[0].num_chunks == 2
+ assert table.column(0).num_chunks == 2
@pytest.mark.large_memory
@pytest.mark.parametrize('char', ['x', b'x'])
@@ -1900,13 +1893,13 @@ def test_table_str_to_categorical_without_na(self, string_type):
zero_copy_only=True)
# chunked array
- result = table["strings"].to_pandas(strings_to_categorical=True)
+ result = table.column("strings").to_pandas(strings_to_categorical=True)
expected = pd.Series(pd.Categorical(values), name="strings")
tm.assert_series_equal(result, expected)
with pytest.raises(pa.ArrowInvalid):
- table["strings"].to_pandas(strings_to_categorical=True,
- zero_copy_only=True)
+ table.column("strings").to_pandas(strings_to_categorical=True,
+ zero_copy_only=True)
@pytest.mark.parametrize(
"string_type", [pa.string(), pa.large_string(), pa.string_view()]
@@ -1927,13 +1920,13 @@ def test_table_str_to_categorical_with_na(self, string_type):
zero_copy_only=True)
# chunked array
- result = table["strings"].to_pandas(strings_to_categorical=True)
+ result = table.column("strings").to_pandas(strings_to_categorical=True)
expected = pd.Series(pd.Categorical(values), name="strings")
tm.assert_series_equal(result, expected)
with pytest.raises(pa.ArrowInvalid):
- table["strings"].to_pandas(strings_to_categorical=True,
- zero_copy_only=True)
+ table.column("strings").to_pandas(strings_to_categorical=True,
+ zero_copy_only=True)
# Regression test for ARROW-2101
def test_array_of_bytes_to_strings(self):
@@ -2515,7 +2508,7 @@ def test_auto_chunking_on_list_overflow(self):
table = pa.Table.from_pandas(df)
table.validate(full=True)
- column_a = table[0]
+ column_a = table.column(0)
assert column_a.num_chunks == 2
assert len(column_a.chunk(0)) == 2**21 - 1
assert len(column_a.chunk(1)) == 1
@@ -3159,9 +3152,8 @@ def test_strided_data_import(self):
boolean_objects[5] = None
cases.append(boolean_objects)
- cases.append(np.arange("2016-01-01T00:00:00.001", N * K,
- dtype='datetime64[ms]')
- .reshape(N, K).copy())
+ cases.append(pd.date_range("2016-01-01T00:00:00.001", periods=N * K, freq='ms')
+ .values.reshape(N, K).copy())
strided_mask = (random_numbers > 0).astype(bool)[:, 0]
@@ -3775,8 +3767,8 @@ def test_recordbatchlist_to_pandas():
def test_recordbatch_table_pass_name_to_pandas():
rb = pa.record_batch([pa.array([1, 2, 3, 4])], names=['a0'])
t = pa.table([pa.array([1, 2, 3, 4])], names=['a0'])
- assert rb[0].to_pandas().name == 'a0'
- assert t[0].to_pandas().name == 'a0'
+ assert rb.column(0).to_pandas().name == 'a0'
+ assert t.column(0).to_pandas().name == 'a0'
# ----------------------------------------------------------------------
@@ -4314,13 +4306,13 @@ def test_array_protocol():
# default conversion
result = pa.table(df)
expected = pa.array([1, 2, None], pa.int64())
- assert result[0].chunk(0).equals(expected)
+ assert result.column(0).chunk(0).equals(expected)
# with specifying schema
schema = pa.schema([('a', pa.float64())])
result = pa.table(df, schema=schema)
expected2 = pa.array([1, 2, None], pa.float64())
- assert result[0].chunk(0).equals(expected2)
+ assert result.column(0).chunk(0).equals(expected2)
# pass Series to pa.array
result = pa.array(df['a'])
@@ -4450,7 +4442,7 @@ def __init__(self):
def __arrow_ext_serialize__(self):
return b''
- def to_pandas_dtype(self):
+ def to_pandas_dtype(self): # type: ignore[override]
return pd.Int64Dtype()
@@ -4550,7 +4542,7 @@ def test_array_to_pandas():
expected = pd.Series(arr)
tm.assert_series_equal(result, expected)
- result = pa.table({"col": arr})["col"].to_pandas()
+ result = pa.table({"col": arr}).column("col").to_pandas()
expected = pd.Series(arr, name="col")
tm.assert_series_equal(result, expected)
@@ -4609,7 +4601,6 @@ def test_array_to_pandas_types_mapper():
assert result.dtype == np.dtype("int64")
-@pytest.mark.pandas
def test_chunked_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):
@@ -5100,7 +5091,7 @@ def test_roundtrip_nested_map_array_with_pydicts_sliced():
ty = pa.list_(pa.map_(pa.string(), pa.list_(pa.string())))
- def assert_roundtrip(series: pd.Series, data) -> None:
+ def assert_roundtrip(series, data):
array_roundtrip = pa.chunked_array(pa.Array.from_pandas(series, type=ty))
array_roundtrip.validate(full=True)
assert data.equals(array_roundtrip)
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 65f0c608136..20a33a382e4 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -20,11 +20,12 @@
import pytest
import weakref
from collections.abc import Sequence, Mapping
+from typing import cast
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.compute as pc
@@ -68,7 +69,7 @@
pa.Time32Scalar),
(datetime.datetime.now().time(), None, pa.Time64Scalar),
(datetime.timedelta(days=1), None, pa.DurationScalar),
- (pa.MonthDayNano([1, -1, -10100]), None,
+ (pa.MonthDayNano([1, -1, -10100]), None, # type: ignore[call-arg, arg-type]
pa.MonthDayNanoIntervalScalar),
({'a': 1, 'b': [1, 2]}, None, pa.StructScalar),
([('a', 1), ('b', 2)], pa.map_(pa.string(), pa.int8()), pa.MapScalar),
@@ -360,7 +361,8 @@ def test_time_from_datetime_time():
def test_temporal_values(value, time_type: pa.DataType):
time_scalar = pa.scalar(value, type=time_type)
time_scalar.validate(full=True)
- assert time_scalar.value == value
+ assert (time_scalar.value # type: ignore[union-attr, reportAttributeAccessIssue]
+ == value)
def test_cast():
@@ -422,7 +424,9 @@ def test_timestamp():
expected = pd.Timestamp('2000-01-01 12:34:56')
assert arrow_arr[0].as_py() == expected
- assert arrow_arr[0].value * 1000**i == expected.value
+ value = cast(pa.TimestampScalar, arrow_arr[0]).value
+ assert value is not None
+ assert value * 1000**i == expected.value
tz = 'America/New_York'
arrow_type = pa.timestamp(unit, tz=tz)
@@ -434,7 +438,9 @@ def test_timestamp():
.tz_convert(tz))
assert arrow_arr[0].as_py() == expected
- assert arrow_arr[0].value * 1000**i == expected.value
+ value = cast(pa.TimestampScalar, arrow_arr[0]).value
+ assert value is not None
+ assert value * 1000**i == expected.value
@pytest.mark.nopandas
@@ -529,7 +535,7 @@ def test_duration_nanos_nopandas():
def test_month_day_nano_interval():
- triple = pa.MonthDayNano([-3600, 1800, -50])
+ triple = pa.MonthDayNano([-3600, 1800, -50]) # type: ignore[invalid-argument-type]
arr = pa.array([triple])
assert isinstance(arr[0].as_py(), pa.MonthDayNano)
assert arr[0].as_py() == triple
@@ -577,7 +583,7 @@ def test_binary(value, ty, scalar_typ):
with pytest.raises(ValueError):
memoryview(s)
else:
- assert buf.to_pybytes() == value
+ assert buf.to_pybytes() == value # type: ignore[union-attr]
assert isinstance(buf, pa.Buffer)
assert bytes(s) == value
@@ -852,7 +858,7 @@ def test_dictionary(pickle_module):
assert arr.to_pylist() == expected
for j, (i, v) in enumerate(zip(indices, expected)):
- s = arr[j]
+ s = cast(pa.DictionaryScalar, arr[j])
assert s.as_py() == v
assert s.value.as_py() == v
@@ -868,14 +874,14 @@ def test_run_end_encoded():
values = [1, 2, 1, None, 3]
arr = pa.RunEndEncodedArray.from_arrays(run_ends, values)
- scalar = arr[0]
+ scalar = cast(pa.RunEndEncodedScalar, arr[0])
assert isinstance(scalar, pa.RunEndEncodedScalar)
assert isinstance(scalar.value, pa.Int64Scalar)
assert scalar.value == pa.array(values)[0]
assert scalar.as_py() == 1
# null -> .value is still a scalar, as_py returns None
- scalar = arr[10]
+ scalar = cast(pa.RunEndEncodedScalar, arr[10])
assert isinstance(scalar.value, pa.Int64Scalar)
assert scalar.as_py() is None
@@ -901,13 +907,13 @@ def test_union(pickle_module):
with pytest.raises(pa.ArrowNotImplementedError):
pickle_module.loads(pickle_module.dumps(s))
- assert arr[0].type_code == 0
+ assert cast(pa.UnionScalar, arr[0]).type_code == 0
assert arr[0].as_py() == "a"
- assert arr[1].type_code == 0
+ assert cast(pa.UnionScalar, arr[1]).type_code == 0
assert arr[1].as_py() == "b"
- assert arr[2].type_code == 1
+ assert cast(pa.UnionScalar, arr[2]).type_code == 1
assert arr[2].as_py() == 3
- assert arr[3].type_code == 1
+ assert cast(pa.UnionScalar, arr[3]).type_code == 1
assert arr[3].as_py() == 4
# dense
@@ -927,9 +933,9 @@ def test_union(pickle_module):
with pytest.raises(pa.ArrowNotImplementedError):
pickle_module.loads(pickle_module.dumps(s))
- assert arr[0].type_code == 0
+ assert cast(pa.UnionScalar, arr[0]).type_code == 0
assert arr[0].as_py() == b'a'
- assert arr[5].type_code == 1
+ assert cast(pa.UnionScalar, arr[5]).type_code == 1
assert arr[5].as_py() == 3
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 029e14ca162..5a7b9989358 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -23,7 +23,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
import pyarrow.tests.util as test_util
@@ -259,7 +259,7 @@ def test_schema():
child 0, item: int8"""
with pytest.raises(TypeError):
- pa.schema([None])
+ pa.schema([None]) # type: ignore[list-item]
def test_schema_weakref():
@@ -548,7 +548,7 @@ def test_schema_equals_invalid_type():
for val in [None, 'string', pa.array([1, 2])]:
with pytest.raises(TypeError):
- schema.equals(val)
+ schema.equals(val) # type: ignore[invalid-argument-type]
def test_schema_equality_operators():
@@ -594,7 +594,7 @@ def test_schema_get_fields():
with pytest.raises(KeyError):
schema.field('other')
with pytest.raises(TypeError):
- schema.field(0.0)
+ schema.field(0.0) # type: ignore[arg-type]
with pytest.raises(IndexError):
schema.field(4)
@@ -706,6 +706,7 @@ def test_empty_table():
assert table.schema == schema
+@pytest.mark.numpy
@pytest.mark.pandas
def test_schema_from_pandas():
import pandas as pd
@@ -782,7 +783,7 @@ def test_schema_merge():
# raise proper error when passing a non-Schema value
with pytest.raises(TypeError):
- pa.unify_schemas([a, 1])
+ pa.unify_schemas([a, 1]) # type: ignore[list-item]
def test_undecodable_metadata():
diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py
index eca8090d77a..2ce48b651b1 100644
--- a/python/pyarrow/tests/test_sparse_tensor.py
+++ b/python/pyarrow/tests/test_sparse_tensor.py
@@ -26,15 +26,16 @@
import pyarrow as pa
try:
- from scipy.sparse import csr_array, coo_array, csr_matrix, coo_matrix
+ from scipy.sparse import ( # type: ignore[reportMissingModuleSource]
+ csr_array, coo_array, csr_matrix, coo_matrix)
except ImportError:
- coo_matrix = None
- csr_matrix = None
- csr_array = None
- coo_array = None
+ coo_matrix = None # type: ignore[assignment, misc]
+ csr_matrix = None # type: ignore[assignment, misc]
+ csr_array = None # type: ignore[assignment, misc]
+ coo_array = None # type: ignore[assignment, misc]
try:
- import sparse
+ import sparse # type: ignore[import-untyped, import-not-found]
except ImportError:
sparse = None
@@ -401,7 +402,7 @@ def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type):
assert np.array_equal(array, result_array)
-@pytest.mark.skipif(not coo_matrix, reason="requires scipy")
+@pytest.mark.skipif(coo_matrix is None, reason="requires scipy")
@pytest.mark.parametrize('sparse_object', (coo_array, coo_matrix))
@pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs)
def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type,
@@ -443,7 +444,7 @@ def test_sparse_coo_tensor_scipy_roundtrip(dtype_str, arrow_type,
assert out_scipy_matrix.has_canonical_format
-@pytest.mark.skipif(not csr_matrix, reason="requires scipy")
+@pytest.mark.skipif(csr_matrix is None, reason="requires scipy")
@pytest.mark.parametrize('sparse_object', (csr_array, csr_matrix))
@pytest.mark.parametrize('dtype_str,arrow_type', scipy_type_pairs)
def test_sparse_csr_matrix_scipy_roundtrip(dtype_str, arrow_type,
@@ -483,7 +484,8 @@ def test_pydata_sparse_sparse_coo_tensor_roundtrip(dtype_str, arrow_type):
shape = (4, 6)
dim_names = ("x", "y")
- sparse_array = sparse.COO(data=data, coords=coords, shape=shape)
+ sparse_array = sparse.COO( # type: ignore[reportOptionalMemberAccess]
+ data=data, coords=coords, shape=shape)
sparse_tensor = pa.SparseCOOTensor.from_pydata_sparse(sparse_array,
dim_names=dim_names)
out_sparse_array = sparse_tensor.to_pydata_sparse()
diff --git a/python/pyarrow/tests/test_strategies.py b/python/pyarrow/tests/test_strategies.py
index babb839b534..9505b9a11b0 100644
--- a/python/pyarrow/tests/test_strategies.py
+++ b/python/pyarrow/tests/test_strategies.py
@@ -25,7 +25,7 @@
@h.given(past.all_types)
def test_types(ty):
- assert isinstance(ty, pa.lib.DataType)
+ assert isinstance(ty, pa.DataType)
@h.given(past.all_fields)
@@ -41,7 +41,7 @@ def test_schemas(schema):
@pytest.mark.numpy
@h.given(past.all_arrays)
def test_arrays(array):
- assert isinstance(array, pa.lib.Array)
+ assert isinstance(array, pa.Array)
@pytest.mark.numpy
diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py
index fcd1c8d48c5..9ad65f0738d 100644
--- a/python/pyarrow/tests/test_substrait.py
+++ b/python/pyarrow/tests/test_substrait.py
@@ -25,13 +25,10 @@
from pyarrow.lib import tobytes
from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError
-try:
- import pyarrow.substrait as substrait
-except ImportError:
- substrait = None
-
# Marks all of the tests in this module
# Ignore these with pytest ... -m 'not substrait'
+substrait = pytest.importorskip('pyarrow.substrait')
+_substrait = pytest.importorskip('pyarrow._substrait')
pytestmark = pytest.mark.substrait
@@ -85,7 +82,7 @@ def test_run_serialized_query(tmpdir, use_threads):
query = tobytes(substrait_query.replace(
"FILENAME_PLACEHOLDER", pathlib.Path(path).as_uri()))
- buf = pa._substrait._parse_json_plan(query)
+ buf = _substrait._parse_json_plan(query)
reader = substrait.run_query(buf, use_threads=use_threads)
res_tb = reader.read_all()
@@ -116,7 +113,7 @@ def test_invalid_plan():
]
}
"""
- buf = pa._substrait._parse_json_plan(tobytes(query))
+ buf = _substrait._parse_json_plan(tobytes(query))
exec_message = "Plan has no relations"
with pytest.raises(ArrowInvalid, match=exec_message):
substrait.run_query(buf)
@@ -162,7 +159,7 @@ def test_binary_conversion_with_json_options(tmpdir, use_threads):
path = _write_dummy_data_to_disk(tmpdir, file_name, table)
query = tobytes(substrait_query.replace(
"FILENAME_PLACEHOLDER", pathlib.Path(path).as_uri()))
- buf = pa._substrait._parse_json_plan(tobytes(query))
+ buf = _substrait._parse_json_plan(tobytes(query))
reader = substrait.run_query(buf, use_threads=use_threads)
res_tb = reader.read_all()
@@ -181,7 +178,7 @@ def has_function(fns, ext_file, fn_name):
def test_get_supported_functions():
- supported_functions = pa._substrait.get_supported_functions()
+ supported_functions = _substrait.get_supported_functions()
# It probably doesn't make sense to exhaustively verify this list but
# we can check a sample aggregate and a sample non-aggregate entry
assert has_function(supported_functions,
@@ -232,8 +229,8 @@ def table_provider(names, schema):
}
"""
- buf = pa._substrait._parse_json_plan(tobytes(substrait_query))
- reader = pa.substrait.run_query(
+ buf = _substrait._parse_json_plan(tobytes(substrait_query))
+ reader = substrait.run_query(
buf, table_provider=table_provider, use_threads=use_threads)
res_tb = reader.read_all()
assert res_tb == test_table_1
@@ -275,7 +272,7 @@ def table_provider(names, _):
}
"""
- buf = pa._substrait._parse_json_plan(tobytes(substrait_query))
+ buf = _substrait._parse_json_plan(tobytes(substrait_query))
exec_message = "Invalid NamedTable Source"
with pytest.raises(ArrowInvalid, match=exec_message):
substrait.run_query(buf, table_provider=table_provider)
@@ -317,7 +314,7 @@ def table_provider(names, _):
}
"""
query = tobytes(substrait_query)
- buf = pa._substrait._parse_json_plan(tobytes(query))
+ buf = _substrait._parse_json_plan(tobytes(query))
exec_message = "names for NamedTable not provided"
with pytest.raises(ArrowInvalid, match=exec_message):
substrait.run_query(buf, table_provider=table_provider)
@@ -436,8 +433,8 @@ def table_provider(names, _):
}
"""
- buf = pa._substrait._parse_json_plan(substrait_query)
- reader = pa.substrait.run_query(
+ buf = _substrait._parse_json_plan(substrait_query)
+ reader = substrait.run_query(
buf, table_provider=table_provider, use_threads=use_threads)
res_tb = reader.read_all()
@@ -559,9 +556,9 @@ def table_provider(names, _):
}
"""
- buf = pa._substrait._parse_json_plan(substrait_query)
+ buf = _substrait._parse_json_plan(substrait_query)
with pytest.raises(pa.ArrowKeyError) as excinfo:
- pa.substrait.run_query(buf, table_provider=table_provider)
+ substrait.run_query(buf, table_provider=table_provider)
assert "No function registered" in str(excinfo.value)
@@ -598,8 +595,8 @@ def table_provider(names, schema):
}
"""
- buf = pa._substrait._parse_json_plan(tobytes(substrait_query))
- reader = pa.substrait.run_query(
+ buf = _substrait._parse_json_plan(tobytes(substrait_query))
+ reader = substrait.run_query(
buf, table_provider=table_provider, use_threads=use_threads)
res_tb = reader.read_all()
@@ -744,8 +741,8 @@ def table_provider(names, _):
],
}
"""
- buf = pa._substrait._parse_json_plan(substrait_query)
- reader = pa.substrait.run_query(
+ buf = _substrait._parse_json_plan(substrait_query)
+ reader = substrait.run_query(
buf, table_provider=table_provider, use_threads=False)
res_tb = reader.read_all()
@@ -913,8 +910,8 @@ def table_provider(names, _):
],
}
"""
- buf = pa._substrait._parse_json_plan(substrait_query)
- reader = pa.substrait.run_query(
+ buf = _substrait._parse_json_plan(substrait_query)
+ reader = substrait.run_query(
buf, table_provider=table_provider, use_threads=False)
res_tb = reader.read_all()
@@ -929,8 +926,8 @@ def table_provider(names, _):
@pytest.mark.parametrize("expr", [
- pc.equal(pc.field("x"), 7),
- pc.equal(pc.field("x"), pc.field("y")),
+ pc.equal(pc.field("x"), 7), # type: ignore[attr-defined]
+ pc.equal(pc.field("x"), pc.field("y")), # type: ignore[attr-defined]
pc.field("x") > 50
])
def test_serializing_expressions(expr):
@@ -939,8 +936,8 @@ def test_serializing_expressions(expr):
pa.field("y", pa.int32())
])
- buf = pa.substrait.serialize_expressions([expr], ["test_expr"], schema)
- returned = pa.substrait.deserialize_expressions(buf)
+ buf = substrait.serialize_expressions([expr], ["test_expr"], schema)
+ returned = substrait.deserialize_expressions(buf)
assert schema == returned.schema
assert len(returned.expressions) == 1
assert "test_expr" in returned.expressions
@@ -958,8 +955,8 @@ def test_arrow_specific_types():
schema = pa.schema([pa.field(name, typ) for name, (typ, _) in fields.items()])
def check_round_trip(expr):
- buf = pa.substrait.serialize_expressions([expr], ["test_expr"], schema)
- returned = pa.substrait.deserialize_expressions(buf)
+ buf = substrait.serialize_expressions([expr], ["test_expr"], schema)
+ returned = substrait.deserialize_expressions(buf)
assert schema == returned.schema
for name, (typ, val) in fields.items():
@@ -986,8 +983,8 @@ def test_arrow_one_way_types():
def check_one_way(field):
expr = pc.is_null(pc.field(field.name))
- buf = pa.substrait.serialize_expressions([expr], ["test_expr"], schema)
- returned = pa.substrait.deserialize_expressions(buf)
+ buf = substrait.serialize_expressions([expr], ["test_expr"], schema)
+ returned = substrait.deserialize_expressions(buf)
assert alt_schema == returned.schema
for field in schema:
@@ -1003,14 +1000,14 @@ def test_invalid_expression_ser_des():
bad_expr = pc.equal(pc.field("z"), 7)
# Invalid number of names
with pytest.raises(ValueError) as excinfo:
- pa.substrait.serialize_expressions([expr], [], schema)
+ substrait.serialize_expressions([expr], [], schema)
assert 'need to have the same length' in str(excinfo.value)
with pytest.raises(ValueError) as excinfo:
- pa.substrait.serialize_expressions([expr], ["foo", "bar"], schema)
+ substrait.serialize_expressions([expr], ["foo", "bar"], schema)
assert 'need to have the same length' in str(excinfo.value)
# Expression doesn't match schema
with pytest.raises(ValueError) as excinfo:
- pa.substrait.serialize_expressions([bad_expr], ["expr"], schema)
+ substrait.serialize_expressions([bad_expr], ["expr"], schema)
assert 'No match for FieldRef' in str(excinfo.value)
@@ -1020,8 +1017,8 @@ def test_serializing_multiple_expressions():
pa.field("y", pa.int32())
])
exprs = [pc.equal(pc.field("x"), 7), pc.equal(pc.field("x"), pc.field("y"))]
- buf = pa.substrait.serialize_expressions(exprs, ["first", "second"], schema)
- returned = pa.substrait.deserialize_expressions(buf)
+ buf = substrait.serialize_expressions(exprs, ["first", "second"], schema)
+ returned = substrait.deserialize_expressions(buf)
assert schema == returned.schema
assert len(returned.expressions) == 2
@@ -1037,8 +1034,8 @@ def test_serializing_with_compute():
])
expr = pc.equal(pc.field("x"), 7)
expr_norm = pc.equal(pc.field(0), 7)
- buf = expr.to_substrait(schema)
- returned = pa.substrait.deserialize_expressions(buf)
+ buf = expr.to_substrait(schema) # type: ignore[union-attr]
+ returned = substrait.deserialize_expressions(buf)
assert schema == returned.schema
assert len(returned.expressions) == 1
@@ -1046,13 +1043,13 @@ def test_serializing_with_compute():
assert str(returned.expressions["expression"]) == str(expr_norm)
# Compute can't deserialize messages with multiple expressions
- buf = pa.substrait.serialize_expressions([expr, expr], ["first", "second"], schema)
+ buf = substrait.serialize_expressions([expr, expr], ["first", "second"], schema)
with pytest.raises(ValueError) as excinfo:
pc.Expression.from_substrait(buf)
assert 'contained multiple expressions' in str(excinfo.value)
# Deserialization should be possible regardless of the expression name
- buf = pa.substrait.serialize_expressions([expr], ["weirdname"], schema)
+ buf = substrait.serialize_expressions([expr], ["weirdname"], schema)
expr2 = pc.Expression.from_substrait(buf)
assert str(expr2) == str(expr_norm)
@@ -1069,11 +1066,11 @@ def test_serializing_udfs():
exprs = [pc.shift_left(a, b)]
with pytest.raises(ArrowNotImplementedError):
- pa.substrait.serialize_expressions(exprs, ["expr"], schema)
+ substrait.serialize_expressions(exprs, ["expr"], schema)
- buf = pa.substrait.serialize_expressions(
+ buf = substrait.serialize_expressions(
exprs, ["expr"], schema, allow_arrow_extensions=True)
- returned = pa.substrait.deserialize_expressions(buf)
+ returned = substrait.deserialize_expressions(buf)
assert schema == returned.schema
assert len(returned.expressions) == 1
assert str(returned.expressions["expr"]) == str(exprs[0])
@@ -1085,19 +1082,19 @@ def test_serializing_schema():
pa.field("x", pa.int32()),
pa.field("y", pa.string())
])
- returned = pa.substrait.deserialize_schema(substrait_schema)
+ returned = substrait.deserialize_schema(substrait_schema)
assert expected_schema == returned
- arrow_substrait_schema = pa.substrait.serialize_schema(returned)
+ arrow_substrait_schema = substrait.serialize_schema(returned)
assert arrow_substrait_schema.schema == substrait_schema
- returned = pa.substrait.deserialize_schema(arrow_substrait_schema)
+ returned = substrait.deserialize_schema(arrow_substrait_schema)
assert expected_schema == returned
- returned = pa.substrait.deserialize_schema(arrow_substrait_schema.schema)
+ returned = substrait.deserialize_schema(arrow_substrait_schema.schema)
assert expected_schema == returned
- returned = pa.substrait.deserialize_expressions(arrow_substrait_schema.expression)
+ returned = substrait.deserialize_expressions(arrow_substrait_schema.expression)
assert returned.schema == expected_schema
@@ -1114,7 +1111,7 @@ def SerializeToString(self):
b'\x1a\x19\n\x06\x12\x04\n\x02\x12\x00\x1a\x0fproject_version'
b'"0\n\x0fproject_version\n\x0fproject_release'
b'\x12\x0c\n\x04:\x02\x10\x01\n\x04b\x02\x10\x01')
- exprs = pa.substrait.BoundExpressions.from_substrait(FakeMessage(message))
+ exprs = substrait.BoundExpressions.from_substrait(FakeMessage(message))
assert len(exprs.expressions) == 2
assert 'project_release' in exprs.expressions
assert 'project_version' in exprs.expressions
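
The hunks above drop the `pa.substrait` / `pa._substrait` attribute access in favour of module-level names. A minimal sketch of the import block these edits assume follows; the actual imports live in an earlier hunk of this diff and may differ.

# Sketch of the import style assumed by the rewritten substrait tests.
import pyarrow as pa
import pyarrow.substrait as substrait
import pyarrow._substrait as _substrait  # private helpers, e.g. _parse_json_plan
from pyarrow.lib import tobytes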
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index b65fb7d952c..6263afd03a5 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -18,12 +18,13 @@
from collections import OrderedDict
from collections.abc import Iterable
import sys
+from typing import cast
import weakref
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pytest
import pyarrow as pa
import pyarrow.compute as pc
@@ -418,7 +419,8 @@ def test_to_pandas_empty_table():
table = pa.table(df)
result = table.schema.empty_table().to_pandas()
assert result.shape == (0, 2)
- tm.assert_frame_equal(result, df.iloc[:0])
+ expected = df.iloc[:0]
+ tm.assert_frame_equal(result, expected)
@pytest.mark.pandas
@@ -486,12 +488,25 @@ def test_chunked_array_unify_dictionaries():
pa.array(["foo", "bar", None, "foo"]).dictionary_encode(),
pa.array(["quux", None, "foo"]).dictionary_encode(),
])
- assert arr.chunk(0).dictionary.equals(pa.array(["foo", "bar"]))
- assert arr.chunk(1).dictionary.equals(pa.array(["quux", "foo"]))
+ chunk_0 = arr.chunk(0)
+ assert isinstance(chunk_0, pa.DictionaryArray)
+ assert chunk_0.dictionary.equals(pa.array(["foo", "bar"]))
+
+ chunk_1 = arr.chunk(1)
+ assert isinstance(chunk_1, pa.DictionaryArray)
+ assert chunk_1.dictionary.equals(pa.array(["quux", "foo"]))
+
arr = arr.unify_dictionaries()
expected_dict = pa.array(["foo", "bar", "quux"])
- assert arr.chunk(0).dictionary.equals(expected_dict)
- assert arr.chunk(1).dictionary.equals(expected_dict)
+
+ chunk_0 = arr.chunk(0)
+ assert isinstance(chunk_0, pa.DictionaryArray)
+ assert chunk_0.dictionary.equals(expected_dict)
+
+ chunk_1 = arr.chunk(1)
+ assert isinstance(chunk_1, pa.DictionaryArray)
+ assert chunk_1.dictionary.equals(expected_dict)
+
assert arr.to_pylist() == ["foo", "bar", None, "foo", "quux", None, "foo"]
@@ -716,7 +731,7 @@ def test_recordbatch_take():
def test_recordbatch_column_sets_private_name():
# ARROW-6429
rb = pa.record_batch([pa.array([1, 2, 3, 4])], names=['a0'])
- assert rb[0]._name == 'a0'
+ assert rb.column(0)._name == 'a0'
def test_recordbatch_from_arrays_validate_schema():
@@ -798,7 +813,7 @@ def test_recordbatch_get_field():
batch.field('d')
with pytest.raises(TypeError):
- batch.field(None)
+ batch.field(None) # type: ignore[arg-type]
with pytest.raises(IndexError):
batch.field(4)
@@ -819,7 +834,7 @@ def test_recordbatch_select_column():
batch.column('d')
with pytest.raises(TypeError):
- batch.column(None)
+ batch.column(None) # type: ignore[arg-type]
with pytest.raises(IndexError):
batch.column(4)
@@ -933,7 +948,10 @@ def test_table_from_struct_array_chunked_array():
[[{"ints": 1}, {"floats": 1.0}]],
type=pa.struct([("ints", pa.int32()), ("floats", pa.float32())]),
)
- result = pa.Table.from_struct_array(chunked_struct_array)
+ assert isinstance(chunked_struct_array.type, pa.StructType)
+ # Cast to the proper type for the type checker
+ struct_chunked_array = cast(pa.ChunkedArray, chunked_struct_array)
+ result = pa.Table.from_struct_array(struct_chunked_array)
assert result.equals(pa.Table.from_arrays(
[
pa.array([1, None], type=pa.int32()),
@@ -1189,7 +1207,7 @@ def test_recordbatch_to_tensor_null():
batch.to_tensor()
result = batch.to_tensor(null_to_nan=True, row_major=False)
- x = np.column_stack([arr1, arr2]).astype(np.float64, order="F")
+ x = np.column_stack([arr1, arr2]).astype(np.float64, order="F") # type: ignore[no-matching-overload]
expected = pa.Tensor.from_numpy(x)
np.testing.assert_equal(result.to_numpy(), x)
@@ -1223,7 +1241,7 @@ def test_recordbatch_to_tensor_null():
)
result = batch.to_tensor(null_to_nan=True, row_major=False)
- x = np.column_stack([arr1, arr2]).astype(np.float32, order="F")
+ x = np.column_stack([arr1, arr2]).astype(np.float32, order="F") # type: ignore[no-matching-overload]
expected = pa.Tensor.from_numpy(x)
np.testing.assert_equal(result.to_numpy(), x)
@@ -1339,7 +1357,7 @@ def test_recordbatchlist_schema_equals():
def test_table_column_sets_private_name():
# ARROW-6429
t = pa.table([pa.array([1, 2, 3, 4])], names=['a0'])
- assert t[0]._name == 'a0'
+ assert t.column(0)._name == 'a0'
def test_table_equals():
@@ -1500,7 +1518,8 @@ def test_table_from_arrays_preserves_column_metadata():
field1 = pa.field('field2', pa.int64(), nullable=False)
table = pa.Table.from_arrays([arr0, arr1],
schema=pa.schema([field0, field1]))
- assert b"a" in table.field(0).metadata
+ field0_metadata = table.field(0).metadata
+ assert field0_metadata is not None and b"a" in field0_metadata
assert table.field(1).nullable is False
@@ -1565,7 +1584,7 @@ def test_table_get_field():
table.field('d')
with pytest.raises(TypeError):
- table.field(None)
+ table.field(None) # type: ignore[arg-type]
with pytest.raises(IndexError):
table.field(4)
@@ -1586,7 +1605,7 @@ def test_table_select_column():
table.column('d')
with pytest.raises(TypeError):
- table.column(None)
+ table.column(None) # type: ignore[arg-type]
with pytest.raises(IndexError):
table.column(4)
@@ -1879,22 +1898,41 @@ def test_table_unify_dictionaries():
table = pa.Table.from_batches([batch1, batch2])
table = table.replace_schema_metadata({b"key1": b"value1"})
- assert table.column(0).chunk(0).dictionary.equals(
- pa.array(["foo", "bar"]))
- assert table.column(0).chunk(1).dictionary.equals(
- pa.array(["quux", "foo"]))
- assert table.column(1).chunk(0).dictionary.equals(
- pa.array([123, 456, 789]))
- assert table.column(1).chunk(1).dictionary.equals(
- pa.array([456, 789]))
+ chunk_0_0 = table.column(0).chunk(0)
+ assert isinstance(chunk_0_0, pa.DictionaryArray)
+ assert chunk_0_0.dictionary.equals(pa.array(["foo", "bar"]))
+
+ chunk_0_1 = table.column(0).chunk(1)
+ assert isinstance(chunk_0_1, pa.DictionaryArray)
+ assert chunk_0_1.dictionary.equals(pa.array(["quux", "foo"]))
+
+ chunk_1_0 = table.column(1).chunk(0)
+ assert isinstance(chunk_1_0, pa.DictionaryArray)
+ assert chunk_1_0.dictionary.equals(pa.array([123, 456, 789]))
+
+ chunk_1_1 = table.column(1).chunk(1)
+ assert isinstance(chunk_1_1, pa.DictionaryArray)
+ assert chunk_1_1.dictionary.equals(pa.array([456, 789]))
table = table.unify_dictionaries(pa.default_memory_pool())
expected_dict_0 = pa.array(["foo", "bar", "quux"])
expected_dict_1 = pa.array([123, 456, 789])
- assert table.column(0).chunk(0).dictionary.equals(expected_dict_0)
- assert table.column(0).chunk(1).dictionary.equals(expected_dict_0)
- assert table.column(1).chunk(0).dictionary.equals(expected_dict_1)
- assert table.column(1).chunk(1).dictionary.equals(expected_dict_1)
+
+ chunk_0_0 = table.column(0).chunk(0)
+ assert isinstance(chunk_0_0, pa.DictionaryArray)
+ assert chunk_0_0.dictionary.equals(expected_dict_0)
+
+ chunk_0_1 = table.column(0).chunk(1)
+ assert isinstance(chunk_0_1, pa.DictionaryArray)
+ assert chunk_0_1.dictionary.equals(expected_dict_0)
+
+ chunk_1_0 = table.column(1).chunk(0)
+ assert isinstance(chunk_1_0, pa.DictionaryArray)
+ assert chunk_1_0.dictionary.equals(expected_dict_1)
+
+ chunk_1_1 = table.column(1).chunk(1)
+ assert isinstance(chunk_1_1, pa.DictionaryArray)
+ assert chunk_1_1.dictionary.equals(expected_dict_1)
assert table.to_pydict() == {
'a': ["foo", "bar", None, "foo", "quux", "foo", None, "quux"],
@@ -1964,13 +2002,13 @@ def test_concat_tables_invalid_option():
t = pa.Table.from_arrays([list(range(10))], names=('a',))
with pytest.raises(ValueError, match="Invalid promote_options: invalid"):
- pa.concat_tables([t, t], promote_options="invalid")
+ pa.concat_tables([t, t], promote_options="invalid") # type: ignore[arg-type]
def test_concat_tables_none_table():
# ARROW-11997
with pytest.raises(AttributeError):
- pa.concat_tables([None])
+ pa.concat_tables([None]) # type: ignore[arg-type]
@pytest.mark.pandas
@@ -2113,7 +2151,7 @@ def test_concat_batches_different_schema():
def test_concat_batches_none_batches():
# ARROW-11997
with pytest.raises(AttributeError):
- pa.concat_batches([None])
+ pa.concat_batches([None]) # type: ignore[arg-type]
@pytest.mark.parametrize(
@@ -2264,7 +2302,7 @@ def test_from_arrays_schema(data, klass):
# with different and incompatible schema
schema = pa.schema([('strs', pa.utf8()), ('floats', pa.timestamp('s'))])
with pytest.raises((NotImplementedError, TypeError)):
- pa.Table.from_pydict(data, schema=schema)
+ pa.Table.from_pydict(data, schema=schema) # type: ignore[arg-type]
# Cannot pass both schema and metadata / names
with pytest.raises(ValueError):
@@ -2369,7 +2407,7 @@ def test_table_from_pydict_arrow_arrays(data, klass):
# with different and incompatible schema
schema = pa.schema([('strs', pa.utf8()), ('floats', pa.timestamp('s'))])
with pytest.raises((NotImplementedError, TypeError)):
- pa.Table.from_pydict(data, schema=schema)
+ pa.Table.from_pydict(data, schema=schema) # type: ignore[arg-type]
@pytest.mark.parametrize('data, klass', [
@@ -2386,7 +2424,7 @@ def test_table_from_pydict_schema(data, klass):
schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float64()),
('ints', pa.int64())])
with pytest.raises(KeyError, match='ints'):
- pa.Table.from_pydict(data, schema=schema)
+ pa.Table.from_pydict(data, schema=schema) # type: ignore[arg-type]
# data has columns not present in schema -> ignored
schema = pa.schema([('strs', pa.utf8())])
@@ -2590,10 +2628,10 @@ def test_table_factory_function_args_pandas():
def test_factory_functions_invalid_input():
with pytest.raises(TypeError, match="Expected pandas DataFrame, python"):
- pa.table("invalid input")
+ pa.table("invalid input") # type: ignore[arg-type]
with pytest.raises(TypeError, match="Expected pandas DataFrame"):
- pa.record_batch("invalid input")
+ pa.record_batch("invalid input") # type: ignore[arg-type]
def test_table_repr_to_string():
@@ -2727,8 +2765,8 @@ def test_table_function_unicode_schema():
schema = pa.schema([(col_a, pa.int32()), (col_b, pa.string())])
result = pa.table(d, schema=schema)
- assert result[0].chunk(0).equals(pa.array([1, 2, 3], type='int32'))
- assert result[1].chunk(0).equals(pa.array(['a', 'b', 'c'], type='string'))
+ assert result.column(0).chunk(0).equals(pa.array([1, 2, 3], type='int32'))
+ assert result.column(1).chunk(0).equals(pa.array(['a', 'b', 'c'], type='string'))
def test_table_take_vanilla_functionality():
@@ -3603,7 +3641,7 @@ def test_chunked_array_non_cpu(cuda_context, cpu_chunked_array, cuda_chunked_arr
# equals() test
with pytest.raises(NotImplementedError):
- cuda_chunked_array == cuda_chunked_array
+ cuda_chunked_array == cuda_chunked_array # type: ignore[reportUnusedExpression]
# to_pandas() test
with pytest.raises(NotImplementedError):
@@ -3860,7 +3898,7 @@ def test_recordbatch_non_cpu(cuda_context, cpu_recordbatch, cuda_recordbatch,
# __dataframe__() test
with pytest.raises(NotImplementedError):
- from_dataframe(cuda_recordbatch.__dataframe__())
+ from_dataframe(cuda_recordbatch.__dataframe__()) # type: ignore[misc]
def verify_cuda_table(table, expected_schema):
@@ -4059,7 +4097,7 @@ def test_table_non_cpu(cuda_context, cpu_table, cuda_table,
# __dataframe__() test
with pytest.raises(NotImplementedError):
- from_dataframe(cuda_table.__dataframe__())
+ from_dataframe(cuda_table.__dataframe__()) # type: ignore[misc]
# __reduce__() test
with pytest.raises(NotImplementedError):
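
The repeated `isinstance(..., pa.DictionaryArray)` assertions above serve the type checkers as much as the tests: `ChunkedArray.chunk()` is presumably annotated as returning a plain `Array` in the stubs, so an explicit runtime check narrows the type before `.dictionary` is accessed. A small standalone illustration of the pattern, under that assumption:

import pyarrow as pa

# Build a one-chunk dictionary-encoded column.
chunked = pa.chunked_array([pa.array(["foo", "bar", "foo"]).dictionary_encode()])

chunk = chunked.chunk(0)
# Narrows Array -> DictionaryArray for both the runtime test and the checker.
assert isinstance(chunk, pa.DictionaryArray)
print(chunk.dictionary)  # ["foo", "bar"]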
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index debb1066280..c3726fdbbf4 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -213,7 +213,7 @@ def test_tensor_memoryview():
dtype = data.dtype
lst = data.tolist()
tensor = pa.Tensor.from_numpy(data)
- m = memoryview(tensor)
+ m = memoryview(tensor) # type: ignore[reportArgumentType]
assert m.format == expected_format
assert m.shape == data.shape
assert m.strides == data.strides
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index 539f0172454..c224392510d 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -24,16 +24,22 @@
import pytest
import hypothesis as h
import hypothesis.strategies as st
-try:
- import hypothesis.extra.pytz as tzst
-except ImportError:
- tzst = None
+from typing import Any, TYPE_CHECKING
import weakref
-try:
+if TYPE_CHECKING:
import numpy as np
-except ImportError:
- np = None
+ import hypothesis.extra.pytz as tzst
+else:
+ try:
+ import numpy as np
+ except ImportError:
+ np = None
+ try:
+ import hypothesis.extra.pytz as tzst
+ except ImportError:
+ tzst = None
+
import pyarrow as pa
import pyarrow.types as types
import pyarrow.tests.strategies as past
@@ -411,7 +417,7 @@ def test_tzinfo_to_string_errors():
if tzst:
timezones = tzst.timezones()
else:
- timezones = st.none()
+ timezones = st.none() # type: ignore[assignment]
@h.given(timezones)
@@ -465,7 +471,7 @@ class BuggyTimezone2(datetime.tzinfo):
def tzname(self, dt):
return None
- def utcoffset(self, dt):
+ def utcoffset(self, dt): # type: ignore[override]
return "one hour"
class BuggyTimezone3(datetime.tzinfo):
@@ -473,7 +479,7 @@ class BuggyTimezone3(datetime.tzinfo):
Wrong timezone name type
"""
- def tzname(self, dt):
+ def tzname(self, dt): # type: ignore[override]
return 240
def utcoffset(self, dt):
@@ -732,13 +738,13 @@ def test_struct_type():
# Neither integer nor string
with pytest.raises(TypeError):
- ty[None]
+ ty[None] # type: ignore[reportArgumentType]
with pytest.raises(TypeError):
- ty.field(None)
+ ty.field(None) # type: ignore[reportArgumentType]
for a, b in zip(ty, fields):
- a == b
+ assert a == b
# Construct from list of tuples
ty = pa.struct([('a', pa.int64()),
@@ -746,7 +752,7 @@ def test_struct_type():
('b', pa.int32())])
assert list(ty) == fields
for a, b in zip(ty, fields):
- a == b
+ assert a == b
# Construct from mapping
fields = [pa.field('a', pa.int64()),
@@ -755,7 +761,7 @@ def test_struct_type():
('b', pa.int32())]))
assert list(ty) == fields
for a, b in zip(ty, fields):
- a == b
+ assert a == b
# Invalid args
with pytest.raises(TypeError):
@@ -862,7 +868,7 @@ def test_dictionary_type():
# invalid index type raises
with pytest.raises(TypeError):
- pa.dictionary(pa.string(), pa.int64())
+ pa.dictionary(pa.string(), pa.int64()) # type: ignore[reportArgumentType]
def test_dictionary_ordered_equals():
@@ -951,7 +957,7 @@ def test_run_end_encoded_type():
pa.run_end_encoded(None, pa.utf8())
with pytest.raises(ValueError):
- pa.run_end_encoded(pa.int8(), pa.utf8())
+ pa.run_end_encoded(pa.int8(), pa.utf8()) # type: ignore[reportArgumentType]
@pytest.mark.parametrize('t,check_func', [
@@ -1084,12 +1090,12 @@ def test_timedelta_overflow():
pa.scalar(d, type=pa.duration('ns'))
# microsecond resolution, not overflow
- pa.scalar(d, type=pa.duration('us')).as_py() == d
+ assert pa.scalar(d, type=pa.duration('us')).as_py() == d
# second/millisecond resolution, not overflow
for d in [datetime.timedelta.min, datetime.timedelta.max]:
- pa.scalar(d, type=pa.duration('ms')).as_py() == d
- pa.scalar(d, type=pa.duration('s')).as_py() == d
+ _ = pa.scalar(d, type=pa.duration('ms')).as_py() == d
+ _ = pa.scalar(d, type=pa.duration('s')).as_py() == d
def test_type_equality_operators():
@@ -1127,11 +1133,11 @@ def test_key_value_metadata():
assert m1 != {'a': 'A', 'b': 'C'}
with pytest.raises(TypeError):
- pa.KeyValueMetadata({'a': 1})
+ pa.KeyValueMetadata({'a': 1}) # type: ignore[reportArgumentType]
with pytest.raises(TypeError):
- pa.KeyValueMetadata({1: 'a'})
+ pa.KeyValueMetadata({1: 'a'}) # type: ignore[reportArgumentType]
with pytest.raises(TypeError):
- pa.KeyValueMetadata(a=1)
+ pa.KeyValueMetadata(a=1) # type: ignore[reportArgumentType]
expected = [(b'a', b'A'), (b'b', b'B')]
result = [(k, v) for k, v in m3.items()]
@@ -1258,6 +1264,7 @@ def test_field_metadata():
assert f1.metadata is None
assert f2.metadata == {}
+ assert f3.metadata is not None
assert f3.metadata[b'bizz'] == b'bazz'
@@ -1394,7 +1401,7 @@ def __arrow_c_schema__(self):
return self.schema.__arrow_c_schema__()
-class SchemaMapping(Mapping):
+class SchemaMapping(Mapping[Any, Any]):
def __init__(self, schema):
self.schema = schema
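
The `TYPE_CHECKING` split above keeps the optional imports resolvable for static analysis while preserving the runtime fallback to `None`. The same pattern reduced to its essentials, shown here with numpy as in the hunk above:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Checkers always see the real module, so annotations resolve.
    import numpy as np
else:
    # At runtime the dependency stays optional.
    try:
        import numpy as np
    except ImportError:
        np = None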
diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py
index 93004a30618..e028f1c0484 100644
--- a/python/pyarrow/tests/test_udf.py
+++ b/python/pyarrow/tests/test_udf.py
@@ -21,7 +21,7 @@
try:
import numpy as np
except ImportError:
- np = None
+ pass
import pyarrow as pa
from pyarrow import compute as pc
@@ -35,7 +35,7 @@
try:
import pyarrow.dataset as ds
except ImportError:
- ds = None
+ pass
def mock_udf_context(batch_length=10):
@@ -381,6 +381,7 @@ def check_scalar_function(func_fixture,
func = pc.get_function(name)
assert func.name == name
+ assert batch_length is not None
result = pc.call_function(name, inputs, length=batch_length)
expected_output = function(mock_udf_context(batch_length), *inputs)
@@ -580,8 +581,8 @@ def identity(ctx, val):
}
with pytest.raises(TypeError,
match="DataType expected, got "):
- pc.register_scalar_function(identity, func_name,
- doc, in_types, out_type)
+ pc.register_scalar_function(
+ identity, func_name, doc, in_types, out_type) # type: ignore[arg-type]
def test_wrong_input_type_declaration():
@@ -597,8 +598,9 @@ def identity(ctx, val):
}
with pytest.raises(TypeError,
match="DataType expected, got "):
- pc.register_scalar_function(identity, func_name, doc,
- in_types, out_type)
+ pc.register_scalar_function(
+ identity, func_name, doc, in_types, # type: ignore[arg-type]
+ out_type)
def test_scalar_udf_context(unary_func_fixture):
diff --git a/python/pyarrow/tests/test_without_numpy.py b/python/pyarrow/tests/test_without_numpy.py
index 55c12602ce8..c5f5671aabc 100644
--- a/python/pyarrow/tests/test_without_numpy.py
+++ b/python/pyarrow/tests/test_without_numpy.py
@@ -50,6 +50,7 @@ def test_tensor_to_np():
arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]]
storage = pa.array(arr, pa.list_(pa.int32(), 4))
tensor_array = pa.ExtensionArray.from_storage(tensor_type, storage)
+ assert isinstance(tensor_array, pa.FixedShapeTensorArray)
tensor = tensor_array.to_tensor()
msg = "Cannot return a numpy.ndarray if NumPy is not present"
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index 7e3dd4324e9..fca0fec1122 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -171,7 +171,8 @@ def get_modified_env_with_pythonpath():
existing_pythonpath = env.get('PYTHONPATH', '')
module_path = os.path.abspath(
- os.path.dirname(os.path.dirname(pa.__file__)))
+ os.path.dirname(os.path.dirname( # type: ignore[no-matching-overload]
+ pa.__file__)))
if existing_pythonpath:
new_pythonpath = os.pathsep.join((module_path, existing_pythonpath))
@@ -336,6 +337,7 @@ def _ensure_minio_component_version(component, minimum_year):
stderr=subprocess.PIPE, encoding='utf-8') as proc:
if proc.wait(10) != 0:
return False
+ assert proc.stdout is not None
stdout = proc.stdout.read()
pattern = component + r' version RELEASE\.(\d+)-.*'
version_match = re.search(pattern, stdout)
@@ -367,6 +369,8 @@ def _run_mc_command(mcdir, *args):
cmd_str = ' '.join(full_args)
print(f'Cmd: {cmd_str}')
print(f' Return: {retval}')
+ assert proc.stdout is not None
+ assert proc.stderr is not None
print(f' Stdout: {proc.stdout.read()}')
print(f' Stderr: {proc.stderr.read()}')
if retval != 0:
diff --git a/python/pyarrow/vendored/docscrape.py b/python/pyarrow/vendored/docscrape.py
index 6c4d6e01400..47aeeed40ae 100644
--- a/python/pyarrow/vendored/docscrape.py
+++ b/python/pyarrow/vendored/docscrape.py
@@ -18,7 +18,7 @@
import sys
-def strip_blank_lines(l):
+def strip_blank_lines(l): # noqa: E741
"Remove leading and trailing blank lines from a list of lines"
while l and not l[0].strip():
del l[0]
@@ -62,7 +62,7 @@ def read(self):
return ''
def seek_next_non_empty_line(self):
- for l in self[self._l:]:
+ for l in self[self._l:]: # noqa: E741
if l.strip():
break
else:
@@ -185,8 +185,9 @@ def _is_at_section(self):
l2 = self._doc.peek(1).strip() # ---------- or ==========
if len(l2) >= 3 and (set(l2) in ({'-'}, {'='})) and len(l2) != len(l1):
snip = '\n'.join(self._doc._str[:2])+'...'
- self._error_location("potentially wrong underline length... \n%s \n%s in \n%s"
- % (l1, l2, snip), error=False)
+ self._error_location(
+ "potentially wrong underline length... \n%s \n%s in \n%s"
+ % (l1, l2, snip), error=False)
return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
def _strip(self, doc):
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 0a730fd4f78..8031c333a64 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -84,11 +84,11 @@ zip-safe=false
include-package-data=true
[tool.setuptools.packages.find]
-include = ["pyarrow"]
+include = ["pyarrow", "pyarrow.*"]
namespaces = false
[tool.setuptools.package-data]
-pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"]
+pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd", "py.typed"]
[tool.setuptools_scm]
root = '..'
@@ -96,3 +96,27 @@ version_file = 'pyarrow/_generated_version.py'
version_scheme = 'guess-next-dev'
git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"'
fallback_version = '23.0.0a0'
+
+[tool.mypy]
+files = ["pyarrow"]
+exclude = 'pyarrow/interchange/.*|pyarrow/tests/interchange/.*|pyarrow/vendored/.*|pyarrow/tests/test_cuda*'
+mypy_path = "$MYPY_CONFIG_FILE_DIR/pyarrow-stubs"
+
+[tool.pyright]
+pythonPlatform = "All"
+pythonVersion = "3.10"
+include = ["pyarrow"]
+exclude = ["pyarrow/vendored", "pyarrow/interchange", "pyarrow/tests/interchange", "pyarrow/tests/test_cuda*"]
+stubPath = "pyarrow-stubs"
+typeCheckingMode = "basic"
+
+[tool.ty.src]
+include = ["pyarrow"]
+exclude = ["pyarrow/vendored", "pyarrow/interchange", "pyarrow/tests/interchange", "pyarrow/tests/test_cuda*"]
+
+[tool.ty.environment]
+root = ["pyarrow"]
+
+[tool.ty.rules]
+unresolved-import = "ignore"
+unresolved-attribute = "ignore"
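
With the `py.typed` marker and the `pyarrow.*` subpackages included above, an installed wheel advertises its types to downstream checkers per PEP 561. A tiny consumer snippet of the kind that should then type-check without extra configuration (a sketch; the exact annotations depend on the shipped stubs):

import pyarrow as pa

# mypy/pyright resolve these attribute types from the bundled stubs.
table: pa.Table = pa.table({"x": [1, 2, 3]})
column: pa.ChunkedArray = table.column("x")
print(column.num_chunks)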
diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py
index 53d3dd52bd8..6015cc211c1 100644
--- a/python/scripts/run_emscripten_tests.py
+++ b/python/scripts/run_emscripten_tests.py
@@ -114,7 +114,7 @@ def end_headers(self):
def run_server_thread(dist_dir, q):
- global _SERVER_ADDRESS
+ global _SERVER_ADDRESS # noqa: F824
os.chdir(dist_dir)
server = http.server.HTTPServer(("", 0), TemplateOverrider)
q.put(server.server_address)
diff --git a/python/setup.py b/python/setup.py
index a27bd3baefd..a25d2d76b36 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -121,8 +121,35 @@ def build_extensions(self):
def run(self):
self._run_cmake()
+ self._copy_stubs()
_build_ext.run(self)
+ def _copy_stubs(self):
+ """Copy .pyi stub files from pyarrow-stubs to the build directory."""
+ build_cmd = self.get_finalized_command('build')
+ build_lib = os.path.abspath(build_cmd.build_lib)
+
+ stubs_src = pjoin(setup_dir, 'pyarrow-stubs', 'pyarrow')
+ stubs_dest = pjoin(build_lib, 'pyarrow')
+
+ if os.path.exists(stubs_src):
+ print(f"-- Copying stub files from {stubs_src} to {stubs_dest}")
+ for root, dirs, files in os.walk(stubs_src):
+ # Calculate relative path from stubs_src
+ rel_dir = os.path.relpath(root, stubs_src)
+ dest_dir = pjoin(stubs_dest, rel_dir) if rel_dir != '.' else stubs_dest
+
+ # Create destination directory if needed
+ if not os.path.exists(dest_dir):
+ os.makedirs(dest_dir)
+
+ # Copy .pyi files
+ for file in files:
+ if file.endswith('.pyi'):
+ src_file = pjoin(root, file)
+ dest_file = pjoin(dest_dir, file)
+ shutil.copy2(src_file, dest_file)
+
# adapted from cmake_build_ext in dynd-python
# github.com/libdynd/dynd-python
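
The `_copy_stubs` hook above mirrors the `.pyi` files into the build tree so they ship next to the extension modules. A quick post-build sanity check one could run locally (a sketch; the build path is illustrative and depends on the local setuptools layout):

import pathlib

# Did the .pyi files land next to the built modules?
build_pkg = pathlib.Path("build/lib/pyarrow")
if build_pkg.exists():
    stubs = sorted(p.relative_to(build_pkg) for p in build_pkg.rglob("*.pyi"))
    print(f"{len(stubs)} stub files copied into {build_pkg}")
else:
    print("build tree not found; run the Python build step first")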