From dc15c669213f71e17cda16d8b91c3045f71143d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Fri, 9 Jan 2026 21:26:15 +0100
Subject: [PATCH 1/4] move utils

---
 openml/utils/__init__.py |  35 +++
 openml/utils/_openml.py  | 471 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 506 insertions(+)
 create mode 100644 openml/utils/__init__.py
 create mode 100644 openml/utils/_openml.py

diff --git a/openml/utils/__init__.py b/openml/utils/__init__.py
new file mode 100644
index 000000000..83e379222
--- /dev/null
+++ b/openml/utils/__init__.py
@@ -0,0 +1,35 @@
+"""Utilities module."""
+
+from openml.utils._openml import (
+    ProgressBar,
+    _create_cache_directory,
+    _create_cache_directory_for_id,
+    _create_lockfiles_dir,
+    _delete_entity,
+    _get_cache_dir_for_id,
+    _get_cache_dir_for_key,
+    _get_rest_api_type_alias,
+    _list_all,
+    _remove_cache_dir_for_id,
+    _tag_entity,
+    _tag_openml_base,
+    extract_xml_tags,
+    thread_safe_if_oslo_installed,
+)
+
+__all__ = [
+    "ProgressBar",
+    "_create_cache_directory",
+    "_create_cache_directory_for_id",
+    "_create_lockfiles_dir",
+    "_delete_entity",
+    "_get_cache_dir_for_id",
+    "_get_cache_dir_for_key",
+    "_get_rest_api_type_alias",
+    "_list_all",
+    "_remove_cache_dir_for_id",
+    "_tag_entity",
+    "_tag_openml_base",
+    "extract_xml_tags",
+    "thread_safe_if_oslo_installed",
+]
diff --git a/openml/utils/_openml.py b/openml/utils/_openml.py
new file mode 100644
index 000000000..7e72e7aee
--- /dev/null
+++ b/openml/utils/_openml.py
@@ -0,0 +1,471 @@
+# License: BSD 3-Clause
+from __future__ import annotations
+
+import contextlib
+import shutil
+import warnings
+from functools import wraps
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Callable, Mapping, Sized, TypeVar, overload
+from typing_extensions import Literal, ParamSpec
+
+import numpy as np
+import xmltodict
+from minio.helpers import ProgressType
+from tqdm import tqdm
+
+import openml
+import openml._api_calls
+import openml.exceptions
+
+from . import config
+
+# Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
+if TYPE_CHECKING:
+    from openml.base import OpenMLBase
+
+    P = ParamSpec("P")
+    R = TypeVar("R")
+    _SizedT = TypeVar("_SizedT", bound=Sized)
+
+
+@overload
+def extract_xml_tags(
+    xml_tag_name: str,
+    node: Mapping[str, Any],
+    *,
+    allow_none: Literal[True] = ...,
+) -> Any | None: ...
+
+
+@overload
+def extract_xml_tags(
+    xml_tag_name: str,
+    node: Mapping[str, Any],
+    *,
+    allow_none: Literal[False],
+) -> Any: ...
+
+
+def extract_xml_tags(
+    xml_tag_name: str,
+    node: Mapping[str, Any],
+    *,
+    allow_none: bool = True,
+) -> Any | None:
+    """Helper to extract xml tags from xmltodict.
+
+    Parameters
+    ----------
+    xml_tag_name : str
+        Name of the xml tag to extract from the node.
+
+    node : Mapping[str, Any]
+        Node object returned by ``xmltodict`` from which ``xml_tag_name``
+        should be extracted.
+
+    allow_none : bool
+        If ``False``, the tag needs to exist in the node. Will raise a
+        ``ValueError`` if it does not.
+
+    Returns
+    -------
+    object
+    """
+    if xml_tag_name in node and node[xml_tag_name] is not None:
+        if isinstance(node[xml_tag_name], (dict, str)):
+            return [node[xml_tag_name]]
+        if isinstance(node[xml_tag_name], list):
+            return node[xml_tag_name]
+
+        raise ValueError("Received not string and non list as tag item")
+
+    if allow_none:
+        return None
+
+    raise ValueError(f"Could not find tag '{xml_tag_name}' in node '{node!s}'")
+
+
+def _get_rest_api_type_alias(oml_object: OpenMLBase) -> str:
+    """Return the alias of the openml entity as it is defined for the REST API."""
+    rest_api_mapping: list[tuple[type | tuple, str]] = [
+        (openml.datasets.OpenMLDataset, "data"),
+        (openml.flows.OpenMLFlow, "flow"),
+        (openml.tasks.OpenMLTask, "task"),
+        (openml.runs.OpenMLRun, "run"),
+        ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), "study"),
+    ]
+    _, api_type_alias = next(
+        (python_type, api_alias)
+        for (python_type, api_alias) in rest_api_mapping
+        if isinstance(oml_object, python_type)
+    )
+    return api_type_alias
+
+
+def _tag_openml_base(oml_object: OpenMLBase, tag: str, untag: bool = False) -> None:  # noqa: FBT001, FBT002
+    api_type_alias = _get_rest_api_type_alias(oml_object)
+    if oml_object.id is None:
+        raise openml.exceptions.ObjectNotPublishedError(
+            f"Cannot tag an {api_type_alias} that has not been published yet."
+            "Please publish the object first before being able to tag it."
+            f"\n{oml_object}",
+        )
+    _tag_entity(entity_type=api_type_alias, entity_id=oml_object.id, tag=tag, untag=untag)
+
+
+def _tag_entity(entity_type: str, entity_id: int, tag: str, *, untag: bool = False) -> list[str]:
+    """
+    Function that tags or untags a given entity on OpenML. As the OpenML
+    API tag functions all consist of the same format, this function covers
+    all entity types (currently: dataset, task, flow, setup, run). Could
+    be used in a partial to provide dataset_tag, dataset_untag, etc.
+
+    Parameters
+    ----------
+    entity_type : str
+        Name of the entity to tag (e.g., run, flow, data)
+
+    entity_id : int
+        OpenML id of the entity
+
+    tag : str
+        The tag
+
+    untag : bool
+        Set to true if needed to untag, rather than tag
+
+    Returns
+    -------
+    tags : list
+        List of tags that the entity is (still) tagged with
+    """
+    legal_entities = {"data", "task", "flow", "setup", "run"}
+    if entity_type not in legal_entities:
+        raise ValueError(f"Can't tag a {entity_type}")
+
+    if untag:
+        uri = f"{entity_type}/untag"
+        main_tag = f"oml:{entity_type}_untag"
+    else:
+        uri = f"{entity_type}/tag"
+        main_tag = f"oml:{entity_type}_tag"
+
+    result_xml = openml._api_calls._perform_api_call(
+        uri,
+        "post",
+        {f"{entity_type}_id": entity_id, "tag": tag},
+    )
+
+    result = xmltodict.parse(result_xml, force_list={"oml:tag"})[main_tag]
+
+    if "oml:tag" in result:
+        return result["oml:tag"]  # type: ignore
+
+    # no tags, return empty list
+    return []
+
+
+# TODO(eddiebergman): Maybe this can be made more specific with a Literal
+def _delete_entity(entity_type: str, entity_id: int) -> bool:
+    """
+    Function that deletes a given entity on OpenML. As the OpenML
+    API tag functions all consist of the same format, this function covers
+    all entity types that can be deleted (currently: dataset, task, flow,
+    run, study and user).
+
+    Parameters
+    ----------
+    entity_type : str
+        Name of the entity to tag (e.g., run, flow, data)
+
+    entity_id : int
+        OpenML id of the entity
+
+    Returns
+    -------
+    bool
+        True iff the deletion was successful. False otherwse
+    """
+    legal_entities = {
+        "data",
+        "flow",
+        "task",
+        "run",
+        "study",
+        "user",
+    }
+    if entity_type not in legal_entities:
+        raise ValueError(f"Can't delete a {entity_type}")
+
+    url_suffix = "%s/%d" % (entity_type, entity_id)
+    try:
+        result_xml = openml._api_calls._perform_api_call(url_suffix, "delete")
+        result = xmltodict.parse(result_xml)
+        return f"oml:{entity_type}_delete" in result
+    except openml.exceptions.OpenMLServerException as e:
+        # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php
+        # Most exceptions are descriptive enough to be raised as their standard
+        # OpenMLServerException, however there are two cases where we add information:
+        #  - a generic "failed" message, we direct them to the right issue board
+        #  - when the user successfully authenticates with the server,
+        #    but user is not allowed to take the requested action,
+        #    in which case we specify a OpenMLNotAuthorizedError.
+        by_other_user = [323, 353, 393, 453, 594]
+        has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595]
+        unknown_reason = [325, 355, 394, 455, 593]
+        if e.code in by_other_user:
+            raise openml.exceptions.OpenMLNotAuthorizedError(
+                message=(
+                    f"The {entity_type} can not be deleted because it was not uploaded by you."
+                ),
+            ) from e
+        if e.code in has_dependent_entities:
+            raise openml.exceptions.OpenMLNotAuthorizedError(
+                message=(
+                    f"The {entity_type} can not be deleted because "
+                    f"it still has associated entities: {e.message}"
+                ),
+            ) from e
+        if e.code in unknown_reason:
+            raise openml.exceptions.OpenMLServerError(
+                message=(
+                    f"The {entity_type} can not be deleted for unknown reason,"
+                    " please open an issue at: https://github.com/openml/openml/issues/new"
+                ),
+            ) from e
+        raise e
+
+
+def _list_all(  # noqa: C901
+    listing_call: Callable[[int, int], _SizedT],
+    *,
+    limit: int | None = None,
+    offset: int | None = None,
+    batch_size: int | None = 10_000,
+) -> list[_SizedT]:
+    """Helper to handle paged listing requests.
+
+    Example usage:
+
+    ``evaluations = list_all(list_evaluations, "predictive_accuracy", task=mytask)``
+
+    Parameters
+    ----------
+    listing_call : callable
+        Call listing, e.g. list_evaluations. Takes two positional
+        arguments: batch_size and offset.
+    batch_size : int, optional
+        The batch size to use for the listing call.
+    offset : int, optional
+        The initial offset to use for the listing call.
+    limit : int, optional
+        The total size of the listing. If not provided, the function will
+        request the first batch and then continue until no more results are
+        returned
+
+    Returns
+    -------
+    List of types returned from type of the listing call
+    """
+    page = 0
+    results: list[_SizedT] = []
+
+    offset = offset if offset is not None else 0
+    batch_size = batch_size if batch_size is not None else 10_000
+
+    LIMIT = limit
+    BATCH_SIZE_ORIG = batch_size
+
+    # Default batch size per paging.
+    # This one can be set in filters (batch_size), but should not be
+    # changed afterwards. The derived batch_size can be changed.
+    if not isinstance(BATCH_SIZE_ORIG, int):
+        raise ValueError(f"'batch_size' should be an integer but got {BATCH_SIZE_ORIG}")
+
+    if (LIMIT is not None) and (not isinstance(LIMIT, int)) and (not np.isinf(LIMIT)):
+        raise ValueError(f"'limit' should be an integer or inf but got {LIMIT}")
+
+    # If our batch size is larger than the limit, we should only
+    # request one batch of size of LIMIT
+    if LIMIT is not None and BATCH_SIZE_ORIG > LIMIT:
+        BATCH_SIZE_ORIG = LIMIT
+
+    if not isinstance(offset, int):
+        raise ValueError(f"'offset' should be an integer but got {offset}")
+
+    batch_size = BATCH_SIZE_ORIG
+    while True:
+        try:
+            current_offset = offset + BATCH_SIZE_ORIG * page
+            new_batch = listing_call(batch_size, current_offset)
+        except openml.exceptions.OpenMLServerNoResult:
+            # NOTE: This above statement may not actually happen, but we could just return here
+            # to enforce it...
+            break
+
+        results.append(new_batch)
+
+        # If the batch is less than our requested batch_size, that's the last batch
+        # and we can bail out.
+        if len(new_batch) < batch_size:
+            break
+
+        page += 1
+        if LIMIT is not None:
+            # check if the number of required results has been achieved
+            # always do a 'bigger than' check,
+            # in case of bugs to prevent infinite loops
+            n_received = sum(len(result) for result in results)
+            if n_received >= LIMIT:
+                break
+
+            # check if there are enough results to fulfill a batch
+            if LIMIT - n_received < BATCH_SIZE_ORIG:
+                batch_size = LIMIT - n_received
+
+    return results
+
+
+def _get_cache_dir_for_key(key: str) -> Path:
+    return Path(config.get_cache_directory()) / key
+
+
+def _create_cache_directory(key: str) -> Path:
+    cache_dir = _get_cache_dir_for_key(key)
+
+    try:
+        cache_dir.mkdir(exist_ok=True, parents=True)
+    except Exception as e:
+        raise openml.exceptions.OpenMLCacheException(
+            f"Cannot create cache directory {cache_dir}."
+        ) from e
+
+    return cache_dir
+
+
+def _get_cache_dir_for_id(key: str, id_: int, create: bool = False) -> Path:  # noqa: FBT001, FBT002
+    cache_dir = _create_cache_directory(key) if create else _get_cache_dir_for_key(key)
+    return Path(cache_dir) / str(id_)
+
+
+def _create_cache_directory_for_id(key: str, id_: int) -> Path:
+    """Create the cache directory for a specific ID
+
+    In order to have a clearer cache structure and because every task
+    is cached in several files (description, split), there
+    is a directory for each task witch the task ID being the directory
+    name. This function creates this cache directory.
+
+    This function is NOT thread/multiprocessing safe.
+
+    Parameters
+    ----------
+    key : str
+
+    id_ : int
+
+    Returns
+    -------
+    cache_dir : Path
+        Path of the created dataset cache directory.
+    """
+    cache_dir = _get_cache_dir_for_id(key, id_, create=True)
+    if cache_dir.exists() and not cache_dir.is_dir():
+        raise ValueError(f"{key} cache dir exists but is not a directory!")
+
+    cache_dir.mkdir(exist_ok=True, parents=True)
+    return cache_dir
+
+
+def _remove_cache_dir_for_id(key: str, cache_dir: Path) -> None:
+    """Remove the task cache directory
+
+    This function is NOT thread/multiprocessing safe.
+
+    Parameters
+    ----------
+    key : str
+
+    cache_dir : str
+    """
+    try:
+        shutil.rmtree(cache_dir)
+    except OSError as e:
+        raise ValueError(
+            f"Cannot remove faulty {key} cache directory {cache_dir}. Please do this manually!",
+        ) from e
+
+
+def thread_safe_if_oslo_installed(func: Callable[P, R]) -> Callable[P, R]:
+    try:
+        # Currently, importing oslo raises a lot of warning that it will stop working
+        # under python3.8; remove this once they disappear
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            from oslo_concurrency import lockutils
+
+        @wraps(func)
+        def safe_func(*args: P.args, **kwargs: P.kwargs) -> R:
+            # Lock directories use the id that is passed as either positional or keyword argument.
+            id_parameters = [parameter_name for parameter_name in kwargs if "_id" in parameter_name]
+            if len(id_parameters) == 1:
+                id_ = kwargs[id_parameters[0]]
+            elif len(args) > 0:
+                id_ = args[0]
+            else:
+                raise RuntimeError(
+                    f"An id must be specified for {func.__name__}, was passed: ({args}, {kwargs}).",
+                )
+            # The [7:] gets rid of the 'openml.' prefix
+            lock_name = f"{func.__module__[7:]}.{func.__name__}:{id_}"
+            with lockutils.external_lock(name=lock_name, lock_path=_create_lockfiles_dir()):
+                return func(*args, **kwargs)
+
+        return safe_func
+    except ImportError:
+        return func
+
+
+def _create_lockfiles_dir() -> Path:
+    path = Path(config.get_cache_directory()) / "locks"
+    # TODO(eddiebergman): Not sure why this is allowed to error and ignore???
+    with contextlib.suppress(OSError):
+        path.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+class ProgressBar(ProgressType):
+    """Progressbar for MinIO function's `progress` parameter."""
+
+    def __init__(self) -> None:
+        self._object_name = ""
+        self._progress_bar: tqdm | None = None
+
+    def set_meta(self, object_name: str, total_length: int) -> None:
+        """Initializes the progress bar.
+
+        Parameters
+        ----------
+        object_name: str
+          Not used.
+
+        total_length: int
+          File size of the object in bytes.
+        """
+        self._object_name = object_name
+        self._progress_bar = tqdm(total=total_length, unit_scale=True, unit="B")
+
+    def update(self, length: int) -> None:
+        """Updates the progress bar.
+
+        Parameters
+        ----------
+        length: int
+          Number of bytes downloaded since last `update` call.
+        """
+        if not self._progress_bar:
+            raise RuntimeError("Call `set_meta` before calling `update`.")
+        self._progress_bar.update(length)
+        if self._progress_bar.total <= self._progress_bar.n:
+            self._progress_bar.close()

From 91c138cfc54badcfd0be81f8de704b6e38fcb4cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Fri, 9 Jan 2026 21:27:45 +0100
Subject: [PATCH 2/4] Delete utils.py

---
 openml/utils.py | 471 ------------------------------------------------
 1 file changed, 471 deletions(-)
 delete mode 100644 openml/utils.py

diff --git a/openml/utils.py b/openml/utils.py
deleted file mode 100644
index 7e72e7aee..000000000
--- a/openml/utils.py
+++ /dev/null
@@ -1,471 +0,0 @@
-# License: BSD 3-Clause
-from __future__ import annotations
-
-import contextlib
-import shutil
-import warnings
-from functools import wraps
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Mapping, Sized, TypeVar, overload
-from typing_extensions import Literal, ParamSpec
-
-import numpy as np
-import xmltodict
-from minio.helpers import ProgressType
-from tqdm import tqdm
-
-import openml
-import openml._api_calls
-import openml.exceptions
-
-from . import config
-
-# Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
-if TYPE_CHECKING:
-    from openml.base import OpenMLBase
-
-    P = ParamSpec("P")
-    R = TypeVar("R")
-    _SizedT = TypeVar("_SizedT", bound=Sized)
-
-
-@overload
-def extract_xml_tags(
-    xml_tag_name: str,
-    node: Mapping[str, Any],
-    *,
-    allow_none: Literal[True] = ...,
-) -> Any | None: ...
-
-
-@overload
-def extract_xml_tags(
-    xml_tag_name: str,
-    node: Mapping[str, Any],
-    *,
-    allow_none: Literal[False],
-) -> Any: ...
-
-
-def extract_xml_tags(
-    xml_tag_name: str,
-    node: Mapping[str, Any],
-    *,
-    allow_none: bool = True,
-) -> Any | None:
-    """Helper to extract xml tags from xmltodict.
-
-    Parameters
-    ----------
-    xml_tag_name : str
-        Name of the xml tag to extract from the node.
-
-    node : Mapping[str, Any]
-        Node object returned by ``xmltodict`` from which ``xml_tag_name``
-        should be extracted.
-
-    allow_none : bool
-        If ``False``, the tag needs to exist in the node. Will raise a
-        ``ValueError`` if it does not.
-
-    Returns
-    -------
-    object
-    """
-    if xml_tag_name in node and node[xml_tag_name] is not None:
-        if isinstance(node[xml_tag_name], (dict, str)):
-            return [node[xml_tag_name]]
-        if isinstance(node[xml_tag_name], list):
-            return node[xml_tag_name]
-
-        raise ValueError("Received not string and non list as tag item")
-
-    if allow_none:
-        return None
-
-    raise ValueError(f"Could not find tag '{xml_tag_name}' in node '{node!s}'")
-
-
-def _get_rest_api_type_alias(oml_object: OpenMLBase) -> str:
-    """Return the alias of the openml entity as it is defined for the REST API."""
-    rest_api_mapping: list[tuple[type | tuple, str]] = [
-        (openml.datasets.OpenMLDataset, "data"),
-        (openml.flows.OpenMLFlow, "flow"),
-        (openml.tasks.OpenMLTask, "task"),
-        (openml.runs.OpenMLRun, "run"),
-        ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), "study"),
-    ]
-    _, api_type_alias = next(
-        (python_type, api_alias)
-        for (python_type, api_alias) in rest_api_mapping
-        if isinstance(oml_object, python_type)
-    )
-    return api_type_alias
-
-
-def _tag_openml_base(oml_object: OpenMLBase, tag: str, untag: bool = False) -> None:  # noqa: FBT001, FBT002
-    api_type_alias = _get_rest_api_type_alias(oml_object)
-    if oml_object.id is None:
-        raise openml.exceptions.ObjectNotPublishedError(
-            f"Cannot tag an {api_type_alias} that has not been published yet."
-            "Please publish the object first before being able to tag it."
-            f"\n{oml_object}",
-        )
-    _tag_entity(entity_type=api_type_alias, entity_id=oml_object.id, tag=tag, untag=untag)
-
-
-def _tag_entity(entity_type: str, entity_id: int, tag: str, *, untag: bool = False) -> list[str]:
-    """
-    Function that tags or untags a given entity on OpenML. As the OpenML
-    API tag functions all consist of the same format, this function covers
-    all entity types (currently: dataset, task, flow, setup, run). Could
-    be used in a partial to provide dataset_tag, dataset_untag, etc.
-
-    Parameters
-    ----------
-    entity_type : str
-        Name of the entity to tag (e.g., run, flow, data)
-
-    entity_id : int
-        OpenML id of the entity
-
-    tag : str
-        The tag
-
-    untag : bool
-        Set to true if needed to untag, rather than tag
-
-    Returns
-    -------
-    tags : list
-        List of tags that the entity is (still) tagged with
-    """
-    legal_entities = {"data", "task", "flow", "setup", "run"}
-    if entity_type not in legal_entities:
-        raise ValueError(f"Can't tag a {entity_type}")
-
-    if untag:
-        uri = f"{entity_type}/untag"
-        main_tag = f"oml:{entity_type}_untag"
-    else:
-        uri = f"{entity_type}/tag"
-        main_tag = f"oml:{entity_type}_tag"
-
-    result_xml = openml._api_calls._perform_api_call(
-        uri,
-        "post",
-        {f"{entity_type}_id": entity_id, "tag": tag},
-    )
-
-    result = xmltodict.parse(result_xml, force_list={"oml:tag"})[main_tag]
-
-    if "oml:tag" in result:
-        return result["oml:tag"]  # type: ignore
-
-    # no tags, return empty list
-    return []
-
-
-# TODO(eddiebergman): Maybe this can be made more specific with a Literal
-def _delete_entity(entity_type: str, entity_id: int) -> bool:
-    """
-    Function that deletes a given entity on OpenML. As the OpenML
-    API tag functions all consist of the same format, this function covers
-    all entity types that can be deleted (currently: dataset, task, flow,
-    run, study and user).
-
-    Parameters
-    ----------
-    entity_type : str
-        Name of the entity to tag (e.g., run, flow, data)
-
-    entity_id : int
-        OpenML id of the entity
-
-    Returns
-    -------
-    bool
-        True iff the deletion was successful. False otherwse
-    """
-    legal_entities = {
-        "data",
-        "flow",
-        "task",
-        "run",
-        "study",
-        "user",
-    }
-    if entity_type not in legal_entities:
-        raise ValueError(f"Can't delete a {entity_type}")
-
-    url_suffix = "%s/%d" % (entity_type, entity_id)
-    try:
-        result_xml = openml._api_calls._perform_api_call(url_suffix, "delete")
-        result = xmltodict.parse(result_xml)
-        return f"oml:{entity_type}_delete" in result
-    except openml.exceptions.OpenMLServerException as e:
-        # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php
-        # Most exceptions are descriptive enough to be raised as their standard
-        # OpenMLServerException, however there are two cases where we add information:
-        #  - a generic "failed" message, we direct them to the right issue board
-        #  - when the user successfully authenticates with the server,
-        #    but user is not allowed to take the requested action,
-        #    in which case we specify a OpenMLNotAuthorizedError.
-        by_other_user = [323, 353, 393, 453, 594]
-        has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595]
-        unknown_reason = [325, 355, 394, 455, 593]
-        if e.code in by_other_user:
-            raise openml.exceptions.OpenMLNotAuthorizedError(
-                message=(
-                    f"The {entity_type} can not be deleted because it was not uploaded by you."
-                ),
-            ) from e
-        if e.code in has_dependent_entities:
-            raise openml.exceptions.OpenMLNotAuthorizedError(
-                message=(
-                    f"The {entity_type} can not be deleted because "
-                    f"it still has associated entities: {e.message}"
-                ),
-            ) from e
-        if e.code in unknown_reason:
-            raise openml.exceptions.OpenMLServerError(
-                message=(
-                    f"The {entity_type} can not be deleted for unknown reason,"
-                    " please open an issue at: https://github.com/openml/openml/issues/new"
-                ),
-            ) from e
-        raise e
-
-
-def _list_all(  # noqa: C901
-    listing_call: Callable[[int, int], _SizedT],
-    *,
-    limit: int | None = None,
-    offset: int | None = None,
-    batch_size: int | None = 10_000,
-) -> list[_SizedT]:
-    """Helper to handle paged listing requests.
-
-    Example usage:
-
-    ``evaluations = list_all(list_evaluations, "predictive_accuracy", task=mytask)``
-
-    Parameters
-    ----------
-    listing_call : callable
-        Call listing, e.g. list_evaluations. Takes two positional
-        arguments: batch_size and offset.
-    batch_size : int, optional
-        The batch size to use for the listing call.
-    offset : int, optional
-        The initial offset to use for the listing call.
-    limit : int, optional
-        The total size of the listing. If not provided, the function will
-        request the first batch and then continue until no more results are
-        returned
-
-    Returns
-    -------
-    List of types returned from type of the listing call
-    """
-    page = 0
-    results: list[_SizedT] = []
-
-    offset = offset if offset is not None else 0
-    batch_size = batch_size if batch_size is not None else 10_000
-
-    LIMIT = limit
-    BATCH_SIZE_ORIG = batch_size
-
-    # Default batch size per paging.
-    # This one can be set in filters (batch_size), but should not be
-    # changed afterwards. The derived batch_size can be changed.
-    if not isinstance(BATCH_SIZE_ORIG, int):
-        raise ValueError(f"'batch_size' should be an integer but got {BATCH_SIZE_ORIG}")
-
-    if (LIMIT is not None) and (not isinstance(LIMIT, int)) and (not np.isinf(LIMIT)):
-        raise ValueError(f"'limit' should be an integer or inf but got {LIMIT}")
-
-    # If our batch size is larger than the limit, we should only
-    # request one batch of size of LIMIT
-    if LIMIT is not None and BATCH_SIZE_ORIG > LIMIT:
-        BATCH_SIZE_ORIG = LIMIT
-
-    if not isinstance(offset, int):
-        raise ValueError(f"'offset' should be an integer but got {offset}")
-
-    batch_size = BATCH_SIZE_ORIG
-    while True:
-        try:
-            current_offset = offset + BATCH_SIZE_ORIG * page
-            new_batch = listing_call(batch_size, current_offset)
-        except openml.exceptions.OpenMLServerNoResult:
-            # NOTE: This above statement may not actually happen, but we could just return here
-            # to enforce it...
-            break
-
-        results.append(new_batch)
-
-        # If the batch is less than our requested batch_size, that's the last batch
-        # and we can bail out.
-        if len(new_batch) < batch_size:
-            break
-
-        page += 1
-        if LIMIT is not None:
-            # check if the number of required results has been achieved
-            # always do a 'bigger than' check,
-            # in case of bugs to prevent infinite loops
-            n_received = sum(len(result) for result in results)
-            if n_received >= LIMIT:
-                break
-
-            # check if there are enough results to fulfill a batch
-            if LIMIT - n_received < BATCH_SIZE_ORIG:
-                batch_size = LIMIT - n_received
-
-    return results
-
-
-def _get_cache_dir_for_key(key: str) -> Path:
-    return Path(config.get_cache_directory()) / key
-
-
-def _create_cache_directory(key: str) -> Path:
-    cache_dir = _get_cache_dir_for_key(key)
-
-    try:
-        cache_dir.mkdir(exist_ok=True, parents=True)
-    except Exception as e:
-        raise openml.exceptions.OpenMLCacheException(
-            f"Cannot create cache directory {cache_dir}."
-        ) from e
-
-    return cache_dir
-
-
-def _get_cache_dir_for_id(key: str, id_: int, create: bool = False) -> Path:  # noqa: FBT001, FBT002
-    cache_dir = _create_cache_directory(key) if create else _get_cache_dir_for_key(key)
-    return Path(cache_dir) / str(id_)
-
-
-def _create_cache_directory_for_id(key: str, id_: int) -> Path:
-    """Create the cache directory for a specific ID
-
-    In order to have a clearer cache structure and because every task
-    is cached in several files (description, split), there
-    is a directory for each task witch the task ID being the directory
-    name. This function creates this cache directory.
-
-    This function is NOT thread/multiprocessing safe.
-
-    Parameters
-    ----------
-    key : str
-
-    id_ : int
-
-    Returns
-    -------
-    cache_dir : Path
-        Path of the created dataset cache directory.
-    """
-    cache_dir = _get_cache_dir_for_id(key, id_, create=True)
-    if cache_dir.exists() and not cache_dir.is_dir():
-        raise ValueError(f"{key} cache dir exists but is not a directory!")
-
-    cache_dir.mkdir(exist_ok=True, parents=True)
-    return cache_dir
-
-
-def _remove_cache_dir_for_id(key: str, cache_dir: Path) -> None:
-    """Remove the task cache directory
-
-    This function is NOT thread/multiprocessing safe.
-
-    Parameters
-    ----------
-    key : str
-
-    cache_dir : str
-    """
-    try:
-        shutil.rmtree(cache_dir)
-    except OSError as e:
-        raise ValueError(
-            f"Cannot remove faulty {key} cache directory {cache_dir}. Please do this manually!",
-        ) from e
-
-
-def thread_safe_if_oslo_installed(func: Callable[P, R]) -> Callable[P, R]:
-    try:
-        # Currently, importing oslo raises a lot of warning that it will stop working
-        # under python3.8; remove this once they disappear
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            from oslo_concurrency import lockutils
-
-        @wraps(func)
-        def safe_func(*args: P.args, **kwargs: P.kwargs) -> R:
-            # Lock directories use the id that is passed as either positional or keyword argument.
-            id_parameters = [parameter_name for parameter_name in kwargs if "_id" in parameter_name]
-            if len(id_parameters) == 1:
-                id_ = kwargs[id_parameters[0]]
-            elif len(args) > 0:
-                id_ = args[0]
-            else:
-                raise RuntimeError(
-                    f"An id must be specified for {func.__name__}, was passed: ({args}, {kwargs}).",
-                )
-            # The [7:] gets rid of the 'openml.' prefix
-            lock_name = f"{func.__module__[7:]}.{func.__name__}:{id_}"
-            with lockutils.external_lock(name=lock_name, lock_path=_create_lockfiles_dir()):
-                return func(*args, **kwargs)
-
-        return safe_func
-    except ImportError:
-        return func
-
-
-def _create_lockfiles_dir() -> Path:
-    path = Path(config.get_cache_directory()) / "locks"
-    # TODO(eddiebergman): Not sure why this is allowed to error and ignore???
-    with contextlib.suppress(OSError):
-        path.mkdir(exist_ok=True, parents=True)
-    return path
-
-
-class ProgressBar(ProgressType):
-    """Progressbar for MinIO function's `progress` parameter."""
-
-    def __init__(self) -> None:
-        self._object_name = ""
-        self._progress_bar: tqdm | None = None
-
-    def set_meta(self, object_name: str, total_length: int) -> None:
-        """Initializes the progress bar.
-
-        Parameters
-        ----------
-        object_name: str
-          Not used.
-
-        total_length: int
-          File size of the object in bytes.
-        """
-        self._object_name = object_name
-        self._progress_bar = tqdm(total=total_length, unit_scale=True, unit="B")
-
-    def update(self, length: int) -> None:
-        """Updates the progress bar.
-
-        Parameters
-        ----------
-        length: int
-          Number of bytes downloaded since last `update` call.
-        """
-        if not self._progress_bar:
-            raise RuntimeError("Call `set_meta` before calling `update`.")
-        self._progress_bar.update(length)
-        if self._progress_bar.total <= self._progress_bar.n:
-            self._progress_bar.close()

From abc0f4929285422427bdd55dd93cc81322ae2bc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <fkiraly@gcos.ai>
Date: Fri, 9 Jan 2026 21:29:44 +0100
Subject: [PATCH 3/4] Update _openml.py

---
 openml/utils/_openml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/utils/_openml.py b/openml/utils/_openml.py
index 7e72e7aee..d86a62ce7 100644
--- a/openml/utils/_openml.py
+++ b/openml/utils/_openml.py
@@ -18,7 +18,7 @@
 import openml._api_calls
 import openml.exceptions
 
-from . import config
+from openml import config
 
 # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
 if TYPE_CHECKING:

From a39bddc9099bfb0896aa9d7e580bcd28685b5dba Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 9 Jan 2026 20:31:40 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/utils/_openml.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/openml/utils/_openml.py b/openml/utils/_openml.py
index d86a62ce7..f20aedcca 100644
--- a/openml/utils/_openml.py
+++ b/openml/utils/_openml.py
@@ -17,7 +17,6 @@
 import openml
 import openml._api_calls
 import openml.exceptions
-
 from openml import config
 
 # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles