From 071b86be5d2a625a253b5ba5ee91d94082a85084 Mon Sep 17 00:00:00 2001 From: Eman Abdelhaleem Date: Mon, 29 Dec 2025 12:22:50 +0200 Subject: [PATCH 1/4] add example, default to get_flow docstring --- openml/flows/functions.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 9906958e5..970e881cb 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -78,16 +78,32 @@ def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = T flow_id : int The OpenML flow id. - reinstantiate: bool + reinstantiate: bool, optional, default=False Whether to reinstantiate the flow to a model instance. - strict_version : bool, default=True + strict_version : bool, optional, default=True Whether to fail if version requirements are not fulfilled. Returns ------- flow : OpenMLFlow the flow + + Examples + -------- + >>> import openml + Download the flow with flow_id = 5 + >>> flow_id = 5 + >>> flow = openml.flows.get_flow() + >>> print(flow) + OpenML Flow + =========== + Flow ID.........: 5 (version 1) + Flow URL........: https://www.openml.org/f/5 + Flow Name.......: openml.evaluation.average_cost + Flow Description: An implementation of the evaluation measure "average_cost" + Upload Date.....: 2014-01-16 14:12:56 + Dependencies....: Build on top of Weka API (Jar version 3.?.?) 
""" flow_id = int(flow_id) flow = _get_flow_description(flow_id) From 230733e6bba1c9dcb80533dc57e1a564095b7692 Mon Sep 17 00:00:00 2001 From: Eman Abdelhaleem Date: Mon, 29 Dec 2025 12:31:42 +0200 Subject: [PATCH 2/4] ADD examples to list_flows docstring --- openml/flows/functions.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 970e881cb..4fa8ea28f 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -180,6 +180,13 @@ def list_flows( - version - external version - uploader + + Examples + -------- + >>> import openml + >>> flows = openml.flows.list_flows() + A df of all flows which are on openML is returned + [123519 rows x 6 columns] """ listing_call = partial(_list_flows, tag=tag, uploader=uploader) batches = openml.utils._list_all(listing_call, offset=offset, limit=size) From 3a9c3cca5e47ed8f3c6ed35ac0db87dd12c230a0 Mon Sep 17 00:00:00 2001 From: Eman Abdelhaleem Date: Mon, 29 Dec 2025 12:42:34 +0200 Subject: [PATCH 3/4] ADD examples to flow_exists docstring --- openml/flows/functions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 4fa8ea28f..7eb979ecd 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -245,6 +245,14 @@ def flow_exists(name: str, external_version: str) -> int | bool: flow_exist : int or bool flow id iff exists, False otherwise + Examples + -------- + >>> import openml + >>> flow = openml.flows.flow_exists(name = "openml.evaluation.EuclideanDistance", \ + >>> external_version = "1") + >>> print(flow) + + Notes ----- see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version From 62cab85e1e57a445dd520299b19e6294fb16bedd Mon Sep 17 00:00:00 2001 From: Eman Abdelhaleem Date: Thu, 8 Jan 2026 23:11:19 +0200 Subject: [PATCH 4/4] [DOC] Enhance Docstrings of Flows Core Public Functions --- openml/flows/functions.py | 237 
+++++++++++++++++++++++++------------- 1 file changed, 160 insertions(+), 77 deletions(-) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 7eb979ecd..c2755c595 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -71,39 +71,59 @@ def _get_cached_flow(fid: int) -> OpenMLFlow: @openml.utils.thread_safe_if_oslo_installed def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = True) -> OpenMLFlow: # noqa: FBT001, FBT002 - """Download the OpenML flow for a given flow ID. + """Fetch an OpenMLFlow by its server-assigned ID. + + Queries the OpenML REST API for the flow metadata and returns an + :class:`OpenMLFlow` instance. If the flow is already cached locally, + the cached copy is returned. Optionally the flow can be re-instantiated + into a concrete model instance using the registered extension. Parameters ---------- flow_id : int The OpenML flow id. - - reinstantiate: bool, optional, default=False - Whether to reinstantiate the flow to a model instance. - - strict_version : bool, optional, default=True - Whether to fail if version requirements are not fulfilled. + reinstantiate : bool, optional (default=False) + If True, convert the flow description into a concrete model instance + using the flow's extension (e.g., sklearn). If conversion fails and + ``strict_version`` is True, an exception will be raised. + strict_version : bool, optional (default=True) + When ``reinstantiate`` is True, whether to enforce exact version + requirements for the extension/model. If False, a fallback flow may + be returned when versions differ. Returns ------- - flow : OpenMLFlow - the flow + OpenMLFlow + The flow object with metadata; ``model`` may be populated when + ``reinstantiate=True``. + + Raises + ------ + OpenMLCacheException + When cached flow files are corrupted or cannot be read. + OpenMLServerException + When the REST API call fails. 
+ + Side Effects + ------------ + - Writes to ``openml.config.cache_directory/flows/{flow_id}/flow.xml`` + when the flow is downloaded from the server. + + Preconditions + ------------- + - Network access to the OpenML server is required unless the flow is cached. + - For private flows, ``openml.config.apikey`` must be set. + + Notes + ----- + Results are cached to speed up subsequent calls. When ``reinstantiate`` is + True and version mismatches occur, a new flow may be returned to reflect + the converted model (only when ``strict_version`` is False). Examples -------- >>> import openml - Download the flow with flow_id = 5 - >>> flow_id = 5 - >>> flow = openml.flows.get_flow() - >>> print(flow) - OpenML Flow - =========== - Flow ID.........: 5 (version 1) - Flow URL........: https://www.openml.org/f/5 - Flow Name.......: openml.evaluation.average_cost - Flow Description: An implementation of the evaluation measure "average_cost" - Upload Date.....: 2014-01-16 14:12:56 - Dependencies....: Build on top of Weka API (Jar version 3.?.?) + >>> flow = openml.flows.get_flow(5) # doctest: +SKIP """ flow_id = int(flow_id) flow = _get_flow_description(flow_id) @@ -154,39 +174,47 @@ def list_flows( tag: str | None = None, uploader: str | None = None, ) -> pd.DataFrame: - """ - Return a list of all flows which are on OpenML. - (Supports large amount of results) + """List flows available on the OpenML server. + + This function supports paging and filtering and returns a pandas + DataFrame with one row per flow and columns for id, name, version, + external_version, full_name and uploader. Parameters ---------- offset : int, optional - the number of flows to skip, starting from the first + Number of flows to skip, starting from the first (for paging). size : int, optional - the maximum number of flows to return + Maximum number of flows to return. tag : str, optional - the tag to include - kwargs: dict, optional - Legal filter operators: uploader. 
+ Only return flows having this tag. + uploader : str, optional + Only return flows uploaded by this user. Returns ------- - flows : dataframe - Each row maps to a dataset - Each column contains the following information: - - flow id - - full name - - name - - version - - external version - - uploader + pandas.DataFrame + Rows correspond to flows. Columns include ``id``, ``full_name``, + ``name``, ``version``, ``external_version``, and ``uploader``. + + Raises + ------ + OpenMLServerException + When the API call fails. + + Side Effects + ------------ + - None: results are fetched and returned; no local state is modified. + + Preconditions + ------------- + - Network access is required to list flows unless caching mechanisms are + used by the underlying API helper. Examples -------- >>> import openml - >>> flows = openml.flows.list_flows() - A df of all flows which are on openML is returned - [123519 rows x 6 columns] + >>> flows = openml.flows.list_flows(size=100) # doctest: +SKIP """ listing_call = partial(_list_flows, tag=tag, uploader=uploader) batches = openml.utils._list_all(listing_call, offset=offset, limit=size) @@ -229,33 +257,35 @@ def _list_flows(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame: def flow_exists(name: str, external_version: str) -> int | bool: - """Retrieves the flow id. + """Check whether a flow (name + external_version) exists on the server. - A flow is uniquely identified by name + external_version. + The OpenML server defines uniqueness of flows by the pair + ``(name, external_version)``. This helper queries the server and + returns the corresponding flow id when present. Parameters ---------- - name : string - Name of the flow - external_version : string + name : str + Flow name (e.g., ``sklearn.tree._classes.DecisionTreeClassifier(1)``). + external_version : str Version information associated with flow. 
Returns ------- - flow_exist : int or bool - flow id iff exists, False otherwise + int or bool + The flow id if the flow exists on the server, otherwise ``False``. + + Raises + ------ + ValueError + If ``name`` or ``external_version`` are empty or not strings. + OpenMLServerException + When the API request fails. Examples -------- >>> import openml - >>> flow = openml.flows.flow_exists(name = "openml.evaluation.EuclideanDistance", \ - >>> external_version = "1") - >>> print(flow) - - - Notes - ----- - see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version + >>> openml.flows.flow_exists("weka.JRip", "Weka_3.9.0_10153") # doctest: +SKIP """ if not (isinstance(name, str) and len(name) > 0): raise ValueError("Argument 'name' should be a non-empty string") @@ -278,35 +308,58 @@ def get_flow_id( name: str | None = None, exact_version: bool = True, # noqa: FBT001, FBT002 ) -> int | bool | list[int]: - """Retrieves the flow id for a model or a flow name. + """Retrieve flow id(s) for a model instance or a flow name. - Provide either a model or a name to this function. Depending on the input, it does + Provide either a concrete ``model`` (which will be converted to a flow by + the appropriate extension) or a flow ``name``. Behavior depends on + ``exact_version``: - * ``model`` and ``exact_version == True``: This helper function first queries for the necessary - extension. Second, it uses that extension to convert the model into a flow. Third, it - executes ``flow_exists`` to potentially obtain the flow id the flow is published to the - server. - * ``model`` and ``exact_version == False``: This helper function first queries for the - necessary extension. Second, it uses that extension to convert the model into a flow. Third - it calls ``list_flows`` and filters the returned values based on the flow name. - * ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned - values based on the flow name. 
+ - ``model`` + ``exact_version=True``: convert ``model`` to a flow and call + :func:`flow_exists` to get a single flow id (or False). + - ``model`` + ``exact_version=False``: convert ``model`` to a flow and + return all server flow ids with the same flow name. + - ``name``: ignore ``exact_version`` and return all server flow ids that + match ``name``. Parameters ---------- - model : object - Any model. Must provide either ``model`` or ``name``. - name : str - Name of the flow. Must provide either ``model`` or ``name``. - exact_version : bool - Whether to return the flow id of the exact version or all flow ids where the name - of the flow matches. This is only taken into account for a model where a version number - is available (requires ``model`` to be set). + model : object, optional + A model instance that can be handled by a registered extension. Either + ``model`` or ``name`` must be provided. + name : str, optional + Flow name to query for. Either ``model`` or ``name`` must be provided. + exact_version : bool, optional (default=True) + When True and ``model`` is provided, only return the id for the exact + external version. When False, return a list of matching ids. Returns ------- - int or bool, List - flow id iff exists, ``False`` otherwise, List if ``exact_version is False`` + int or bool or list[int] + With ``model`` and ``exact_version=True``: the flow id if found, else ``False``. + Otherwise (``name`` given or ``exact_version=False``): list of matching ids (may be empty). + + Raises + ------ + ValueError + If neither ``model`` nor ``name`` is provided, or if both are provided. + OpenMLServerException + If underlying API calls fail. + + Side Effects + ------------ + - May call server APIs (``flow/exists``, ``flow/list``) and therefore + depends on network access and API keys for private flows. 
+ + Examples + -------- + >>> import openml + >>> # Lookup by flow name + >>> openml.flows.get_flow_id(name="weka.JRip") # doctest: +SKIP + >>> # Lookup by model instance (requires a registered extension) + >>> import sklearn.tree + >>> import openml_sklearn + >>> clf = sklearn.tree.DecisionTreeClassifier() + >>> openml.flows.get_flow_id(model=clf) # doctest: +SKIP """ if model is not None and name is not None: raise ValueError("Must provide either argument `model` or argument `name`, but not both.") @@ -422,6 +475,21 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915 check_description : bool Whether to ignore matching of flow descriptions. + + Raises + ------ + TypeError + When either argument is not an :class:`OpenMLFlow`. + ValueError + When a relevant mismatch is found between the two flows. + + Examples + -------- + >>> import openml + >>> f1 = openml.flows.get_flow(5) # doctest: +SKIP + >>> f2 = openml.flows.get_flow(5) # doctest: +SKIP + >>> openml.flows.assert_flows_equal(f1, f2) # doctest: +SKIP + >>> # If flows differ, a ValueError is raised """ if not isinstance(flow1, OpenMLFlow): raise TypeError(f"Argument 1 must be of type OpenMLFlow, but is {type(flow1)}") @@ -581,5 +649,20 @@ def delete_flow(flow_id: int) -> bool: ------- bool True if the deletion was successful. False otherwise. + + Raises + ------ + OpenMLServerException + If the server-side deletion fails due to permissions or other errors. + + Side Effects + ------------ + - Removes the flow from the OpenML server (if permitted). + + Examples + -------- + >>> import openml + >>> # Deletes flow 23 if you are the uploader and it's not linked to runs + >>> openml.flows.delete_flow(23) # doctest: +SKIP """ return openml.utils._delete_entity("flow", flow_id)