Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    """Switch the module-level API context to another API version.

    Parameters
    ----------
    version : str
        Version identifier forwarded to ``api_context.set_version``.
    strict : bool, optional
        Forwarded unchanged to ``api_context.set_version``.
    """
    api_context.set_version(version, strict=strict)


# Module-level API context; ``set_api_version`` calls ``set_version`` on this object.
api_context = APIContext()
5 changes: 5 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

# Base URL of the v1 API (the path ends in ``/xml``).
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
# Base URL of the v2 API.
# NOTE(review): this is a loopback address over plain HTTP -- presumably a local
# development server; confirm before this ships as a default.
API_V2_SERVER = "http://127.0.0.1:8001"
# NOTE(review): placeholder value -- confirm where the real key is expected to come from.
API_KEY = "..."
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

__all__ = ["HTTPClient"]
39 changes: 39 additions & 0 deletions openml/_api/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any, Mapping

import requests
from requests import Response

from openml.__version__ import __version__


class HTTPClient:
    """Thin wrapper around ``requests`` bound to a single API base URL.

    Every request carries a ``user-agent`` header naming the openml-python
    version and uses the timeout configured on the client.

    Parameters
    ----------
    base_url : str
        Root URL that request paths are appended to.
    timeout : float, optional
        Timeout in seconds handed to ``requests`` for every call. Defaults to
        10, the value that used to be hard-coded in each method.
    """

    def __init__(self, base_url: str, *, timeout: float = 10) -> None:
        self.base_url = base_url
        self.timeout = timeout
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def _build_url(self, path: str) -> str:
        """Join ``path`` onto the base URL with exactly one separating slash."""
        # Strip only the slashes at the seam, so a trailing slash on ``base_url``
        # or a leading slash on ``path`` cannot produce ``//`` in the URL.
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    def get(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a GET request to ``path`` relative to the base URL.

        Parameters
        ----------
        path : str
            Endpoint path relative to ``base_url``.
        params : Mapping[str, Any] or None, optional
            Query-string parameters.

        Returns
        -------
        Response
            The response as returned by ``requests``; the status is not checked here.
        """
        return requests.get(
            self._build_url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )

    def post(
        self,
        path: str,
        data: Mapping[str, Any] | None = None,
        files: Any = None,
    ) -> Response:
        """Send a POST request to ``path`` relative to the base URL.

        Parameters
        ----------
        path : str
            Endpoint path relative to ``base_url``.
        data : Mapping[str, Any] or None, optional
            Form fields passed to ``requests.post`` as ``data``.
        files : Any, optional
            Passed to ``requests.post`` as ``files``.

        Returns
        -------
        Response
            The response as returned by ``requests``; the status is not checked here.
        """
        return requests.post(
            self._build_url(path),
            data=data,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )

    def delete(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a DELETE request to ``path`` relative to the base URL.

        Parameters
        ----------
        path : str
            Endpoint path relative to ``base_url``.
        params : Mapping[str, Any] or None, optional
            Query-string parameters.

        Returns
        -------
        Response
            The response as returned by ``requests``; the status is not checked here.
        """
        return requests.delete(
            self._build_url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )
Empty file added openml/_api/http/utils.py
Empty file.
15 changes: 15 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.evaluation_measures import (
EvaluationMeasuresV1,
EvaluationMeasuresV2,
)
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = [
"DatasetsV1",
"DatasetsV2",
"TasksV1",
"TasksV2",
"EvaluationMeasuresV1",
"EvaluationMeasuresV2",
]
36 changes: 36 additions & 0 deletions openml/_api/resources/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from requests import Response

from openml._api.http import HTTPClient
from openml.datasets.dataset import OpenMLDataset
from openml.tasks.task import OpenMLTask


class ResourceAPI:
    """Common base for resource endpoints; keeps the HTTP client they use."""

    def __init__(self, http: HTTPClient):
        # Kept on a private attribute; subclasses send requests through ``self._http``.
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface that versioned dataset endpoints implement."""

    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Return the dataset for ``dataset_id``.

        NOTE(review): the return type allows an ``(dataset, response)`` tuple,
        but there is no flag in the signature to request it -- confirm intent.
        """
        ...


class TasksAPI(ResourceAPI, ABC):
    """Abstract interface that versioned task endpoints implement."""

    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Return the task for ``task_id``.

        Parameters
        ----------
        task_id : int
            Identifier of the task.
        return_response : bool, optional
            Keyword-only flag; the return annotation allows either the task
            alone or a ``(task, response)`` tuple.
        """
        ...


class EvaluationMeasuresAPI(ResourceAPI, ABC):
    """Abstract interface that versioned evaluation-measure endpoints implement."""

    @abstractmethod
    def list(self) -> list[str]:
        """Return the evaluation measures as a list of strings."""
        ...
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
    # ``Response`` must come from ``requests`` (the HTTP library), not from
    # ``responses`` (an HTTP-mocking package with a class of the same name).
    from requests import Response

    from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    """Dataset endpoints for the v1 API; ``get`` is not implemented."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError


class DatasetsV2(DatasetsAPI):
    """Dataset endpoints for the v2 API; ``get`` is not implemented."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError
60 changes: 60 additions & 0 deletions openml/_api/resources/evaluation_measures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations

import xmltodict

from openml._api.resources.base import EvaluationMeasuresAPI


class EvaluationMeasuresV1(EvaluationMeasuresAPI):
    """V1 API implementation for evaluation measures.

    Fetches evaluation measures from the v1 XML API endpoint.
    """

    def list(self) -> list[str]:
        """List all evaluation measures available on OpenML.

        Returns
        -------
        list[str]
            A list of evaluation measure names.

        Raises
        ------
        ValueError
            If the parsed XML has no ``oml:evaluation_measures`` root element.
        TypeError
            If ``oml:measure`` was not parsed as a list.
        """
        response = self._http.get("evaluationmeasure/list")

        # ``force_list`` has to be a real tuple. The earlier ``("oml:measures")``
        # was a plain string, so key matching degraded to a substring test that
        # only by accident also covered "oml:measure". Name both keys explicitly
        # so that a single <oml:measure> element still parses as a list.
        parsed = xmltodict.parse(
            response.text,
            force_list=("oml:measures", "oml:measure"),
        )

        # Minimalistic check if the XML is useful
        if "oml:evaluation_measures" not in parsed:
            raise ValueError('Error in return XML, does not contain "oml:evaluation_measures"')

        measures = parsed["oml:evaluation_measures"]["oml:measures"][0]["oml:measure"]
        if not isinstance(measures, list):
            raise TypeError('Error in return XML, does not contain "oml:measure" as a list')

        return measures


class EvaluationMeasuresV2(EvaluationMeasuresAPI):
    """Evaluation-measure endpoints backed by the v2 JSON API."""

    def list(self) -> list[str]:
        """Return every evaluation measure reported by the server.

        Returns
        -------
        list[str]
            The decoded JSON payload, which must be a list.

        Raises
        ------
        ValueError
            If the decoded JSON payload is not a list.
        """
        payload = self._http.get("evaluationmeasure/list").json()

        if isinstance(payload, list):
            return payload

        raise ValueError(f"Expected list, got {type(payload)}")
128 changes: 128 additions & 0 deletions openml/_api/resources/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import xmltodict

from openml._api.resources.base import TasksAPI
from openml.tasks.task import (
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
OpenMLRegressionTask,
OpenMLTask,
TaskType,
)

if TYPE_CHECKING:
from requests import Response


class TasksV1(TasksAPI):
    """V1 API implementation for tasks.

    Fetches a task description from the v1 XML endpoint and converts it into
    the matching ``OpenMLTask`` subclass.
    """

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch a single task by id.

        Parameters
        ----------
        task_id : int
            Id interpolated into the ``task/{task_id}`` request path.
        return_response : bool, optional
            When True, return ``(task, response)`` instead of just the task.

        Returns
        -------
        OpenMLTask or tuple[OpenMLTask, Response]
        """
        response = self._http.get(f"task/{task_id}")
        task = self._create_task_from_xml(response.text)

        if return_response:
            return task, response

        return task

    @staticmethod
    def _as_list(node: object) -> list:
        """Normalize an xmltodict child (absent, single or repeated) to a list."""
        if node is None:
            return []
        if isinstance(node, list):
            return node
        return [node]

    def _create_task_from_xml(self, xml: str) -> OpenMLTask:
        """Create a task given a xml string.

        Parameters
        ----------
        xml : string
            Task xml representation.

        Returns
        -------
        OpenMLTask

        Raises
        ------
        NotImplementedError
            If the task type has no matching task class.
        """
        dic = xmltodict.parse(xml)["oml:task"]

        # Due to the unordered structure we obtain, we first have to extract
        # the possible keys of oml:input; dic["oml:input"] is either a list of
        # dicts (several inputs) or a single dict (one input).
        raw_inputs = dic["oml:input"]
        inputs = {}
        if isinstance(raw_inputs, list):
            inputs = {input_["@name"]: input_ for input_ in raw_inputs}
        elif isinstance(raw_inputs, dict):
            inputs = {raw_inputs["@name"]: raw_inputs}

        evaluation_measures = None
        if "evaluation_measures" in inputs:
            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
                "oml:evaluation_measure"
            ]

        task_type = TaskType(int(dic["oml:task_type_id"]))
        data_set = inputs["source_data"]["oml:data_set"]
        common_kwargs = {
            "task_id": dic["oml:task_id"],
            "task_type": dic["oml:task_type"],
            "task_type_id": task_type,
            "data_set_id": data_set["oml:data_set_id"],
            "evaluation_measure": evaluation_measures,
        }
        # TODO: add OpenMLClusteringTask?
        if task_type in (
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
        ):
            procedure = inputs["estimation_procedure"]["oml:estimation_procedure"]

            # Convert some more parameters. xmltodict yields a dict (not a list)
            # for a single <oml:parameter> and omits the key when there are none,
            # so normalize before iterating.
            estimation_parameters = {
                parameter["@name"]: parameter.get("#text", "")
                for parameter in self._as_list(procedure.get("oml:parameter"))
            }

            common_kwargs["estimation_procedure_type"] = procedure["oml:type"]
            common_kwargs["estimation_procedure_id"] = int(procedure["oml:id"])
            common_kwargs["estimation_parameters"] = estimation_parameters
            common_kwargs["target_name"] = data_set["oml:target_feature"]
            common_kwargs["data_splits_url"] = procedure["oml:data_splits_url"]

        cls = {
            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
            TaskType.CLUSTERING: OpenMLClusteringTask,
            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
        }.get(task_type)
        if cls is None:
            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
        return cls(**common_kwargs)  # type: ignore


class TasksV2(TasksAPI):
    """Task endpoints for the v2 API; ``get`` is not implemented."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError
Empty file added openml/_api/runtime/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions openml/_api/runtime/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.config import (
API_V1_SERVER,
API_V2_SERVER,
)
from openml._api.http.client import HTTPClient
from openml._api.resources import (
DatasetsV1,
DatasetsV2,
EvaluationMeasuresV1,
EvaluationMeasuresV2,
TasksV1,
TasksV2,
)

if TYPE_CHECKING:
from openml._api.resources.base import DatasetsAPI, EvaluationMeasuresAPI, TasksAPI


class APIBackend:
    """Bundle of the per-resource API objects that make up one backend.

    Parameters
    ----------
    datasets : DatasetsAPI
        Stored as ``self.datasets``.
    tasks : TasksAPI
        Stored as ``self.tasks``.
    evaluation_measures : EvaluationMeasuresAPI
        Stored as ``self.evaluation_measures``.
    """

    def __init__(
        self,
        *,
        datasets: DatasetsAPI,
        tasks: TasksAPI,
        evaluation_measures: EvaluationMeasuresAPI,
    ):
        self.datasets = datasets
        self.tasks = tasks
        self.evaluation_measures = evaluation_measures


def build_backend(version: str, *, strict: bool) -> APIBackend:
    """Build the backend for the requested API version.

    Parameters
    ----------
    version : str
        Either ``"v1"`` or ``"v2"``.
    strict : bool
        Only relevant for ``"v2"``: when True the v2 backend is returned;
        when False the v1 backend is returned instead.

    Returns
    -------
    APIBackend

    Raises
    ------
    ValueError
        If ``version`` is neither ``"v1"`` nor ``"v2"``. Unknown values used
        to fall through silently, so a typo such as ``"V1"`` combined with
        ``strict=True`` selected the v2 backend.
    """
    if version not in ("v1", "v2"):
        raise ValueError(f"Unsupported API version {version!r}; expected 'v1' or 'v2'.")

    # Only construct the client and resources for the backend actually returned.
    if version == "v2" and strict:
        v2_http = HTTPClient(API_V2_SERVER)
        return APIBackend(
            datasets=DatasetsV2(v2_http),
            tasks=TasksV2(v2_http),
            evaluation_measures=EvaluationMeasuresV2(v2_http),
        )

    # Reached for "v1" and for non-strict "v2"; both get the v1 backend.
    v1_http = HTTPClient(API_V1_SERVER)
    return APIBackend(
        datasets=DatasetsV1(v1_http),
        tasks=TasksV1(v1_http),
        evaluation_measures=EvaluationMeasuresV1(v1_http),
    )


class APIContext:
    """Holder for the currently active ``APIBackend``."""

    def __init__(self) -> None:
        # Start with the backend built for "v1", non-strict.
        self._backend = build_backend("v1", strict=False)

    def set_version(self, version: str, *, strict: bool = False) -> None:
        """Rebuild the backend for ``version`` and make it the active one.

        Parameters
        ----------
        version : str
            Forwarded to ``build_backend``.
        strict : bool, optional
            Forwarded to ``build_backend``.
        """
        self._backend = build_backend(version, strict=strict)

    @property
    def backend(self) -> APIBackend:
        """The backend produced by the most recent ``build_backend`` call."""
        return self._backend
Loading
Loading