From 355939e7c493ee87d8c750fba07a987ba22eb648 Mon Sep 17 00:00:00 2001 From: adamlabadorf Date: Mon, 16 Feb 2026 07:57:37 -0500 Subject: [PATCH 1/5] config: add entry-point-based plugin schema discovery Allow DVC filesystem plugins to declare a REMOTE_CONFIG class attribute on their filesystem class. At import time, _discover_plugin_schemas() iterates over installed dvc.fs entry points and merges any declared config schemas into REMOTE_SCHEMAS, enabling ByUrl to accept custom URL schemes without changes to DVC core. This makes DVC truly extensible for third-party storage backends (e.g. OSF, GitLab packages) that define their own URL schemes. Existing hardcoded schemes are never overwritten. Relates to #9711. --- dvc/config_schema.py | 33 ++++ tests/unit/test_plugin_schema_discovery.py | 175 +++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 tests/unit/test_plugin_schema_discovery.py diff --git a/dvc/config_schema.py b/dvc/config_schema.py index 07f79fc064..a4c1aac64b 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -1,4 +1,5 @@ import os +from importlib.metadata import entry_points from typing import TYPE_CHECKING from urllib.parse import urlparse @@ -281,6 +282,38 @@ def __call__(self, data): "remote": {str: object}, # Any of the above options are valid } + +def _discover_plugin_schemas(): + """Discover remote config schemas from installed DVC filesystem plugins. + + Plugins can declare a ``REMOTE_CONFIG`` class attribute (a dict of + config keys and their voluptuous validators) on their filesystem class. + This function loads all ``dvc.fs`` entry points, checks for that + attribute, and merges the schema into ``REMOTE_SCHEMAS`` so that + ``ByUrl`` accepts the plugin's URL scheme. + + Existing (hardcoded) schemes are never overwritten. + """ + for ep in entry_points(group="dvc.fs"): + try: + cls = ep.load() + except Exception: # noqa: BLE001 + continue + + remote_config = getattr(cls, "REMOTE_CONFIG", None) + if not remote_config: + continue + + protocol = getattr(cls, "protocol", ep.name) + # protocol may be a string or tuple of strings + schemes = (protocol,) if isinstance(protocol, str) else protocol + for scheme in schemes: + if scheme not in REMOTE_SCHEMAS: + REMOTE_SCHEMAS[scheme] = {**remote_config, **REMOTE_COMMON} + + +_discover_plugin_schemas() + SCHEMA = { "core": { "remote": Lower, diff --git a/tests/unit/test_plugin_schema_discovery.py b/tests/unit/test_plugin_schema_discovery.py new file mode 100644 index 0000000000..01b7e7b768 --- /dev/null +++ b/tests/unit/test_plugin_schema_discovery.py @@ -0,0 +1,175 @@ +"""Tests for plugin-based remote config schema discovery. + +Verifies that DVC filesystem plugins can declare a ``REMOTE_CONFIG`` +class attribute to register their URL scheme and config options with +DVC's config validation, without requiring changes to DVC core. +""" + +from unittest.mock import MagicMock, patch + +import pytest + + +class FakePluginFS: + """Minimal filesystem class that declares REMOTE_CONFIG.""" + + protocol = "myplugin" + REMOTE_CONFIG = { + "token": str, + "endpoint_url": str, + } + + +class FakePluginNoConfig: + """Filesystem class without REMOTE_CONFIG — should be skipped.""" + + protocol = "noplugin" + + +class FakePluginMultiProtocol: + """Filesystem class with tuple protocol.""" + + protocol = ("myproto", "myprotos") + REMOTE_CONFIG = { + "api_key": str, + } + + +def _make_entry_point(name, cls): + """Create a mock entry point that returns the given class on load().""" + ep = MagicMock() + ep.name = name + ep.load.return_value = cls + return ep + + +class TestDiscoverPluginSchemas: + """Tests for _discover_plugin_schemas.""" + + def test_plugin_schema_registered(self): + """A plugin with REMOTE_CONFIG gets its scheme added to REMOTE_SCHEMAS.""" + from dvc.config_schema import REMOTE_COMMON, REMOTE_SCHEMAS + + eps = [_make_entry_point("myplugin", FakePluginFS)] + with patch("dvc.config_schema.entry_points", return_value=eps): + # Clear any prior registration from this test key + REMOTE_SCHEMAS.pop("myplugin", None) + + from dvc.config_schema import _discover_plugin_schemas + + _discover_plugin_schemas() + + assert "myplugin" in REMOTE_SCHEMAS + schema = REMOTE_SCHEMAS["myplugin"] + # Should contain plugin-specific keys + assert "token" in schema + assert "endpoint_url" in schema + # Should contain REMOTE_COMMON keys + assert "url" in schema + + # Cleanup + REMOTE_SCHEMAS.pop("myplugin", None) + + def test_plugin_without_remote_config_skipped(self): + """A plugin without REMOTE_CONFIG is silently skipped.""" + from dvc.config_schema import REMOTE_SCHEMAS + + eps = [_make_entry_point("noplugin", FakePluginNoConfig)] + with patch("dvc.config_schema.entry_points", return_value=eps): + REMOTE_SCHEMAS.pop("noplugin", None) + + from dvc.config_schema import _discover_plugin_schemas + + _discover_plugin_schemas() + + assert "noplugin" not in REMOTE_SCHEMAS + + def test_existing_scheme_not_overwritten(self): + """Hardcoded schemes like 's3' are never overwritten by plugins.""" + from dvc.config_schema import REMOTE_SCHEMAS + + original_s3 = REMOTE_SCHEMAS["s3"].copy() + + class FakeS3: + protocol = "s3" + REMOTE_CONFIG = {"fake_key": str} + + eps = [_make_entry_point("s3", FakeS3)] + with patch("dvc.config_schema.entry_points", return_value=eps): + from dvc.config_schema import _discover_plugin_schemas + + _discover_plugin_schemas() + + # s3 schema should be unchanged + assert "fake_key" not in REMOTE_SCHEMAS["s3"] + assert REMOTE_SCHEMAS["s3"] == original_s3 + + def test_plugin_load_failure_skipped(self): + """Plugins that fail to load are silently skipped.""" + from dvc.config_schema import REMOTE_SCHEMAS + + ep = MagicMock() + ep.name = "broken" + ep.load.side_effect = ImportError("missing dependency") + + with patch("dvc.config_schema.entry_points", return_value=[ep]): + REMOTE_SCHEMAS.pop("broken", None) + + from dvc.config_schema import _discover_plugin_schemas + + _discover_plugin_schemas() + + assert "broken" not in REMOTE_SCHEMAS + + def test_multi_protocol_plugin(self): + """A plugin with tuple protocol registers all schemes.""" + from dvc.config_schema import REMOTE_SCHEMAS + + eps = [_make_entry_point("myproto", FakePluginMultiProtocol)] + with patch("dvc.config_schema.entry_points", return_value=eps): + REMOTE_SCHEMAS.pop("myproto", None) + REMOTE_SCHEMAS.pop("myprotos", None) + + from dvc.config_schema import _discover_plugin_schemas + + _discover_plugin_schemas() + + assert "myproto" in REMOTE_SCHEMAS + assert "myprotos" in REMOTE_SCHEMAS + assert "api_key" in REMOTE_SCHEMAS["myproto"] + assert "api_key" in REMOTE_SCHEMAS["myprotos"] + + # Cleanup + REMOTE_SCHEMAS.pop("myproto", None) + REMOTE_SCHEMAS.pop("myprotos", None) + + +class TestByUrlWithPlugin: + """Integration test: ByUrl accepts plugin-registered schemes.""" + + def test_byurl_validates_plugin_scheme(self): + """ByUrl should accept a URL with a plugin-registered scheme.""" + from dvc.config_schema import ByUrl, REMOTE_COMMON, REMOTE_SCHEMAS + + # Register a fake scheme + REMOTE_SCHEMAS["testplugin"] = {"token": str, **REMOTE_COMMON} + validator = ByUrl(REMOTE_SCHEMAS) + + # Should not raise + result = validator({"url": "testplugin://myhost/path", "token": "abc"}) + assert result["url"] == "testplugin://myhost/path" + assert result["token"] == "abc" + + # Cleanup + REMOTE_SCHEMAS.pop("testplugin", None) + + def test_byurl_rejects_unknown_scheme(self): + """ByUrl should reject an unregistered scheme.""" + from voluptuous import Invalid as VoluptuousInvalid + + from dvc.config_schema import ByUrl, REMOTE_SCHEMAS + + validator = ByUrl(REMOTE_SCHEMAS) + + with pytest.raises(VoluptuousInvalid, match="Unsupported URL type"): + validator({"url": "unknownscheme://host/path"}) From e244e120f7f0c147997b92306acd890fa3f90d09 Mon Sep 17 00:00:00 2001 From: adamlabadorf Date: Fri, 20 Feb 2026 16:22:08 -0500 Subject: [PATCH 2/5] config: add entry-point-based plugin schema discovery Allow DVC filesystem plugins to declare a REMOTE_CONFIG class attribute on their filesystem class. At import time, _discover_plugin_schemas() iterates over installed dvc.fs entry points and merges any declared config schemas into REMOTE_SCHEMAS, enabling ByUrl to accept custom URL schemes without changes to DVC core. This makes DVC truly extensible for third-party storage backends (e.g. OSF, GitLab packages) that define their own URL schemes. Existing hardcoded schemes are never overwritten. Fixes #10993 --- dvc/config_schema.py | 2 +- tests/unit/test_plugin_schema_discovery.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index a4c1aac64b..9e2ee3f5ac 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -297,7 +297,7 @@ def _discover_plugin_schemas(): for ep in entry_points(group="dvc.fs"): try: cls = ep.load() - except Exception: # noqa: BLE001 + except Exception: # noqa: BLE001,S112 continue remote_config = getattr(cls, "REMOTE_CONFIG", None) diff --git a/tests/unit/test_plugin_schema_discovery.py b/tests/unit/test_plugin_schema_discovery.py index 01b7e7b768..de0ac1d706 100644 --- a/tests/unit/test_plugin_schema_discovery.py +++ b/tests/unit/test_plugin_schema_discovery.py @@ -5,6 +5,7 @@ class attribute to register their URL scheme and config options with DVC's config validation, without requiring changes to DVC core. """ +from typing import ClassVar from unittest.mock import MagicMock, patch import pytest @@ -14,7 +15,7 @@ class FakePluginFS: """Minimal filesystem class that declares REMOTE_CONFIG.""" protocol = "myplugin" - REMOTE_CONFIG = { + REMOTE_CONFIG: ClassVar[dict] = { "token": str, "endpoint_url": str, } @@ -30,7 +31,7 @@ class FakePluginMultiProtocol: """Filesystem class with tuple protocol.""" protocol = ("myproto", "myprotos") - REMOTE_CONFIG = { + REMOTE_CONFIG: ClassVar[dict] = { "api_key": str, } @@ -48,7 +49,7 @@ class TestDiscoverPluginSchemas: def test_plugin_schema_registered(self): """A plugin with REMOTE_CONFIG gets its scheme added to REMOTE_SCHEMAS.""" - from dvc.config_schema import REMOTE_COMMON, REMOTE_SCHEMAS + from dvc.config_schema import REMOTE_SCHEMAS eps = [_make_entry_point("myplugin", FakePluginFS)] with patch("dvc.config_schema.entry_points", return_value=eps): @@ -92,7 +93,7 @@ def test_existing_scheme_not_overwritten(self): class FakeS3: protocol = "s3" - REMOTE_CONFIG = {"fake_key": str} + REMOTE_CONFIG: ClassVar[dict] = {"fake_key": str} eps = [_make_entry_point("s3", FakeS3)] with patch("dvc.config_schema.entry_points", return_value=eps): @@ -149,7 +150,7 @@ class TestByUrlWithPlugin: def test_byurl_validates_plugin_scheme(self): """ByUrl should accept a URL with a plugin-registered scheme.""" - from dvc.config_schema import ByUrl, REMOTE_COMMON, REMOTE_SCHEMAS + from dvc.config_schema import REMOTE_COMMON, REMOTE_SCHEMAS, ByUrl # Register a fake scheme REMOTE_SCHEMAS["testplugin"] = {"token": str, **REMOTE_COMMON} @@ -167,7 +168,7 @@ def test_byurl_rejects_unknown_scheme(self): """ByUrl should reject an unregistered scheme.""" from voluptuous import Invalid as VoluptuousInvalid - from dvc.config_schema import ByUrl, REMOTE_SCHEMAS + from dvc.config_schema import REMOTE_SCHEMAS, ByUrl validator = ByUrl(REMOTE_SCHEMAS) From 3de87d171893064f2b9cd2fd786b151ce65b6b01 Mon Sep 17 00:00:00 2001 From: adamlabadorf Date: Sat, 21 Feb 2026 14:49:50 -0500 Subject: [PATCH 3/5] config: fix Python 3.9 compatibility in _discover_plugin_schemas entry_points(group=...) keyword argument was added in Python 3.10. On Python 3.9, use the dict-based entry_points().get(group, []) API instead. Fixes TypeError on Python 3.9: TypeError: entry_points() got an unexpected keyword argument 'group' --- dvc/config_schema.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index 9e2ee3f5ac..d2a97ce0d9 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -1,4 +1,5 @@ import os +import sys from importlib.metadata import entry_points from typing import TYPE_CHECKING from urllib.parse import urlparse @@ -294,7 +295,12 @@ def _discover_plugin_schemas(): Existing (hardcoded) schemes are never overwritten. """ - for ep in entry_points(group="dvc.fs"): + # entry_points(group=...) requires Python 3.10+; use dict API on 3.9 + if sys.version_info >= (3, 10): + eps = entry_points(group="dvc.fs") + else: + eps = entry_points().get("dvc.fs", []) # type: ignore[call-arg] + for ep in eps: try: cls = ep.load() except Exception: # noqa: BLE001,S112 From 8366ff3c14f4eca07eba1f8b8aaa852f43090b83 Mon Sep 17 00:00:00 2001 From: adamlabadorf Date: Sat, 21 Feb 2026 15:44:04 -0500 Subject: [PATCH 4/5] config: extract _get_dvc_fs_entry_points for testability and 3.9 compat Extract entry point fetching into _get_dvc_fs_entry_points() helper so that tests can mock it directly rather than mocking entry_points(). This avoids the Python 3.9 vs 3.10+ dict/list API difference leaking into test mocks. --- dvc/config_schema.py | 18 ++++++++++++------ tests/unit/test_plugin_schema_discovery.py | 10 +++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index d2a97ce0d9..553c6d2d11 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -284,6 +284,17 @@ def __call__(self, data): } +def _get_dvc_fs_entry_points(): + """Return installed dvc.fs entry points, compatible with Python 3.9+. + + ``entry_points(group=...)`` was added in Python 3.10; on 3.9 we use + the dict-based ``entry_points().get(group, [])`` API instead. + """ + if sys.version_info >= (3, 10): + return entry_points(group="dvc.fs") + return entry_points().get("dvc.fs", []) # type: ignore[call-arg] + + def _discover_plugin_schemas(): """Discover remote config schemas from installed DVC filesystem plugins. @@ -295,12 +306,7 @@ def _discover_plugin_schemas(): Existing (hardcoded) schemes are never overwritten. """ - # entry_points(group=...) requires Python 3.10+; use dict API on 3.9 - if sys.version_info >= (3, 10): - eps = entry_points(group="dvc.fs") - else: - eps = entry_points().get("dvc.fs", []) # type: ignore[call-arg] - for ep in eps: + for ep in _get_dvc_fs_entry_points(): try: cls = ep.load() except Exception: # noqa: BLE001,S112 diff --git a/tests/unit/test_plugin_schema_discovery.py b/tests/unit/test_plugin_schema_discovery.py index de0ac1d706..c0990cf4f1 100644 --- a/tests/unit/test_plugin_schema_discovery.py +++ b/tests/unit/test_plugin_schema_discovery.py @@ -52,7 +52,7 @@ def test_plugin_schema_registered(self): from dvc.config_schema import REMOTE_SCHEMAS eps = [_make_entry_point("myplugin", FakePluginFS)] - with patch("dvc.config_schema.entry_points", return_value=eps): + with patch("dvc.config_schema._get_dvc_fs_entry_points", return_value=eps): # Clear any prior registration from this test key REMOTE_SCHEMAS.pop("myplugin", None) @@ -76,7 +76,7 @@ def test_plugin_without_remote_config_skipped(self): from dvc.config_schema import REMOTE_SCHEMAS eps = [_make_entry_point("noplugin", FakePluginNoConfig)] - with patch("dvc.config_schema.entry_points", return_value=eps): + with patch("dvc.config_schema._get_dvc_fs_entry_points", return_value=eps): REMOTE_SCHEMAS.pop("noplugin", None) from dvc.config_schema import _discover_plugin_schemas @@ -96,7 +96,7 @@ class FakeS3: REMOTE_CONFIG: ClassVar[dict] = {"fake_key": str} eps = [_make_entry_point("s3", FakeS3)] - with patch("dvc.config_schema.entry_points", return_value=eps): + with patch("dvc.config_schema._get_dvc_fs_entry_points", return_value=eps): from dvc.config_schema import _discover_plugin_schemas _discover_plugin_schemas() @@ -113,7 +113,7 @@ def test_plugin_load_failure_skipped(self): ep.name = "broken" ep.load.side_effect = ImportError("missing dependency") - with patch("dvc.config_schema.entry_points", return_value=[ep]): + with patch("dvc.config_schema._get_dvc_fs_entry_points", return_value=[ep]): REMOTE_SCHEMAS.pop("broken", None) from dvc.config_schema import _discover_plugin_schemas @@ -127,7 +127,7 @@ def test_multi_protocol_plugin(self): from dvc.config_schema import REMOTE_SCHEMAS eps = [_make_entry_point("myproto", FakePluginMultiProtocol)] - with patch("dvc.config_schema.entry_points", return_value=eps): + with patch("dvc.config_schema._get_dvc_fs_entry_points", return_value=eps): REMOTE_SCHEMAS.pop("myproto", None) REMOTE_SCHEMAS.pop("myprotos", None) From 161efd59feb85987ae78cf4fb047b1c3b54a4ebd Mon Sep 17 00:00:00 2001 From: adamlabadorf Date: Sat, 21 Feb 2026 15:59:14 -0500 Subject: [PATCH 5/5] config: fix mypy unreachable error on Python 3.9 fallback line mypy evaluates sys.version_info >= (3, 10) as always True on 3.10+ environments, flagging the 3.9 fallback return as unreachable. Extend the type: ignore comment to cover both call-arg and unreachable. --- dvc/config_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index 553c6d2d11..95cf12dce9 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -292,7 +292,7 @@ def _get_dvc_fs_entry_points(): """ if sys.version_info >= (3, 10): return entry_points(group="dvc.fs") - return entry_points().get("dvc.fs", []) # type: ignore[call-arg] + return entry_points().get("dvc.fs", []) # type: ignore[call-arg,unreachable] def _discover_plugin_schemas():