From 9610bf0b9f91e295e829fdb062acd4a182b7ac00 Mon Sep 17 00:00:00 2001 From: Richard Xia Date: Fri, 21 Feb 2025 00:24:31 -0800 Subject: [PATCH 1/3] Python 3.13 compatibility. (#473) Between Python 3.12 and Python 3.13, the internal structure of pathlib changed, causing the Artifactory URL parsing to fail. `pathlib.PurePath` previously had a private class attribute, `_flavour`, which has now been renamed to `parser` and made into a public and documented API. This renames `_flavour` to `parser` in the ArtifactoryPath subclasses, but it leaves around a `_flavour` class attribute that is aliased to `parser` as a compatibility shim for older versions of Python. One other breakage between Python 3.12 and 3.13 is that the artifactory package attempted to import `posixpath` via the `pathlib` package. `posixpath` was never meant to be a publicly accessible attribute of `pathlib`, as `posixpath` is its own top-level package in the standard library. The `pathlib` code was significantly restructured, causing the `posixpath` module to no longer be accessible under `pathlib`. We fix this in artifactory by directly importing the top-level `posixpath` package. Finally, this adds Python 3.13 to the package metadata in setup.py and the tox and GitHub Actions configuration files so that it is officially declared as a supported Python version and tested in CI. --- .github/workflows/ci.yml | 2 +- artifactory.py | 11 +++++++---- setup.py | 1 + tox.ini | 1 + 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aae80a9..607c4e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 diff --git a/artifactory.py b/artifactory.py index 0c4b63c..b27180f 100755 --- a/artifactory.py +++ b/artifactory.py @@ -33,6 +33,7 @@ import os import pathlib import platform +import posixpath import re import urllib.parse from itertools import chain @@ -444,7 +445,7 @@ class _ArtifactoryFlavour(object if IS_PYTHON_3_12_OR_NEWER else pathlib._Flavou sep = "/" altsep = "/" has_drv = True - pathmod = pathlib.posixpath + pathmod = posixpath is_supported = True def _get_base_url(self, url): @@ -1499,7 +1500,8 @@ class PureArtifactoryPath(pathlib.PurePath): operations. """ - _flavour = _artifactory_flavour + parser = _artifactory_flavour + _flavour = parser # Compatibility shim for Python < 3.13 __slots__ = () def _init(self, *args): @@ -1507,7 +1509,7 @@ def _init(self, *args): @classmethod def _split_root(cls, part): - cls._flavour.splitroot(part) + cls.parser.splitroot(part) @classmethod def _parse_parts(cls, parts): @@ -2661,7 +2663,8 @@ def get_projects(self, lazy=False): class ArtifactorySaaSPath(ArtifactoryPath): """Class for SaaS Artifactory""" - _flavour = _saas_artifactory_flavour + parser = _saas_artifactory_flavour + _flavour = parser # Compatibility shim for Python < 3.13 class ArtifactoryBuild: diff --git a/setup.py b/setup.py index ae0c5ef..25f5624 100755 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries", "Topic :: System :: Filesystems", ], diff --git a/tox.ini b/tox.ini index b80d13d..fd620e7 100644 --- a/tox.ini +++ b/tox.ini @@ -5,6 +5,7 @@ envlist = py310 py311 py312 + py313 pre-commit [testenv] From 9a6d1bcda8a8dfc9770ea3c5492c00d3c787c9df Mon Sep 17 00:00:00 2001 From: offa Date: Fri, 21 Feb 2025 09:45:55 +0000 Subject: [PATCH 2/3] Replace _make_child_relpath() with public joinpath() (#475) --- artifactory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifactory.py b/artifactory.py index b27180f..ae75199 100755 --- a/artifactory.py +++ b/artifactory.py @@ -1923,7 +1923,7 @@ def _make_child(self, args): return obj def _make_child_relpath(self, args): - obj = super(ArtifactoryPath, self)._make_child_relpath(args) + obj = super(ArtifactoryPath, self).joinpath(args) obj.auth = self.auth obj.verify = self.verify obj.cert = self.cert From 643bc4f56e593dd527cd2cbbc30ab3ce18e8408d Mon Sep 17 00:00:00 2001 From: Richard Xia Date: Mon, 17 Mar 2025 20:42:31 -0700 Subject: [PATCH 3/3] Add unit test for `glob()` and partially fix Python 3.13 case. (#476) * Add unit test for `glob()` and partially fix Python 3.13 case. * Add custom subclass of _Globber to fix remaining issues with glob behavior. * Guard _globber override with Python version check. Co-authored-by: allburov * Rearrange `if` statement to prefer early returns. Co-authored-by: allburov * Add comment linking to original code for recursive_selector(). --------- Co-authored-by: allburov --- artifactory.py | 109 ++++++++++++++++++++++++++++ dohq_artifactory/compat.py | 4 + tests/unit/test_artifactory_path.py | 85 ++++++++++++++++++++++ 3 files changed, 198 insertions(+) diff --git a/artifactory.py b/artifactory.py index ae75199..078182e 100755 --- a/artifactory.py +++ b/artifactory.py @@ -27,6 +27,7 @@ import datetime import errno import fnmatch +import glob import hashlib import io import json @@ -55,6 +56,7 @@ from dohq_artifactory.compat import IS_PYTHON_2 from dohq_artifactory.compat import IS_PYTHON_3_10_OR_NEWER from dohq_artifactory.compat import IS_PYTHON_3_12_OR_NEWER +from dohq_artifactory.compat import IS_PYTHON_3_13_OR_NEWER from dohq_artifactory.exception import ArtifactoryException from dohq_artifactory.exception import raise_for_status from dohq_artifactory.logger import logger @@ -588,6 +590,9 @@ def make_uri(self, path): def normcase(self, path): return path + def split(self, path): + return posixpath.split(path) + def splitdrive(self, path): drv, root, part = self.splitroot(path) return (drv + root, self.sep.join(part)) @@ -1493,6 +1498,94 @@ class ArtifactoryOpensourceAccessor(_ArtifactoryAccessor): """ +# In Python 3.13, pathlib now reuses code from the glob package in order to implement +# the Path.glob() method. There are two related classes in the glob package, _Globber +# and _StringGlobber, where the former will delegate operations to the Path object while +# the latter directly calls os.path functions, performing actual file system calls. The +# private abstract base class of PurePath, PurePathBase, sets the _globber class +# attribute to _Globber, while PurePath overrides it to be _StringGlobber. +# +# We create a custom subclass that explicitly subclasses _Globber and not +# _StringGlobber, since we want the version that delegates file system operations to the +# Path objects. +# +# In addition, we override _Globber.recursive_selector() with a copy of the original +# code but with one modification. Inside the definition of the nested select_recursive() +# function, we # add 1 to the original value of match_pos. The reason for this is that +# the add_slash() method will not actually add a slash when the path object is an +# instance of a Path subclass, since it will normally get normalized away. The match +# position therefore needs to be incremented by 1 in order to account for the actual +# slash character that appears when inspecting children of the current directory. This +# isn't an issue in the actual use of _Globber in Python, since it converts all paths to +# strings, and the add_slash() will literally append a slash character to the string +# path. See the original code in +# https://github.com/python/cpython/blob/v3.13.2/Lib/glob.py#L448-L510 +class _ArtifactoryGlobber(glob._Globber if IS_PYTHON_3_13_OR_NEWER else object): + def recursive_selector(self, part, parts): + """Returns a function that selects a given path and all its children, + recursively, filtering by pattern. + """ + # Optimization: consume following '**' parts, which have no effect. + while parts and parts[-1] == "**": + parts.pop() + + # Optimization: consume and join any following non-special parts here, + # rather than leaving them for the next selector. They're used to + # build a regular expression, which we use to filter the results of + # the recursive walk. As a result, non-special pattern segments + # following a '**' wildcard don't require additional filesystem access + # to expand. + follow_symlinks = self.recursive is not glob._no_recurse_symlinks + if follow_symlinks: + while parts and parts[-1] not in glob._special_parts: + part += self.sep + parts.pop() + + match = None if part == "**" else self.compile(part) + dir_only = bool(parts) + select_next = self.selector(parts) + + def select_recursive(path, exists=False): + path = self.add_slash(path) + match_pos = len(str(path)) + 1 + if match is None or match(str(path), match_pos): + yield from select_next(path, exists) + stack = [path] + while stack: + yield from select_recursive_step(stack, match_pos) + + def select_recursive_step(stack, match_pos): + path = stack.pop() + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + is_dir = False + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + is_dir = True + except OSError: + pass + + if is_dir or not dir_only: + entry_path = self.parse_entry(entry) + if match is None or match(str(entry_path), match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) + + return select_recursive + + class PureArtifactoryPath(pathlib.PurePath): """ A class to work with Artifactory paths that doesn't connect @@ -1502,6 +1595,13 @@ class PureArtifactoryPath(pathlib.PurePath): parser = _artifactory_flavour _flavour = parser # Compatibility shim for Python < 3.13 + + # In Python 3.13, this attribute is accessed by PurePath.glob(), and we need to + # override it to behave properly for ArtifactoryPaths with a custom subclass of + # glob._Globber. + if IS_PYTHON_3_13_OR_NEWER: + _globber = _ArtifactoryGlobber + __slots__ = () def _init(self, *args): @@ -1795,6 +1895,15 @@ def _scandir(self): """ return self._accessor.scandir(self) + def glob(self, *args, **kwargs): + if IS_PYTHON_3_13_OR_NEWER: + # In Python 3.13, the implementation of Path.glob() changed such that it assumes that it + # works only with real filesystem paths and will try to call real filesystem operations like + # os.scandir(). In Python 3.13, we explicitly intercept this and call PathBase's glob() + # implementation, which only depends on methods defined on the Path subclass. + return pathlib._abc.PathBase.glob(self, *args, **kwargs) + return super().glob(*args, **kwargs) + def download_stats(self, pathobj=None): """ Item statistics record the number of times an item was downloaded, last download date and last downloader. diff --git a/dohq_artifactory/compat.py b/dohq_artifactory/compat.py index d6ce8c7..9707fca 100644 --- a/dohq_artifactory/compat.py +++ b/dohq_artifactory/compat.py @@ -9,3 +9,7 @@ # parts of the code once python3.11 is no longer supported. This constant helps # identifying those. IS_PYTHON_3_12_OR_NEWER = sys.version_info >= (3, 12) +# Pathlib.Path and glob changed significantly in 3.13, so we will not need several +# parts of the code once python3.12 is no longer supported. This constant helps +# identifying those. +IS_PYTHON_3_13_OR_NEWER = sys.version_info >= (3, 13) diff --git a/tests/unit/test_artifactory_path.py b/tests/unit/test_artifactory_path.py index 351e294..ed37d53 100644 --- a/tests/unit/test_artifactory_path.py +++ b/tests/unit/test_artifactory_path.py @@ -1349,6 +1349,91 @@ def _create_archive_obj(self): archive_obj = folder.archive(check_sum=True) return archive_obj + @responses.activate + def test_glob(self): + """ + Test that glob works + :return: + """ + + # Build up a fake directory tree that looks like the following: + # + # .index/ + # com/ + # foo + # bar + + com_dir_stat = { + "repo": "libs-release-local", + "path": "/com", + "created": "2014-02-18T15:35:29.361+04:00", + "lastModified": "2014-02-18T15:35:29.361+04:00", + "lastUpdated": "2014-02-18T15:35:29.361+04:00", + "children": [ + {"uri": "/foo"}, + {"uri": "/bar"}, + ], + "uri": "http://artifactory.local/artifactory/api/storage/libs-release-local/com", + } + index_dir_stat = { + "repo": "libs-release-local", + "path": "/.index", + "created": "2014-02-18T15:35:29.361+04:00", + "lastModified": "2014-02-18T15:35:29.361+04:00", + "lastUpdated": "2014-02-18T15:35:29.361+04:00", + "children": [], + "uri": "http://artifactory.local/artifactory/api/storage/libs-release-local/.index", + } + ArtifactoryPath = self.cls + root_path = ArtifactoryPath( + "http://artifactory.local/artifactory/libs-release-local" + ) + constructed_url = ( + "http://artifactory.local/artifactory/api/storage/libs-release-local" + ) + responses.add( + responses.GET, + constructed_url, + status=200, + json=self.dir_stat, + ) + responses.add( + responses.GET, + f"{constructed_url}/com", + status=200, + json=com_dir_stat, + ) + responses.add( + responses.GET, + f"{constructed_url}/.index", + status=200, + json=index_dir_stat, + ) + responses.add( + responses.GET, + f"{constructed_url}/com/foo", + status=200, + json=self.file_stat, + ) + responses.add( + responses.GET, + f"{constructed_url}/com/bar", + status=200, + json=self.file_stat, + ) + + results = list(root_path.glob("**/*")) + + self.assertEqual( + [str(r) for r in results], + [ + "http://artifactory.local/artifactory/libs-release-local/.index", + "http://artifactory.local/artifactory/libs-release-local/com", + "http://artifactory.local/artifactory/libs-release-local/com/foo", + "http://artifactory.local/artifactory/libs-release-local/com/bar", + ], + ) + class ArtifactorySaaSPathTest(unittest.TestCase): cls = artifactory.ArtifactorySaaSPath