# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts

from django.db import migrations

BATCH_SIZE = 1000


def pulp_hashlib_new(name, *args, **kwargs):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Return a new hashlib hasher for ``name``, or None when ``name`` is not listed in
    ``settings.ALLOWED_CONTENT_CHECKSUMS``.
    """
    import hashlib as the_real_hashlib
    from django.conf import settings

    if name not in settings.ALLOWED_CONTENT_CHECKSUMS:
        return None

    return the_real_hashlib.new(name, *args, **kwargs)


def init_and_validate(file, artifact_model, expected_digests):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Build an (unsaved) ``artifact_model`` instance for ``file`` and compare its digests with
    ``expected_digests``.

    Args:
        file: Either a path (str) that is read and hashed here, or an object that already
            exposes ``size`` and ``hashers`` attributes.
        artifact_model: The (historical) Artifact model class to instantiate.
        expected_digests: Mapping of algorithm name to the expected hex digest.

    Returns:
        ``(artifact, mismatched_digest)`` on success, where ``mismatched_digest`` is the actual
        digest when it differs from the expected one (otherwise None). Returns None when no
        allowed hasher is available or an expected algorithm was not computed.
    """
    from django.conf import settings

    digest_fields = [
        alg
        for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if alg in settings.ALLOWED_CONTENT_CHECKSUMS
    ]

    if isinstance(file, str):
        with open(file, "rb") as f:
            hashers = {
                n: hasher for n in digest_fields if (hasher := pulp_hashlib_new(n)) is not None
            }
            if not hashers:
                return None

            size = 0
            while chunk := f.read(1048576):  # 1 megabyte
                for algorithm in hashers.values():
                    algorithm.update(chunk)
                size += len(chunk)
    else:
        size = file.size
        hashers = file.hashers

    mismatched_sha256 = None
    for algorithm, expected_digest in expected_digests.items():
        if algorithm not in hashers:
            return None
        actual_digest = hashers[algorithm].hexdigest()
        if expected_digest != actual_digest:
            # Store the actual value for later fixing if it differs from the package value
            mismatched_sha256 = actual_digest

    attributes = {"size": size, "file": file}
    for algorithm in digest_fields:
        attributes[algorithm] = hashers[algorithm].hexdigest()

    return artifact_model(**attributes), mismatched_sha256


def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.
    Return the raw metadata content as bytes or None if metadata cannot be extracted.

    The wheel's own ``*.dist-info`` directory sits at the archive root, while vendored
    dependencies may ship their own nested ``*.dist-info/METADATA``. The shallowest match is
    therefore preferred; among equally deep matches the archive order is kept.
    """
    import zipfile

    try:
        with zipfile.ZipFile(filename, "r") as f:
            candidates = [
                file_path
                for file_path in f.namelist()
                if file_path.endswith(".dist-info/METADATA")
            ]
            if candidates:
                # min() returns the first of the shallowest candidates
                return f.read(min(candidates, key=lambda file_path: file_path.count("/")))
    except (zipfile.BadZipFile, KeyError, OSError):
        pass
    return None


def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Create artifact for metadata from the provided wheel artifact.
    Return (artifact, mismatched_sha256) on success, "extraction_failed" when metadata extraction
    fails, or None on init_and_validate failure.

    The temporary copy of the wheel is removed as soon as the metadata has been read so the
    temporary directory does not accumulate one full wheel per processed package. The extracted
    metadata file is kept on disk because the returned (unsaved) artifact still points at it.
    """
    import contextlib
    import os
    import shutil
    import tempfile

    temp_wheel_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "wb", dir=tmp_dir, suffix=filename, delete=False
        ) as temp_file:
            temp_wheel_path = temp_file.name
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
            temp_file.flush()

        metadata_content = extract_wheel_metadata(temp_wheel_path)
    finally:
        if temp_wheel_path is not None:
            with contextlib.suppress(OSError):
                os.unlink(temp_wheel_path)

    if not metadata_content:
        return "extraction_failed"

    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    return init_and_validate(temp_metadata_path, artifact_model, md_digests)


def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    Packages whose metadata cannot be extracted get ``metadata_sha256`` unset; packages whose
    stored ``metadata_sha256`` differs from the computed digest get it corrected.
    """
    import tempfile
    from django.conf import settings
    from django.db import models

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    packages = (
        PythonPackageContent.objects.filter(
            metadata_sha256__isnull=False,
            filename__endswith=".whl",
            contentartifact__artifact__isnull=False,
            contentartifact__relative_path=models.F("filename"),
        )
        .exclude(metadata_sha256="")
        .prefetch_related("_artifacts")
        .only("filename", "metadata_sha256")
    )
    skipped_pkgs = 0
    artifact_batch = []
    contentartifact_batch = []
    packages_batch = []

    def flush_artifacts():
        # Artifacts go first so the content artifacts created next can reference them.
        Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
        ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
        artifact_batch.clear()
        contentartifact_batch.clear()

    def flush_packages():
        PythonPackageContent.objects.bulk_update(
            packages_batch, ["metadata_sha256"], batch_size=BATCH_SIZE
        )
        packages_batch.clear()

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            # Get the main artifact for package
            main_artifact = package._artifacts.get()

            filename = package.filename
            metadata_digests = {"sha256": package.metadata_sha256}
            result = artifact_to_metadata_artifact(
                filename, main_artifact, metadata_digests, temp_dir, Artifact
            )
            if result == "extraction_failed":
                # Unset metadata_sha256 when metadata extraction fails
                package.metadata_sha256 = None
                packages_batch.append(package)
                skipped_pkgs += 1
            elif result is None:
                # Failed to build metadata artifact (init_and_validate failed)
                skipped_pkgs += 1
            else:
                metadata_artifact, mismatched_sha256 = result
                if mismatched_sha256:
                    # Fix the package if its metadata_sha256 differs from the actual value
                    package.metadata_sha256 = mismatched_sha256
                    packages_batch.append(package)

                contentartifact = ContentArtifact(
                    artifact=metadata_artifact,
                    content=package,
                    relative_path=f"{filename}.metadata",
                )
                artifact_batch.append(metadata_artifact)
                contentartifact_batch.append(contentartifact)

            # ">=" (not "==") and checked on every iteration, so a batch that grows on a
            # skipped package is still flushed instead of growing until the end.
            if len(artifact_batch) >= BATCH_SIZE:
                flush_artifacts()
            if len(packages_batch) >= BATCH_SIZE:
                flush_packages()

        # Flush the remainders while the temporary metadata files still exist.
        if artifact_batch:
            flush_artifacts()
        if packages_batch:
            flush_packages()

    if skipped_pkgs > 0:
        print(f"Skipped creation of missing metadata artifacts for {skipped_pkgs} packages")


class Migration(migrations.Migration):

    dependencies = [
        ("python", "0018_packageprovenance"),
    ]

    operations = [
        migrations.RunPython(
            create_missing_metadata_artifacts,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
class PythonSingleContentArtifactField(core_serializers.SingleContentArtifactField):
    """
    Artifact field that tolerates content carrying more than one artifact.

    Intended for PythonPackageContentSerializer only: a wheel may own a second content
    artifact whose relative path ends with ".metadata", and that one must not be reported
    as the package's artifact.
    """

    def get_attribute(self, instance):
        # A single artifact needs no special treatment; defer to the base field.
        if instance._artifacts.count() > 1:
            main_content_artifact = next(
                (
                    content_artifact
                    for content_artifact in instance.contentartifact_set.all()
                    if not content_artifact.relative_path.endswith(".metadata")
                ),
                None,
            )
            if main_content_artifact is not None:
                return main_content_artifact.artifact

        return super().get_attribute(instance)
""" + artifact = PythonSingleContentArtifactField( + help_text=_("Artifact file representing the physical content"), + ) + # Core metadata # Version 1.0 author = serializers.CharField( @@ -386,8 +408,21 @@ def deferred_validate(self, data): if attestations := data.pop("attestations", None): data["provenance"] = self.handle_attestations(filename, data["sha256"], attestations) + # Create metadata artifact for wheel files + if filename.endswith(".whl"): + if metadata_artifact := artifact_to_metadata_artifact(filename, artifact): + data["metadata_artifact"] = metadata_artifact + data["metadata_sha256"] = metadata_artifact.sha256 + return data + def get_artifacts(self, validated_data): + artifacts = super().get_artifacts(validated_data) + if metadata_artifact := validated_data.pop("metadata_artifact", None): + relative_path = f"{validated_data['filename']}.metadata" + artifacts[relative_path] = metadata_artifact + return artifacts + def retrieve(self, validated_data): content = python_models.PythonPackageContent.objects.filter( sha256=validated_data["sha256"], _pulp_domain=get_domain() @@ -419,6 +454,7 @@ def create(self, validated_data): class Meta: fields = core_serializers.SingleArtifactContentUploadSerializer.Meta.fields + ( + "artifact", "author", "author_email", "description", @@ -514,6 +550,15 @@ def validate(self, data): data["provenance"] = self.handle_attestations( filename, data["sha256"], attestations, offline=True ) + # Create metadata artifact for wheel files + if filename.endswith(".whl"): + with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir: + if metadata_artifact := artifact_to_metadata_artifact( + filename, artifact, tmp_dir=temp_dir + ): + data["metadata_artifact"] = metadata_artifact + data["metadata_sha256"] = metadata_artifact.sha256 + return data class Meta(PythonPackageContentSerializer.Meta): diff --git a/pulp_python/app/tasks/repair.py b/pulp_python/app/tasks/repair.py index 0f32cfa2..c2cfc0d0 100644 --- 
a/pulp_python/app/tasks/repair.py +++ b/pulp_python/app/tasks/repair.py @@ -95,9 +95,13 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s progress_report.save() with progress_report: for package in progress_report.iter(immediate_content.iterator(chunk_size=BULK_SIZE)): - new_data = artifact_to_python_content_data( - package.filename, package._artifacts.get(), domain + # Get the main artifact + main_artifact = ( + package.contentartifact_set.exclude(relative_path__endswith=".metadata") + .first() + .artifact ) + new_data = artifact_to_python_content_data(package.filename, main_artifact, domain) total_repaired += update_package_if_needed( package, new_data, batch, set_of_update_fields ) @@ -113,7 +117,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s grouped_by_url = defaultdict(list) for package in group_set: - for ra in package.contentartifact_set.get().remoteartifact_set.all(): + for ra in ( + package.contentartifact_set.exclude(relative_path__endswith=".metadata") + .first() + .remoteartifact_set.all() + ): grouped_by_url[ra.remote.url].append((package, ra)) # Prioritize the URL that can serve the most packages diff --git a/pulp_python/app/tasks/sync.py b/pulp_python/app/tasks/sync.py index d7058e8e..b364c3dd 100644 --- a/pulp_python/app/tasks/sync.py +++ b/pulp_python/app/tasks/sync.py @@ -229,11 +229,15 @@ async def create_content(self, pkg): create a Content Unit to put into the pipeline """ declared_contents = {} + page = await aget_remote_simple_page(pkg.name, self.remote) + upstream_pkgs = {pkg.filename: pkg for pkg in page.packages} + for version, dists in pkg.releases.items(): for package in dists: entry = parse_metadata(pkg.info, version, package) url = entry.pop("url") size = package["size"] or None + d_artifacts = [] artifact = Artifact(sha256=entry["sha256"], size=size) package = PythonPackageContent(**entry) @@ -245,11 +249,29 @@ async def create_content(self, pkg): 
remote=self.remote, deferred_download=self.deferred_download, ) - dc = DeclarativeContent(content=package, d_artifacts=[da]) + d_artifacts.append(da) + + if upstream_pkg := upstream_pkgs.get(entry["filename"]): + if upstream_pkg.has_metadata: + url = upstream_pkg.metadata_url + md_sha256 = upstream_pkg.metadata_digests.get("sha256") + package.metadata_sha256 = md_sha256 + artifact = Artifact(sha256=md_sha256) + + metadata_artifact = DeclarativeArtifact( + artifact=artifact, + url=url, + relative_path=f"{entry['filename']}.metadata", + remote=self.remote, + deferred_download=self.deferred_download, + ) + d_artifacts.append(metadata_artifact) + + dc = DeclarativeContent(content=package, d_artifacts=d_artifacts) declared_contents[entry["filename"]] = dc await self.python_stage.put(dc) - if pkg.releases and (page := await aget_remote_simple_page(pkg.name, self.remote)): + if pkg.releases and page: if self.remote.provenance: await self.sync_provenance(page, declared_contents) diff --git a/pulp_python/app/tasks/upload.py b/pulp_python/app/tasks/upload.py index dcd7aa72..bf98342d 100644 --- a/pulp_python/app/tasks/upload.py +++ b/pulp_python/app/tasks/upload.py @@ -15,7 +15,7 @@ Provenance, verify_provenance, ) -from pulp_python.app.utils import artifact_to_python_content_data +from pulp_python.app.utils import artifact_to_metadata_artifact, artifact_to_python_content_data def upload(artifact_sha256, filename, attestations=None, repository_pk=None): @@ -97,6 +97,11 @@ def create_content(artifact_sha256, filename, domain): def create(): content = PythonPackageContent.objects.create(**data) ContentArtifact.objects.create(artifact=artifact, content=content, relative_path=filename) + + if metadata_artifact := artifact_to_metadata_artifact(filename, artifact): + ContentArtifact.objects.create( + artifact=metadata_artifact, content=content, relative_path=f"{filename}.metadata" + ) return content new_content = create() diff --git a/pulp_python/app/utils.py 
b/pulp_python/app/utils.py index 4363f11e..b1beace2 100644 --- a/pulp_python/app/utils.py +++ b/pulp_python/app/utils.py @@ -1,4 +1,5 @@ import hashlib +import logging import pkginfo import re import shutil @@ -8,14 +9,19 @@ from aiohttp.client_exceptions import ClientError from collections import defaultdict from django.conf import settings +from django.db.utils import IntegrityError from django.utils import timezone from jinja2 import Template from packaging.utils import canonicalize_name from packaging.requirements import Requirement from packaging.version import parse, InvalidVersion from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage -from pulpcore.plugin.models import Remote +from pulpcore.plugin.models import Artifact, Remote from pulpcore.plugin.exceptions import TimeoutException +from pulpcore.plugin.util import get_domain + + +log = logging.getLogger(__name__) PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL" @@ -41,6 +47,7 @@ """ +# TODO in the future: data-requires-python (PEP 503) simple_detail_template = """ @@ -49,10 +56,12 @@

Links for {{ project_name }}

- {% for pkg in project_packages %} - {{ pkg.filename }}
- {% endfor %} + {%- endfor %} """ @@ -200,11 +209,11 @@ def get_project_metadata_from_file(filename): return metadata -def compute_metadata_sha256(filename: str) -> str | None: +def extract_wheel_metadata(filename: str) -> bytes | None: """ - Compute SHA256 hash of the metadata file from a Python package. + Extract the metadata file content from a wheel file. - Returns SHA256 hash or None if metadata cannot be extracted. + Returns the raw metadata content as bytes or None if metadata cannot be extracted. """ if not filename.endswith(".whl"): return None @@ -212,13 +221,22 @@ def compute_metadata_sha256(filename: str) -> str | None: with zipfile.ZipFile(filename, "r") as f: for file_path in f.namelist(): if file_path.endswith(".dist-info/METADATA"): - metadata_content = f.read(file_path) - return hashlib.sha256(metadata_content).hexdigest() - except (zipfile.BadZipFile, KeyError, OSError): - pass + return f.read(file_path) + except (zipfile.BadZipFile, KeyError, OSError) as e: + log.warning(f"Failed to extract metadata file from {filename}: {e}") return None +def compute_metadata_sha256(filename: str) -> str | None: + """ + Compute SHA256 hash of the metadata file from a Python package. + + Returns SHA256 hash or None if metadata cannot be extracted. + """ + metadata_content = extract_wheel_metadata(filename) + return hashlib.sha256(metadata_content).hexdigest() if metadata_content else None + + def artifact_to_python_content_data(filename, artifact, domain=None): """ Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent. 
def artifact_to_metadata_artifact(
    filename: str, artifact: Artifact, tmp_dir: str = "."
) -> Artifact | None:
    """
    Creates artifact for metadata from the provided wheel artifact.

    The wheel is copied to a temporary file in ``tmp_dir``, its metadata file is extracted
    and written to a second temporary file, and an Artifact is initialized from that file and
    saved. When saving raises IntegrityError, the already stored artifact with the same
    sha256 in the current domain is returned instead.

    Returns None when ``filename`` is not a wheel or no metadata could be extracted.
    """
    import contextlib
    import os

    if not filename.endswith(".whl"):
        return None

    temp_wheel_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "wb", dir=tmp_dir, suffix=filename, delete=False
        ) as temp_file:
            temp_wheel_path = temp_file.name
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
            temp_file.flush()

        metadata_content = extract_wheel_metadata(temp_wheel_path)
    finally:
        # The wheel copy is only needed for extraction; created with delete=False, it would
        # otherwise stay on disk for as long as tmp_dir exists.
        if temp_wheel_path is not None:
            with contextlib.suppress(OSError):
                os.unlink(temp_wheel_path)

    if not metadata_content:
        return None

    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    metadata_artifact = Artifact.init_and_validate(temp_metadata_path)
    try:
        metadata_artifact.save()
    except IntegrityError:
        # An artifact with the same digest already exists; reuse it.
        metadata_artifact = Artifact.objects.get(
            sha256=metadata_artifact.sha256, pulp_domain=get_domain()
        )
    return metadata_artifact
A release can contain @@ -402,7 +457,9 @@ def find_artifact(): _art = models.RemoteArtifact.objects.filter(content_artifact=content_artifact).first() return _art - content_artifact = content.contentartifact_set.first() + content_artifact = content.contentartifact_set.exclude( + relative_path__endswith=".metadata" + ).first() artifact = find_artifact() origin = settings.CONTENT_ORIGIN or settings.PYPI_API_HOSTNAME or "" origin = origin.strip("/") diff --git a/pulp_python/tests/functional/api/test_crud_content_unit.py b/pulp_python/tests/functional/api/test_crud_content_unit.py index 9689735c..2aac0a98 100644 --- a/pulp_python/tests/functional/api/test_crud_content_unit.py +++ b/pulp_python/tests/functional/api/test_crud_content_unit.py @@ -10,7 +10,10 @@ PYTHON_EGG_FILENAME, PYTHON_EGG_URL, PYTHON_SM_FIXTURE_CHECKSUMS, + PYTHON_WHEEL_FILENAME, + PYTHON_WHEEL_URL, ) +from pulp_python.tests.functional.utils import ensure_metadata def test_content_crud( @@ -179,3 +182,22 @@ def test_upload_metadata_24_spec(python_content_factory): assert content.license_expression == "MIT" assert content.license_file == '["LICENSE"]' break + + +@pytest.mark.parallel +def test_package_creation_with_metadata( + pulp_content_url, + python_content_factory, + python_distribution_factory, + python_repo, +): + """ + Test that the creation of a Python wheel package creates a metadata artifact. 
@pytest.mark.parallel
def test_package_upload_with_metadata(
    monitor_task,
    pulp_content_url,
    python_content_summary,
    python_empty_repo_distro,
    python_package_dist_directory,
):
    """
    Test that the upload of a Python wheel package creates a metadata artifact.
    """
    repo, distro = python_empty_repo_distro()
    url = urljoin(distro.base_url, "simple/")
    dist_dir, egg_file, wheel_file = python_package_dist_directory
    # Open the wheel in a context manager so the handle is closed once the request is sent.
    with open(wheel_file, "rb") as wheel:
        response = requests.post(
            url,
            data={"sha256_digest": PYTHON_WHEEL_SHA256},
            files={"content": wheel},
            auth=("admin", "password"),
        )
    assert response.status_code == 202
    monitor_task(response.json()["task"])
    summary = python_content_summary(repository=repo)
    assert summary.added["python.python"]["count"] == 1

    # Test that metadata is accessible
    ensure_metadata(pulp_content_url, distro.base_path, PYTHON_WHEEL_FILENAME)
b/pulp_python/tests/functional/api/test_pypi_simple_api.py @@ -4,21 +4,85 @@ import requests from pulp_python.tests.functional.constants import ( + PYPI_SERIAL_CONSTANT, PYTHON_EGG_FILENAME, + PYTHON_EGG_SHA256, PYTHON_EGG_URL, + PYTHON_SM_FIXTURE_CHECKSUMS, + PYTHON_SM_FIXTURE_RELEASES, PYTHON_SM_PROJECT_SPECIFIER, PYTHON_WHEEL_FILENAME, + PYTHON_WHEEL_METADATA_SHA256, + PYTHON_WHEEL_SHA256, PYTHON_WHEEL_URL, + PYTHON_XS_FIXTURE_CHECKSUMS, ) +from pulp_python.tests.functional.utils import ensure_simple API_VERSION = "1.1" -PYPI_SERIAL_CONSTANT = 1000000000 PYPI_TEXT_HTML = "text/html" PYPI_SIMPLE_V1_HTML = "application/vnd.pypi.simple.v1+html" PYPI_SIMPLE_V1_JSON = "application/vnd.pypi.simple.v1+json" +@pytest.mark.parallel +def test_simple_html_index_api( + python_remote_factory, python_repo_with_sync, python_distribution_factory +): + remote = python_remote_factory(includes=PYTHON_SM_PROJECT_SPECIFIER) + repo = python_repo_with_sync(remote) + distro = python_distribution_factory(repository=repo) + + url = urljoin(distro.base_url, "simple/") + headers = {"Accept": PYPI_SIMPLE_V1_HTML} + + response = requests.get(url, headers=headers) + assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_HTML + assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT) + + proper, msgs = ensure_simple( + url, PYTHON_SM_FIXTURE_RELEASES, sha_digests=PYTHON_SM_FIXTURE_CHECKSUMS + ) + assert proper, f"Simple API validation failed: {msgs}" + + +def test_simple_html_detail_api( + delete_orphans_pre, + monitor_task, + python_bindings, + python_content_factory, + python_distribution_factory, + python_repo_factory, +): + content_1 = python_content_factory(PYTHON_WHEEL_FILENAME, url=PYTHON_WHEEL_URL) + content_2 = python_content_factory(PYTHON_EGG_FILENAME, url=PYTHON_EGG_URL) + body = {"add_content_units": [content_1.pulp_href, content_2.pulp_href]} + + repo = python_repo_factory() + monitor_task(python_bindings.RepositoriesPythonApi.modify(repo.pulp_href, 
body).task) + distro = python_distribution_factory(repository=repo) + + url = f'{urljoin(distro.base_url, "simple/")}shelf-reader' + headers = {"Accept": PYPI_SIMPLE_V1_HTML} + + response = requests.get(url, headers=headers) + assert response.headers["Content-Type"] == PYPI_SIMPLE_V1_HTML + assert response.headers["X-PyPI-Last-Serial"] == str(PYPI_SERIAL_CONSTANT) + + metadata_sha_digests = { + PYTHON_WHEEL_FILENAME: PYTHON_WHEEL_METADATA_SHA256, + PYTHON_EGG_FILENAME: None, # egg files should not have metadata + } + proper, msgs = ensure_simple( + urljoin(distro.base_url, "simple/"), + {"shelf-reader": [PYTHON_WHEEL_FILENAME, PYTHON_EGG_FILENAME]}, + sha_digests=PYTHON_XS_FIXTURE_CHECKSUMS, + metadata_sha_digests=metadata_sha_digests, + ) + assert proper, f"Simple API validation failed: {msgs}" + + @pytest.mark.parallel def test_simple_json_index_api( python_remote_factory, python_repo_with_sync, python_distribution_factory @@ -72,27 +136,19 @@ def test_simple_json_detail_api( assert data["versions"] == ["0.1"] # Check data of a wheel - file_whl = next( - (i for i in data["files"] if i["filename"] == "shelf_reader-0.1-py2-none-any.whl"), None - ) + file_whl = next((i for i in data["files"] if i["filename"] == PYTHON_WHEEL_FILENAME), None) assert file_whl is not None, "wheel file not found" assert file_whl["url"] - assert file_whl["hashes"] == { - "sha256": "2eceb1643c10c5e4a65970baf63bde43b79cbdac7de81dae853ce47ab05197e9" - } + assert file_whl["hashes"] == {"sha256": PYTHON_WHEEL_SHA256} assert file_whl["requires-python"] is None - assert file_whl["data-dist-info-metadata"] == { - "sha256": "ed333f0db05d77e933a157b7225b403ada9a2f93318d77b41b662eba78bac350" - } + assert file_whl["data-dist-info-metadata"] == {"sha256": PYTHON_WHEEL_METADATA_SHA256} assert file_whl["size"] == 22455 assert file_whl["upload-time"] is not None # Check data of a tarball - file_tar = next((i for i in data["files"] if i["filename"] == "shelf-reader-0.1.tar.gz"), None) + file_tar = next((i 
for i in data["files"] if i["filename"] == PYTHON_EGG_FILENAME), None) assert file_tar is not None, "tar file not found" assert file_tar["url"] - assert file_tar["hashes"] == { - "sha256": "04cfd8bb4f843e35d51bfdef2035109bdea831b55a57c3e6a154d14be116398c" - } + assert file_tar["hashes"] == {"sha256": PYTHON_EGG_SHA256} assert file_tar["requires-python"] is None assert file_tar["data-dist-info-metadata"] is False assert file_tar["size"] == 19097 diff --git a/pulp_python/tests/functional/api/test_sync.py b/pulp_python/tests/functional/api/test_sync.py index c8030b8a..5b19ae6b 100644 --- a/pulp_python/tests/functional/api/test_sync.py +++ b/pulp_python/tests/functional/api/test_sync.py @@ -18,6 +18,7 @@ DJANGO_LATEST_3, SCIPY_COUNTS, ) +from pulp_python.tests.functional.utils import ensure_metadata @pytest.mark.parallel @@ -336,3 +337,21 @@ def test_sync_provenance(python_repo_with_sync, python_remote_factory, python_co summary = python_content_summary(repository_version=repo.latest_version_href) assert summary.present["python.python"]["count"] == 2 assert summary.present["python.provenance"]["count"] == 2 + + +@pytest.mark.parallel +def test_package_sync_with_metadata( + pulp_content_url, + python_distribution_factory, + python_remote_factory, + python_repo_with_sync, +): + """ + Test that the sync of a Python wheel package creates a metadata artifact. 
+ """ + remote = python_remote_factory(includes=["pytz"]) + repo = python_repo_with_sync(remote) + distro = python_distribution_factory(repository=repo) + + # Test that metadata is accessible + ensure_metadata(pulp_content_url, distro.base_path, "pytz-2023.2-py2.py3-none-any.whl") diff --git a/pulp_python/tests/functional/api/test_upload.py b/pulp_python/tests/functional/api/test_upload.py index 33f67517..5d2e207e 100644 --- a/pulp_python/tests/functional/api/test_upload.py +++ b/pulp_python/tests/functional/api/test_upload.py @@ -8,6 +8,7 @@ PYTHON_EGG_SHA256, PYTHON_WHEEL_SHA256, ) +from pulp_python.tests.functional.utils import ensure_metadata from urllib.parse import urljoin @@ -48,6 +49,30 @@ def test_synchronous_package_upload( assert ctx.value.status == 403 +@pytest.mark.parallel +def test_synchronous_package_upload_with_metadata( + download_python_file, + monitor_task, + pulp_content_url, + python_bindings, + python_distribution_factory, + python_repo, +): + """ + Test that the synchronous upload of a Python wheel package creates a metadata artifact. 
+ """ + python_file = download_python_file(PYTHON_WHEEL_FILENAME, PYTHON_WHEEL_URL) + content_body = {"file": python_file} + content = python_bindings.ContentPackagesApi.upload(**content_body) + + body = {"add_content_units": [content.pulp_href]} + monitor_task(python_bindings.RepositoriesPythonApi.modify(python_repo.pulp_href, body).task) + distro = python_distribution_factory(repository=python_repo) + + # Test that metadata is accessible + ensure_metadata(pulp_content_url, distro.base_path, PYTHON_WHEEL_FILENAME) + + @pytest.mark.parallel def test_legacy_upload_invalid_protocol_version( python_empty_repo_distro, python_package_dist_directory diff --git a/pulp_python/tests/functional/constants.py b/pulp_python/tests/functional/constants.py index 2855b8a9..4150720f 100644 --- a/pulp_python/tests/functional/constants.py +++ b/pulp_python/tests/functional/constants.py @@ -150,6 +150,8 @@ PYTHON_WHEEL_URL = urljoin(urljoin(PYTHON_FIXTURES_URL, "packages/"), PYTHON_WHEEL_FILENAME) PYTHON_WHEEL_SHA256 = "2eceb1643c10c5e4a65970baf63bde43b79cbdac7de81dae853ce47ab05197e9" +PYTHON_WHEEL_METADATA_SHA256 = "ed333f0db05d77e933a157b7225b403ada9a2f93318d77b41b662eba78bac350" + PYTHON_XS_FIXTURE_CHECKSUMS = { PYTHON_EGG_FILENAME: PYTHON_EGG_SHA256, PYTHON_WHEEL_FILENAME: PYTHON_WHEEL_SHA256, @@ -353,3 +355,5 @@ VULNERABILITY_REPORT_TEST_PACKAGES = [ "django==5.2.1", ] + +PYPI_SERIAL_CONSTANT = 1000000000 diff --git a/pulp_python/tests/functional/utils.py b/pulp_python/tests/functional/utils.py index a3a2ede3..cd8354fd 100644 --- a/pulp_python/tests/functional/utils.py +++ b/pulp_python/tests/functional/utils.py @@ -4,7 +4,29 @@ from lxml import html -def ensure_simple(simple_url, packages, sha_digests=None): +def _validate_metadata_sha_digest(link, filename, metadata_sha_digests): + """ + Validate data-dist-info-metadata attribute for a release link. 
+ """ + data_dist_info_metadata = link.get("data-dist-info-metadata") + + if expected_metadata_sha := metadata_sha_digests.get(filename): + expected_attr = f"sha256={expected_metadata_sha}" + if data_dist_info_metadata != expected_attr: + return ( + f"\nFile {filename} has incorrect data-dist-info-metadata: " + f"expected '{expected_attr}', got '{data_dist_info_metadata}'" + ) + else: + if data_dist_info_metadata: + return ( + f"\nFile {filename} should not have data-dist-info-metadata " + f"but has '{data_dist_info_metadata}'" + ) + return "" + + +def ensure_simple(simple_url, packages, sha_digests=None, metadata_sha_digests=None): """ Tests that the simple api at `url` matches the packages supplied. `packages`: dictionary of form {package_name: [release_filenames]} @@ -28,6 +50,9 @@ def explore_links(page_url, page_name, links_found, msgs): links_found[link.text] = True if link.get("href"): legit_found_links.append(link.get("href")) + # Check metadata SHA digest if provided + if metadata_sha_digests and page_name == "release": + msgs += _validate_metadata_sha_digest(link, link.text, metadata_sha_digests) else: msgs += f"\nFound {page_name} link without href {link.text}" else: @@ -62,3 +87,15 @@ def explore_links(page_url, page_name, links_found, msgs): ) ) return len(msgs) == 0, msgs + + +def ensure_metadata(pulp_content_url, distro_base_path, filename): + """ + Tests that metadata is accessible for a given wheel package filename. + """ + relative_path = f"{distro_base_path}/{filename}.metadata" + metadata_url = urljoin(pulp_content_url, relative_path) + metadata_response = requests.get(metadata_url) + assert metadata_response.status_code == 200 + assert len(metadata_response.content) > 0 + assert "Name: " in metadata_response.text