|
| 1 | +# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts |
| 2 | + |
| 3 | +from django.db import migrations |
| 4 | + |
# Number of rows buffered before each bulk_create() flush below.
BATCH_SIZE = 1000
| 6 | + |
| 7 | + |
def pulp_hashlib_new(name, *args, **kwargs):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Return a new hash object for ``name``, or ``None`` when the algorithm
    is not listed in ``settings.ALLOWED_CONTENT_CHECKSUMS``.
    """
    import hashlib as the_real_hashlib
    from django.conf import settings

    if name in settings.ALLOWED_CONTENT_CHECKSUMS:
        return the_real_hashlib.new(name, *args, **kwargs)
    return None
| 19 | + |
| 20 | + |
def init_and_validate(file, artifact_model, expected_digests):
    """
    Copied and updated (to comply with migrations) from pulpcore.

    Compute (for a path) or collect (for a file-like object exposing ``size``
    and ``hashers``) the allowed checksums of ``file``, verify them against
    ``expected_digests``, and return an unsaved ``artifact_model`` instance.
    Returns ``None`` when no allowed hashers are available or when any
    expected digest is missing or does not match.
    """
    from django.conf import settings

    # Keep only the checksum algorithms this deployment allows, preserving
    # pulpcore's preference order.
    digest_fields = [
        alg
        for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if alg in settings.ALLOWED_CONTENT_CHECKSUMS
    ]

    if isinstance(file, str):
        with open(file, "rb") as fp:
            hashers = {}
            for field in digest_fields:
                hasher = pulp_hashlib_new(field)
                if hasher is not None:
                    hashers[field] = hasher
            if not hashers:
                return None

            size = 0
            # Stream in 1 MiB chunks so large wheels stay cheap on memory.
            while chunk := fp.read(1048576):
                for hasher in hashers.values():
                    hasher.update(chunk)
                size += len(chunk)
    else:
        # Uploaded-file objects already carry precomputed hashers and a size.
        size = file.size
        hashers = file.hashers

    # Every expected digest must be present and match exactly.
    for algorithm, expected in expected_digests.items():
        if algorithm not in hashers or hashers[algorithm].hexdigest() != expected:
            return None

    attributes = {"size": size, "file": file}
    attributes.update(
        (algorithm, hashers[algorithm].hexdigest()) for algorithm in digest_fields
    )
    return artifact_model(**attributes)
| 64 | + |
| 65 | + |
def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.
    Returns the raw metadata content as bytes or None if metadata cannot be extracted.
    """
    import zipfile

    try:
        with zipfile.ZipFile(filename, "r") as wheel:
            # A wheel stores its core metadata at <pkg>-<ver>.dist-info/METADATA.
            metadata_name = next(
                (n for n in wheel.namelist() if n.endswith(".dist-info/METADATA")),
                None,
            )
            if metadata_name is not None:
                return wheel.read(metadata_name)
    except (zipfile.BadZipFile, KeyError, OSError):
        # Corrupt or unreadable archive: treat as "no metadata available".
        pass
    return None
| 81 | + |
| 82 | + |
def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Creates artifact for metadata from the provided wheel artifact.
    """
    import shutil
    import tempfile

    # Materialize the wheel on disk so zipfile can open it by path.
    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=filename, delete=False
    ) as wheel_copy:
        wheel_path = wheel_copy.name
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, wheel_copy)
        wheel_copy.flush()

    metadata_content = extract_wheel_metadata(wheel_path)
    if not metadata_content:
        return None

    # Write the METADATA bytes to their own temp file so the shared
    # checksum/validation helper can treat it like any other file.
    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as md_copy:
        metadata_path = md_copy.name
        md_copy.write(metadata_content)
        md_copy.flush()

    return init_and_validate(metadata_path, artifact_model, md_digests)
| 109 | + |
| 110 | + |
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    Wheels are copied to a working directory, their embedded
    ``*.dist-info/METADATA`` file is extracted and validated against the
    recorded ``metadata_sha256``, and the resulting Artifact/ContentArtifact
    pairs are persisted in batches of ``BATCH_SIZE``.
    """
    import tempfile
    from django.conf import settings

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    # Only wheels carry an embedded METADATA file, and only packages that
    # recorded a metadata_sha256 can have the extracted content validated.
    packages = (
        PythonPackageContent.objects.filter(
            metadata_sha256__isnull=False, filename__endswith=".whl"
        )
        .exclude(metadata_sha256="")
        .prefetch_related("contentartifact_set")
        .only("filename", "metadata_sha256")
    )
    artifact_batch = []
    contentartifact_batch = []

    def _flush():
        # Artifacts must exist before the ContentArtifact rows that reference them.
        Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
        ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
        artifact_batch.clear()
        contentartifact_batch.clear()

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            filename = package.filename

            # Locate the wheel's own artifact among the package's content artifacts.
            main_artifact = next(
                (
                    ca.artifact
                    for ca in package.contentartifact_set.all()
                    if ca.relative_path == filename and ca.artifact
                ),
                None,
            )
            if main_artifact is None:
                # The wheel file itself is missing; nothing to extract from.
                continue

            metadata_digests = {"sha256": package.metadata_sha256}
            metadata_artifact = artifact_to_metadata_artifact(
                filename, main_artifact, metadata_digests, temp_dir, Artifact
            )
            if not metadata_artifact:
                # Extraction failed or the checksum did not match; skip quietly.
                continue

            artifact_batch.append(metadata_artifact)
            contentartifact_batch.append(
                ContentArtifact(
                    artifact=metadata_artifact,
                    content=package,
                    # Fix: derive the path from the wheel's filename. The original
                    # used a constant string (an f-string with no placeholder),
                    # which would give every package's metadata the same
                    # relative_path and collide across packages.
                    relative_path=f"{filename}.metadata",
                )
            )

            # ">=" rather than "==" so the batch can never silently grow past
            # the limit if items are ever appended more than one at a time.
            if len(artifact_batch) >= BATCH_SIZE:
                _flush()

        if artifact_batch:
            _flush()
| 175 | + |
| 176 | + |
class Migration(migrations.Migration):
    # Data-only migration: backfills metadata artifacts for existing wheel
    # packages. No schema changes, so the reverse operation is a no-op.

    dependencies = [
        ("python", "0018_packageprovenance"),
    ]

    operations = [
        migrations.RunPython(
            create_missing_metadata_artifacts,
            reverse_code=migrations.RunPython.noop,
        ),
    ]