Skip to content

Commit 9950bbf

Browse files
committed
Add migration for missing metadata artifacts
1 parent e134c20 commit 9950bbf

File tree

1 file changed

+193
-0
lines changed

1 file changed

+193
-0
lines changed
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
2+
3+
from django.db import migrations
4+
5+
# Number of pending objects collected before they are written with bulk_create;
# the same value is also passed to bulk_create as its batch_size.
BATCH_SIZE = 1000
6+
7+
8+
def pulp_hashlib_new(name, *args, **kwargs):
    """
    Return a hashlib hasher for ``name`` if that checksum type is allowed.

    Copied and updated (to comply with migrations) from pulpcore.

    The algorithm must be listed in ``settings.ALLOWED_CONTENT_CHECKSUMS``; otherwise
    ``None`` is returned instead of a hasher. Any extra positional and keyword arguments
    are forwarded unchanged to ``hashlib.new``.
    """
    import hashlib as the_real_hashlib
    from django.conf import settings

    checksum_allowed = name in settings.ALLOWED_CONTENT_CHECKSUMS
    return the_real_hashlib.new(name, *args, **kwargs) if checksum_allowed else None
19+
20+
21+
def init_and_validate(file, artifact_model, expected_digests):
    """
    Build an unsaved ``artifact_model`` instance for ``file`` after checking its digests.

    Copied and updated (to comply with migrations) from pulpcore.

    Args:
        file: Either a filesystem path (``str``), which is opened and hashed here, or an
            object that already exposes ``size`` and ``hashers`` attributes.
        artifact_model: Callable invoked with ``size``, ``file`` and one keyword argument
            per allowed digest algorithm (the hex digest of the file).
        expected_digests: Mapping of algorithm name to the hex digest the file must have.

    Returns:
        Whatever ``artifact_model(...)`` returns, or ``None`` when no allowed hasher could
        be created, when an expected algorithm was not computed, or when a digest does not
        match (a warning naming the algorithm is printed in that last case).
    """
    from django.conf import settings

    # Keep the fixed algorithm order, restricted to the checksum types that are allowed.
    digest_fields = [
        alg
        for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if alg in settings.ALLOWED_CONTENT_CHECKSUMS
    ]

    if isinstance(file, str):
        with open(file, "rb") as f:
            hashers = {
                n: hasher for n in digest_fields if (hasher := pulp_hashlib_new(n)) is not None
            }
            if not hashers:
                return None

            size = 0
            # Read in 1 megabyte chunks so large files are never held in memory at once.
            while chunk := f.read(1048576):
                for digest in hashers.values():
                    digest.update(chunk)
                size += len(chunk)
    else:
        # The caller already hashed the content; reuse its bookkeeping.
        size = file.size
        hashers = file.hashers

    for algorithm, expected_digest in expected_digests.items():
        if algorithm not in hashers:
            # The expected digest cannot be verified, so refuse to build an artifact.
            return None
        actual_digest = hashers[algorithm].hexdigest()
        if expected_digest != actual_digest:
            # Report the algorithm that actually failed instead of a hard-coded name.
            print(
                f"WARNING: {algorithm.upper()} mismatch for file {file}. "
                f"Expected: {expected_digest}, Got: {actual_digest}"
            )
            return None

    attributes = {"size": size, "file": file}
    for algorithm in digest_fields:
        attributes[algorithm] = hashers[algorithm].hexdigest()

    return artifact_model(**attributes)
67+
68+
69+
def extract_wheel_metadata(filename):
    """
    Read the ``METADATA`` file stored inside a wheel archive.

    The first archive member whose path ends with ``.dist-info/METADATA`` is used.

    Returns the raw bytes of that member, or None when the archive has no such member
    or cannot be read (invalid zip file, missing member, or an OS-level error).
    """
    import zipfile

    try:
        with zipfile.ZipFile(filename, "r") as wheel:
            metadata_member = next(
                (
                    member
                    for member in wheel.namelist()
                    if member.endswith(".dist-info/METADATA")
                ),
                None,
            )
            if metadata_member is None:
                return None
            return wheel.read(metadata_member)
    except (zipfile.BadZipFile, KeyError, OSError):
        return None
84+
85+
86+
def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Creates artifact for metadata from the provided wheel artifact.

    The wheel is copied from ``artifact.file`` into a temporary file inside ``tmp_dir``,
    its ``METADATA`` member is extracted, written to a second temporary file and turned
    into an (unsaved) artifact via ``init_and_validate``.

    Args:
        filename: Wheel filename; used as the suffix of the temporary wheel copy.
        artifact: Object whose ``file`` attribute is a readable, seekable binary file.
            The file is closed before this function returns, so a caller that needs it
            again has to reopen it.
        md_digests: Mapping of algorithm name to the expected hex digest of the metadata.
        tmp_dir: Directory for the temporary files. The metadata file of a successfully
            built artifact is left there because the artifact refers to it by path;
            removing ``tmp_dir`` afterwards is the caller's responsibility.
        artifact_model: Model/callable forwarded to ``init_and_validate``.

    Returns:
        The artifact built by ``init_and_validate``, or ``None`` when the metadata could
        not be extracted or did not pass validation.
    """
    import contextlib
    import os
    import shutil
    import tempfile

    temp_wheel_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "wb", dir=tmp_dir, suffix=filename, delete=False
        ) as temp_file:
            temp_wheel_path = temp_file.name
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
            temp_file.flush()

        metadata_content = extract_wheel_metadata(temp_wheel_path)
    finally:
        # Release the source handle right away; otherwise one open file per processed
        # wheel stays around for as long as the artifact object is referenced.
        artifact.file.close()
        # The wheel copy is only needed for the extraction above. Removing it now keeps
        # tmp_dir from accumulating a full copy of every wheel. Best effort only: the
        # caller removes tmp_dir as a whole anyway.
        if temp_wheel_path is not None:
            with contextlib.suppress(OSError):
                os.remove(temp_wheel_path)

    if not metadata_content:
        return None

    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    metadata_artifact = init_and_validate(temp_metadata_path, artifact_model, md_digests)
    if metadata_artifact is None:
        # Validation failed, so nothing refers to the metadata file any more.
        with contextlib.suppress(OSError):
            os.remove(temp_metadata_path)
    return metadata_artifact
112+
113+
114+
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    For every wheel package with a non-empty ``metadata_sha256`` the main artifact (the
    ContentArtifact whose ``relative_path`` equals the package filename) is looked up, its
    ``METADATA`` file is extracted and validated against ``metadata_sha256``, and a
    ContentArtifact with the relative path ``<filename>.metadata`` is created for it.
    Packages without a main artifact, or whose metadata cannot be extracted or validated,
    are skipped. Rows are written in batches of ``BATCH_SIZE``.

    Args:
        apps: Historical app registry supplied by the migration framework.
        schema_editor: Supplied by the migration framework; not used here.
    """
    import tempfile
    from django.conf import settings

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    packages = (
        PythonPackageContent.objects.filter(
            metadata_sha256__isnull=False, filename__endswith=".whl"
        )
        .exclude(metadata_sha256="")
        .prefetch_related("contentartifact_set")
        .only("filename", "metadata_sha256")
    )
    artifact_batch = []
    contentartifact_batch = []
    # Metadata artifacts created during this run, keyed by their validated sha256.
    # Several wheels can carry byte-identical METADATA; they share one Artifact instead
    # of producing several Artifact rows with the same checksums.
    # NOTE(review): the Artifact model is expected to enforce digest uniqueness, in which
    # case such duplicates would make bulk_create fail - confirm against the model.
    artifacts_by_digest = {}

    def flush_batches():
        """Write the pending rows; Artifacts go first because ContentArtifacts use them."""
        if artifact_batch:
            Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
        ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
        artifact_batch.clear()
        contentartifact_batch.clear()

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            filename = package.filename
            # ContentArtifact and Artifact for metadata cannot exist yet because
            # this migration is released together with the new functionality which creates them

            # Get the main artifact for package
            main_artifact = next(
                (
                    ca.artifact
                    for ca in package.contentartifact_set.all()
                    if ca.relative_path == filename and ca.artifact
                ),
                None,
            )
            if main_artifact is None:
                # Main artifact does not exist
                continue

            metadata_digests = {"sha256": package.metadata_sha256}
            metadata_artifact = artifact_to_metadata_artifact(
                filename, main_artifact, metadata_digests, temp_dir, Artifact
            )
            if not metadata_artifact:
                # Failed to build metadata artifact
                continue

            # The artifact was validated against metadata_sha256, so that digest
            # identifies its content and can be used to spot duplicates.
            known_artifact = artifacts_by_digest.get(package.metadata_sha256)
            if known_artifact is None:
                artifacts_by_digest[package.metadata_sha256] = metadata_artifact
                artifact_batch.append(metadata_artifact)
            else:
                metadata_artifact = known_artifact

            contentartifact_batch.append(
                ContentArtifact(
                    artifact=metadata_artifact,
                    content=package,
                    relative_path=f"{filename}.metadata",
                )
            )

            # Key the flush on the ContentArtifact batch: with shared artifacts it can
            # grow while the artifact batch stays the same size.
            if len(contentartifact_batch) >= BATCH_SIZE:
                flush_batches()

        # Flush the remainder while the temporary directory still exists, because the
        # pending artifacts refer to metadata files stored inside it.
        if contentartifact_batch:
            flush_batches()
180+
181+
182+
class Migration(migrations.Migration):
    """
    Data migration that runs ``create_missing_metadata_artifacts``.

    Reversing the migration is a no-op.
    """

    dependencies = [("python", "0018_packageprovenance")]

    operations = [
        migrations.RunPython(
            code=create_missing_metadata_artifacts,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

0 commit comments

Comments
 (0)