Skip to content

Commit 33b60c5

Browse files
committed
Add migration for missing metadata artifacts
1 parent e134c20 commit 33b60c5

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts
2+
3+
from django.db import migrations
4+
5+
# Number of Artifact/ContentArtifact rows accumulated before each bulk_create() flush.
BATCH_SIZE = 1000
6+
7+
8+
def pulp_hashlib_new(name, *args, **kwargs):
    """
    Return a new hasher for ``name``, or ``None`` if the algorithm is not allowed.

    Copied and updated (to comply with migrations) from pulpcore.
    """
    import hashlib as the_real_hashlib

    from django.conf import settings

    # Only checksum algorithms explicitly allowed by the deployment may be used.
    if name in settings.ALLOWED_CONTENT_CHECKSUMS:
        return the_real_hashlib.new(name, *args, **kwargs)
    return None
19+
20+
21+
def init_and_validate(file, artifact_model, expected_digests):
    """
    Build an unsaved ``artifact_model`` instance for ``file`` after digest checks.

    Copied and updated (to comply with migrations) from pulpcore.

    ``file`` is either a filesystem path (str), which is read and hashed here,
    or a file-like object that already carries ``size`` and ``hashers``
    attributes. Returns ``None`` when no allowed hashers are available, when a
    digest named in ``expected_digests`` is unavailable, or when a computed
    digest does not match the expected value.
    """
    from django.conf import settings

    digest_fields = [
        name
        for name in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
        if name in settings.ALLOWED_CONTENT_CHECKSUMS
    ]

    if isinstance(file, str):
        with open(file, "rb") as fp:
            hashers = {}
            for name in digest_fields:
                hasher = pulp_hashlib_new(name)
                if hasher is not None:
                    hashers[name] = hasher
            if not hashers:
                return None

            size = 0
            # Hash in 1 MiB chunks so memory use stays bounded for large files.
            while chunk := fp.read(1048576):
                for hasher in hashers.values():
                    hasher.update(chunk)
                size += len(chunk)
    else:
        # File-like objects from pulpcore already track their size and hashers.
        size = file.size
        hashers = file.hashers

    # Reject the artifact if any expected digest is missing or does not match.
    for name, expected_digest in expected_digests.items():
        if name not in hashers or hashers[name].hexdigest() != expected_digest:
            return None

    attributes = {"size": size, "file": file}
    for name in digest_fields:
        attributes[name] = hashers[name].hexdigest()

    return artifact_model(**attributes)
64+
65+
66+
def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.

    Returns the raw metadata content as bytes, or None if the archive cannot be
    read or contains no ``*.dist-info/METADATA`` member.
    """
    import zipfile

    try:
        with zipfile.ZipFile(filename, "r") as archive:
            # Wheel metadata lives at "<name>-<version>.dist-info/METADATA".
            for member in archive.namelist():
                if member.endswith(".dist-info/METADATA"):
                    return archive.read(member)
    except (zipfile.BadZipFile, KeyError, OSError):
        # Treat unreadable or corrupt wheels as having no metadata.
        pass
    return None
81+
82+
83+
def artifact_to_metadata_artifact(filename, artifact, md_digests, tmp_dir, artifact_model):
    """
    Creates artifact for metadata from the provided wheel artifact.

    Args:
        filename: Original wheel filename; used as the temp-file suffix.
        artifact: Artifact whose ``file`` holds the wheel payload.
        md_digests: Expected digests for the metadata, e.g. ``{"sha256": ...}``.
        tmp_dir: Scratch directory; cleaned up by the caller.
        artifact_model: Historical ``core.Artifact`` model class.

    Returns the new (unsaved) metadata artifact, or None when the wheel has no
    readable metadata or digest validation fails.
    """
    import os
    import shutil
    import tempfile

    # Copy the wheel out of storage so zipfile can operate on a seekable file.
    with tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename, delete=False) as temp_file:
        temp_wheel_path = temp_file.name
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, temp_file)
        temp_file.flush()

    try:
        metadata_content = extract_wheel_metadata(temp_wheel_path)
    finally:
        # Remove the (potentially large) wheel copy right away instead of
        # letting one copy per package accumulate in tmp_dir for the entire
        # migration run.
        os.unlink(temp_wheel_path)

    if not metadata_content:
        return None

    # The metadata file must outlive this call: init_and_validate() stores its
    # path on the artifact, so it is only removed when tmp_dir is cleaned up.
    with tempfile.NamedTemporaryFile(
        "wb", dir=tmp_dir, suffix=".metadata", delete=False
    ) as temp_md:
        temp_metadata_path = temp_md.name
        temp_md.write(metadata_content)
        temp_md.flush()

    metadata_artifact = init_and_validate(temp_metadata_path, artifact_model, md_digests)
    return metadata_artifact
109+
110+
111+
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.

    Forward-only data migration; rows are written in batches of BATCH_SIZE.
    """
    import tempfile

    from django.conf import settings

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    # Only wheels with a recorded (non-empty) metadata checksum are candidates.
    packages = (
        PythonPackageContent.objects.filter(
            metadata_sha256__isnull=False, filename__endswith=".whl"
        )
        .exclude(metadata_sha256="")
        .prefetch_related("contentartifact_set")
        .only("filename", "metadata_sha256")
    )
    artifact_batch = []
    contentartifact_batch = []

    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            filename = package.filename
            content_artifacts = list(package.contentartifact_set.all())

            # Get the main artifact for package
            main_artifact = None
            for ca in content_artifacts:
                if ca.relative_path == filename and ca.artifact:
                    main_artifact = ca.artifact
                    break

            if not main_artifact:
                # Main artifact does not exist
                continue

            metadata_digests = {"sha256": package.metadata_sha256}
            metadata_artifact = artifact_to_metadata_artifact(
                filename, main_artifact, metadata_digests, temp_dir, Artifact
            )
            if not metadata_artifact:
                # Failed to build metadata artifact
                continue

            contentartifact = ContentArtifact(
                artifact=metadata_artifact,
                content=package,
                # BUG FIX: the relative path must be derived from the wheel's
                # filename (PEP 658 style "<wheel>.metadata"); the previous
                # f-string had no placeholder and rendered a constant literal
                # for every package.
                relative_path=f"{filename}.metadata",
            )
            artifact_batch.append(metadata_artifact)
            contentartifact_batch.append(contentartifact)

            # Flush a full batch; Artifacts first so ContentArtifacts can
            # reference them.
            if len(artifact_batch) == BATCH_SIZE:
                Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
                ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
                artifact_batch.clear()
                contentartifact_batch.clear()

        # Flush the final partial batch.
        if artifact_batch:
            Artifact.objects.bulk_create(artifact_batch, batch_size=BATCH_SIZE)
            ContentArtifact.objects.bulk_create(contentartifact_batch, batch_size=BATCH_SIZE)
175+
176+
177+
class Migration(migrations.Migration):
    # Data-only migration: backfills metadata Artifact/ContentArtifact rows for
    # existing wheel content; no schema changes.

    dependencies = [
        ("python", "0018_packageprovenance"),
    ]

    operations = [
        migrations.RunPython(
            create_missing_metadata_artifacts,
            # Created artifacts are harmless if left behind, so reversing is a
            # no-op rather than attempting a risky delete.
            reverse_code=migrations.RunPython.noop,
        ),
    ]

0 commit comments

Comments
 (0)