Skip to content

Commit

Permalink
Merge pull request #1153 from ATIX-AG/prefetch_artifacts_for_publish
Browse files Browse the repository at this point in the history
Prefetch artifacts for publish
  • Loading branch information
hstct authored Sep 18, 2024
2 parents 581f9f3 + 733cd0f commit 6c7230a
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 19 deletions.
1 change: 1 addition & 0 deletions CHANGES/1148.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance by prefetching relevant Artifacts and RemoteArtifacts during publishing, reducing the number of database calls.
14 changes: 7 additions & 7 deletions pulp_deb/app/serializers/content_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
ValidationError,
Serializer,
)
from pulpcore.plugin.models import Artifact, Content, CreatedResource, RemoteArtifact
from pulpcore.plugin.models import Artifact, Content, CreatedResource
from pulpcore.plugin.serializers import (
ContentChecksumSerializer,
MultipleArtifactContentSerializer,
Expand Down Expand Up @@ -466,7 +466,7 @@ def from822(cls, data, **kwargs):
package_fields["custom_fields"] = custom_fields
return cls(data=package_fields, **kwargs)

def to822(self, component=""):
def to822(self, component="", artifact_dict=None, remote_artifact_dict=None):
"""Create deb822.Package object from model."""
ret = deb822.Packages()

Expand All @@ -479,11 +479,11 @@ def to822(self, component=""):
if custom_fields:
ret.update(custom_fields)

try:
artifact = self.instance._artifacts.get()
artifact.touch() # Orphan cleanup protection until we are done!
except Artifact.DoesNotExist:
artifact = RemoteArtifact.objects.filter(sha256=self.instance.sha256).first()
artifact = None
if artifact_dict and self.instance.sha256 in artifact_dict:
artifact = artifact_dict[self.instance.sha256]
elif remote_artifact_dict and self.instance.sha256 in remote_artifact_dict:
artifact = remote_artifact_dict[self.instance.sha256]

if artifact:
ret.update({"MD5sum": artifact.md5} if artifact.md5 else {})
Expand Down
47 changes: 37 additions & 10 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
from django.forms.models import model_to_dict

from pulpcore.plugin.models import (
Artifact,
PublishedArtifact,
PublishedMetadata,
RemoteArtifact,
RepositoryVersion,
)

Expand Down Expand Up @@ -147,8 +149,11 @@ def publish(

packages = Package.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created")
).prefetch_related("contentartifact_set", "_artifacts")
artifact_dict, remote_artifact_dict = _batch_fetch_artifacts(packages)
release_helper.components[component].add_packages(
packages, artifact_dict, remote_artifact_dict
)
release_helper.components[component].add_packages(packages)

source_packages = SourcePackage.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created"),
Expand Down Expand Up @@ -250,12 +255,19 @@ def publish(
)

for component in components:
packages = [
prc.package
for prc in package_release_components
if prc.release_component.component == component
]
release_helper.components[component].add_packages(packages)
packages = Package.objects.filter(
pk__in=[
prc.package.pk
for prc in package_release_components
if prc.release_component.component == component
]
).prefetch_related("contentartifact_set", "_artifacts")
artifact_dict, remote_artifact_dict = _batch_fetch_artifacts(packages)
release_helper.components[component].add_packages(
packages,
artifact_dict,
remote_artifact_dict,
)

source_packages = [
drc.source_package
Expand Down Expand Up @@ -313,13 +325,17 @@ def __init__(self, parent, component):
source_index_path,
)

def add_packages(self, packages):
def add_packages(self, packages, artifact_dict, remote_artifact_dict):
published_artifacts = []
package_data = []

content_artifacts = {
package.pk: list(package.contentartifact_set.all()) for package in packages
}

for package in packages:
with suppress(IntegrityError):
content_artifact = package.contentartifact_set.get()
content_artifact = content_artifacts.get(package.pk, [None])[0]
relative_path = package.filename(self.component)

published_artifact = PublishedArtifact(
Expand All @@ -337,7 +353,7 @@ def add_packages(self, packages):
for package, architecture in package_data:
package_serializer = Package822Serializer(package, context={"request": None})
try:
package_serializer.to822(self.component).dump(
package_serializer.to822(self.component, artifact_dict, remote_artifact_dict).dump(
self.package_index_files[architecture][0]
)
except KeyError:
Expand Down Expand Up @@ -559,3 +575,14 @@ def _fetch_file_checksum(file_path, index, checksum):
checksum_type = CHECKSUM_TYPE_MAP[checksum]
hashed_path = Path(file_path).parents[0] / "by-hash" / checksum_type / digest
return hashed_path


def _batch_fetch_artifacts(packages):
sha256_values = [package.sha256 for package in packages if package.sha256]
artifacts = Artifact.objects.filter(sha256__in=sha256_values)
artifact_dict = {artifact.sha256: artifact for artifact in artifacts}

remote_artifacts = RemoteArtifact.objects.filter(sha256__in=sha256_values)
remote_artifact_dict = {artifact.sha256: artifact for artifact in remote_artifacts}

return artifact_dict, remote_artifact_dict
8 changes: 6 additions & 2 deletions pulp_deb/tests/unit/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ def test_filename_with_component(self):

def test_to822(self):
"""Test if package transforms correctly into 822dict."""
artifact_dict = {self.package1.sha256: self.artifact1}
package_dict = Package822Serializer(self.package1, context={"request": None}).to822(
"joetunn"
"joetunn", artifact_dict=artifact_dict
)
self.assertEqual(package_dict["package"], self.package1.package)
self.assertEqual(package_dict["version"], self.package1.version)
Expand All @@ -77,7 +78,10 @@ def test_to822(self):

def test_to822_dump(self):
"""Test dump to package index."""
artifact_dict = {self.package1.sha256: self.artifact1}
self.assertEqual(
Package822Serializer(self.package1, context={"request": None}).to822().dump(),
Package822Serializer(self.package1, context={"request": None})
.to822(artifact_dict=artifact_dict)
.dump(),
self.PACKAGE_PARAGRAPH,
)

0 comments on commit 6c7230a

Please sign in to comment.