From f0b639c30bd3cb4c614d52329c46e601b1307a8e Mon Sep 17 00:00:00 2001 From: Tobias Grigo Date: Mon, 2 Sep 2024 15:28:53 +0200 Subject: [PATCH] Add bulk creation for PublishedArtifacts and in structured publications closes #1147 --- CHANGES/1147.feature | 1 + .../app/serializers/content_serializers.py | 21 +-- pulp_deb/app/tasks/publishing.py | 126 +++++++++++------- 3 files changed, 87 insertions(+), 61 deletions(-) create mode 100644 CHANGES/1147.feature diff --git a/CHANGES/1147.feature b/CHANGES/1147.feature new file mode 100644 index 00000000..60007ac2 --- /dev/null +++ b/CHANGES/1147.feature @@ -0,0 +1 @@ +Improved performance when creating publications, by creating PublishedArtifacts in bulk, greatly reducing the number of database calls. diff --git a/pulp_deb/app/serializers/content_serializers.py b/pulp_deb/app/serializers/content_serializers.py index 0f357a07..b4b992fc 100644 --- a/pulp_deb/app/serializers/content_serializers.py +++ b/pulp_deb/app/serializers/content_serializers.py @@ -482,22 +482,15 @@ def to822(self, component=""): try: artifact = self.instance._artifacts.get() artifact.touch() # Orphan cleanup protection until we are done! - if artifact.md5: - ret["MD5sum"] = artifact.md5 - if artifact.sha1: - ret["SHA1"] = artifact.sha1 - ret["SHA256"] = artifact.sha256 - ret["Size"] = str(artifact.size) except Artifact.DoesNotExist: artifact = RemoteArtifact.objects.filter(sha256=self.instance.sha256).first() - if artifact.md5: - ret["MD5sum"] = artifact.md5 - if artifact.sha1: - ret["SHA1"] = artifact.sha1 - ret["SHA256"] = artifact.sha256 - ret["Size"] = str(artifact.size) - - ret["Filename"] = self.instance.filename(component) + + if artifact: + ret.update({"MD5sum": artifact.md5} if artifact.md5 else {}) + ret.update({"SHA1": artifact.sha1} if artifact.sha1 else {}) + ret.update({"SHA256": artifact.sha256}) + ret.update({"Size": str(artifact.size)}) + ret.update({"Filename": self.instance.filename(component)}) return ret diff --git a/pulp_deb/app/tasks/publishing.py b/pulp_deb/app/tasks/publishing.py index ebd641fc..73fd8157 100644 --- a/pulp_deb/app/tasks/publishing.py +++ b/pulp_deb/app/tasks/publishing.py @@ -11,6 +11,7 @@ from django.conf import settings from django.core.files import File +from django.db import transaction from django.db.utils import IntegrityError from django.forms.models import model_to_dict @@ -147,14 +148,12 @@ def publish( packages = Package.objects.filter( pk__in=repo_version.content.order_by("-pulp_created") ) - for package in packages: - release_helper.components[component].add_package(package) + release_helper.components[component].add_packages(packages) source_packages = SourcePackage.objects.filter( pk__in=repo_version.content.order_by("-pulp_created"), ) - for source_package in source_packages: - release_helper.components[component].add_source_package(source_package) + release_helper.components[component].add_source_packages(source_packages) release_helper.finish() @@ -242,10 +241,6 @@ def publish( pk__in=repo_version.content.order_by("-pulp_created"), release_component__in=release_components_filtered, ).select_related("release_component", "package") - for prc in package_release_components: - release_helper.components[prc.release_component.component].add_package( - prc.package - ) source_package_release_components = ( SourcePackageReleaseComponent.objects.filter( @@ -253,10 +248,21 @@ def publish( release_component__in=release_components_filtered, ).select_related("release_component", "source_package") ) - for drc in source_package_release_components: - release_helper.components[ - drc.release_component.component - ].add_source_package(drc.source_package) + + for component in components: + packages = [ + prc.package + for prc in package_release_components + if prc.release_component.component == component + ] + release_helper.components[component].add_packages(packages) + + source_packages = [ + drc.source_package + for drc in source_package_release_components + if drc.release_component.component == component + ] + release_helper.components[component].add_source_packages(source_packages) release_helper.save_unsigned_metadata() release_helpers.append(release_helper) @@ -307,47 +313,73 @@ def __init__(self, parent, component): source_index_path, ) - def add_package(self, package): - with suppress(IntegrityError): - published_artifact = PublishedArtifact( - relative_path=package.filename(self.component), - publication=self.parent.publication, - content_artifact=package.contentartifact_set.get(), - ) - published_artifact.save() - package_serializer = Package822Serializer(package, context={"request": None}) - - try: - package_serializer.to822(self.component).dump( - self.package_index_files[package.architecture][0] - ) - except KeyError: - log.warn( - f"Published package '{package.relative_path}' with architecture " - f"'{package.architecture}' was not added to component '{self.component}' in " - f"distribution '{self.parent.distribution}' because it lacks this architecture!" - ) - else: - self.package_index_files[package.architecture][0].write(b"\n") + def add_packages(self, packages): + published_artifacts = [] + package_data = [] - # Publish DSC file and setup to create Sources Indices file - def add_source_package(self, source_package): - artifact_set = source_package.contentartifact_set.all() - for content_artifact in artifact_set: + for package in packages: with suppress(IntegrityError): + content_artifact = package.contentartifact_set.get() + relative_path = package.filename(self.component) + published_artifact = PublishedArtifact( - relative_path=source_package.derived_path( - os.path.basename(content_artifact.relative_path), self.component - ), + relative_path=relative_path, publication=self.parent.publication, content_artifact=content_artifact, ) - published_artifact.save() - dsc_file_822_serializer = DscFile822Serializer(source_package, context={"request": None}) - dsc_file_822_serializer.to822(self.component, paragraph=True).dump( - self.source_index_file_info[0] - ) - self.source_index_file_info[0].write(b"\n") + published_artifacts.append(published_artifact) + package_data.append((package, package.architecture)) + + with transaction.atomic(): + if published_artifacts: + PublishedArtifact.objects.bulk_create(published_artifacts, ignore_conflicts=True) + + for package, architecture in package_data: + package_serializer = Package822Serializer(package, context={"request": None}) + try: + package_serializer.to822(self.component).dump( + self.package_index_files[architecture][0] + ) + except KeyError: + log.warn( + f"Published package '{package.relative_path}' with architecture " + f"'{architecture}' was not added to component '{self.component}' in " + f"distribution '{self.parent.distribution}' because it lacks this architecture!" + ) + else: + self.package_index_files[architecture][0].write(b"\n") + + # Publish DSC file and setup to create Sources Indices file + def add_source_packages(self, source_packages): + published_artifacts = [] + source_package_data = [] + + for source_package in source_packages: + with suppress(IntegrityError): + artifact_set = source_package.contentartifact_set.all() + for content_artifact in artifact_set: + published_artifact = PublishedArtifact( + relative_path=source_package.derived_path( + os.path.basename(content_artifact.relative_path), self.component + ), + publication=self.parent.publication, + content_artifact=content_artifact, + ) + published_artifacts.append(published_artifact) + source_package_data.append(source_package) + + with transaction.atomic(): + if published_artifacts: + PublishedArtifact.objects.bulk_create(published_artifacts, ignore_conflicts=True) + + for source_package in source_package_data: + dsc_file_822_serializer = DscFile822Serializer( + source_package, context={"request": None} + ) + dsc_file_822_serializer.to822(self.component, paragraph=True).dump( + self.source_index_file_info[0] + ) + self.source_index_file_info[0].write(b"\n") def finish(self): # Publish Packages files