Skip to content

Commit

Permalink
Merge pull request #1146 from ATIX-AG/add_batching_for_publish
Browse files Browse the repository at this point in the history
Add package and source package batching for publish
  • Loading branch information
hstct authored Sep 18, 2024
2 parents ce99f13 + f0b639c commit 581f9f3
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 61 deletions.
1 change: 1 addition & 0 deletions CHANGES/1147.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance when creating publications, by creating PublishedArtifacts in bulk, greatly reducing the number of database calls.
21 changes: 7 additions & 14 deletions pulp_deb/app/serializers/content_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,22 +482,15 @@ def to822(self, component=""):
try:
artifact = self.instance._artifacts.get()
artifact.touch() # Orphan cleanup protection until we are done!
if artifact.md5:
ret["MD5sum"] = artifact.md5
if artifact.sha1:
ret["SHA1"] = artifact.sha1
ret["SHA256"] = artifact.sha256
ret["Size"] = str(artifact.size)
except Artifact.DoesNotExist:
artifact = RemoteArtifact.objects.filter(sha256=self.instance.sha256).first()
if artifact.md5:
ret["MD5sum"] = artifact.md5
if artifact.sha1:
ret["SHA1"] = artifact.sha1
ret["SHA256"] = artifact.sha256
ret["Size"] = str(artifact.size)

ret["Filename"] = self.instance.filename(component)

if artifact:
ret.update({"MD5sum": artifact.md5} if artifact.md5 else {})
ret.update({"SHA1": artifact.sha1} if artifact.sha1 else {})
ret.update({"SHA256": artifact.sha256})
ret.update({"Size": str(artifact.size)})
ret.update({"Filename": self.instance.filename(component)})

return ret

Expand Down
126 changes: 79 additions & 47 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from django.conf import settings
from django.core.files import File
from django.db import transaction
from django.db.utils import IntegrityError
from django.forms.models import model_to_dict

Expand Down Expand Up @@ -147,14 +148,12 @@ def publish(
packages = Package.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created")
)
for package in packages:
release_helper.components[component].add_package(package)
release_helper.components[component].add_packages(packages)

source_packages = SourcePackage.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created"),
)
for source_package in source_packages:
release_helper.components[component].add_source_package(source_package)
release_helper.components[component].add_source_packages(source_packages)

release_helper.finish()

Expand Down Expand Up @@ -242,21 +241,28 @@ def publish(
pk__in=repo_version.content.order_by("-pulp_created"),
release_component__in=release_components_filtered,
).select_related("release_component", "package")
for prc in package_release_components:
release_helper.components[prc.release_component.component].add_package(
prc.package
)

source_package_release_components = (
SourcePackageReleaseComponent.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created"),
release_component__in=release_components_filtered,
).select_related("release_component", "source_package")
)
for drc in source_package_release_components:
release_helper.components[
drc.release_component.component
].add_source_package(drc.source_package)

for component in components:
packages = [
prc.package
for prc in package_release_components
if prc.release_component.component == component
]
release_helper.components[component].add_packages(packages)

source_packages = [
drc.source_package
for drc in source_package_release_components
if drc.release_component.component == component
]
release_helper.components[component].add_source_packages(source_packages)

release_helper.save_unsigned_metadata()
release_helpers.append(release_helper)
Expand Down Expand Up @@ -307,47 +313,73 @@ def __init__(self, parent, component):
source_index_path,
)

def add_package(self, package):
with suppress(IntegrityError):
published_artifact = PublishedArtifact(
relative_path=package.filename(self.component),
publication=self.parent.publication,
content_artifact=package.contentartifact_set.get(),
)
published_artifact.save()
package_serializer = Package822Serializer(package, context={"request": None})

try:
package_serializer.to822(self.component).dump(
self.package_index_files[package.architecture][0]
)
except KeyError:
log.warn(
f"Published package '{package.relative_path}' with architecture "
f"'{package.architecture}' was not added to component '{self.component}' in "
f"distribution '{self.parent.distribution}' because it lacks this architecture!"
)
else:
self.package_index_files[package.architecture][0].write(b"\n")
def add_packages(self, packages):
published_artifacts = []
package_data = []

# Publish DSC file and setup to create Sources Indices file
def add_source_package(self, source_package):
artifact_set = source_package.contentartifact_set.all()
for content_artifact in artifact_set:
for package in packages:
with suppress(IntegrityError):
content_artifact = package.contentartifact_set.get()
relative_path = package.filename(self.component)

published_artifact = PublishedArtifact(
relative_path=source_package.derived_path(
os.path.basename(content_artifact.relative_path), self.component
),
relative_path=relative_path,
publication=self.parent.publication,
content_artifact=content_artifact,
)
published_artifact.save()
dsc_file_822_serializer = DscFile822Serializer(source_package, context={"request": None})
dsc_file_822_serializer.to822(self.component, paragraph=True).dump(
self.source_index_file_info[0]
)
self.source_index_file_info[0].write(b"\n")
published_artifacts.append(published_artifact)
package_data.append((package, package.architecture))

with transaction.atomic():
if published_artifacts:
PublishedArtifact.objects.bulk_create(published_artifacts, ignore_conflicts=True)

for package, architecture in package_data:
package_serializer = Package822Serializer(package, context={"request": None})
try:
package_serializer.to822(self.component).dump(
self.package_index_files[architecture][0]
)
except KeyError:
log.warn(
f"Published package '{package.relative_path}' with architecture "
f"'{architecture}' was not added to component '{self.component}' in "
f"distribution '{self.parent.distribution}' because it lacks this architecture!"
)
else:
self.package_index_files[architecture][0].write(b"\n")

# Publish DSC file and setup to create Sources Indices file
def add_source_packages(self, source_packages):
published_artifacts = []
source_package_data = []

for source_package in source_packages:
with suppress(IntegrityError):
artifact_set = source_package.contentartifact_set.all()
for content_artifact in artifact_set:
published_artifact = PublishedArtifact(
relative_path=source_package.derived_path(
os.path.basename(content_artifact.relative_path), self.component
),
publication=self.parent.publication,
content_artifact=content_artifact,
)
published_artifacts.append(published_artifact)
source_package_data.append(source_package)

with transaction.atomic():
if published_artifacts:
PublishedArtifact.objects.bulk_create(published_artifacts, ignore_conflicts=True)

for source_package in source_package_data:
dsc_file_822_serializer = DscFile822Serializer(
source_package, context={"request": None}
)
dsc_file_822_serializer.to822(self.component, paragraph=True).dump(
self.source_index_file_info[0]
)
self.source_index_file_info[0].write(b"\n")

def finish(self):
# Publish Packages files
Expand Down

0 comments on commit 581f9f3

Please sign in to comment.