From d4a338bc2c50534e582a50af88e721673d988e34 Mon Sep 17 00:00:00 2001 From: Quirin Pamp Date: Thu, 16 Nov 2023 21:15:17 +0100 Subject: [PATCH] Merge pull request #922 from ATIX-AG/fix_package_repo_uniqueness Fix package repo uniqueness (cherry picked from commit 7ede5bed2dfd9caa5c3722ed837821eda5052106) --- CHANGES/921.bugfix | 3 + pulp_deb/app/models/content/content.py | 2 - .../app/models/content/structure_content.py | 7 - pulp_deb/app/models/repository.py | 117 ++++++++++++++-- pulp_deb/app/tasks/exceptions.py | 18 --- .../functional/api/test_duplicate_packages.py | 132 ++++++++++++++++++ .../data/packages/duplicates/README.md | 1 + .../packages/duplicates/frigg_1.0_ppc64.deb | Bin 0 -> 2120 bytes pulp_deb/tests/functional/utils.py | 5 +- 9 files changed, 246 insertions(+), 39 deletions(-) create mode 100644 CHANGES/921.bugfix delete mode 100644 pulp_deb/app/tasks/exceptions.py create mode 100644 pulp_deb/tests/functional/api/test_duplicate_packages.py create mode 100644 pulp_deb/tests/functional/data/packages/duplicates/README.md create mode 100644 pulp_deb/tests/functional/data/packages/duplicates/frigg_1.0_ppc64.deb diff --git a/CHANGES/921.bugfix b/CHANGES/921.bugfix new file mode 100644 index 000000000..0a539c9df --- /dev/null +++ b/CHANGES/921.bugfix @@ -0,0 +1,3 @@ +Fixed repo uniqueness constraints. +Duplicate packages with identical checksums are now allowed. +In addition, duplicates are now also handled for the set of incoming content. diff --git a/pulp_deb/app/models/content/content.py b/pulp_deb/app/models/content/content.py index 3d6479b8d..496fe4fc9 100644 --- a/pulp_deb/app/models/content/content.py +++ b/pulp_deb/app/models/content/content.py @@ -90,8 +90,6 @@ def filename(self, component=""): "{}.{}".format(self.name, self.SUFFIX), ) - repo_key_fields = ("package", "version", "architecture") - class Meta: default_related_name = "%(app_label)s_%(model_name)s" unique_together = (("relative_path", "sha256"),) diff --git a/pulp_deb/app/models/content/structure_content.py b/pulp_deb/app/models/content/structure_content.py index 61e5a9d66..92f059e05 100644 --- a/pulp_deb/app/models/content/structure_content.py +++ b/pulp_deb/app/models/content/structure_content.py @@ -91,13 +91,6 @@ class PackageReleaseComponent(Content): package = models.ForeignKey(Package, on_delete=models.CASCADE) release_component = models.ForeignKey(ReleaseComponent, on_delete=models.CASCADE) - repo_key_fields = ( - "release_component", - "package__package", - "package__version", - "package__architecture", - ) - class Meta: default_related_name = "%(app_label)s_%(model_name)s" unique_together = (("package", "release_component"),) diff --git a/pulp_deb/app/models/repository.py b/pulp_deb/app/models/repository.py index 258f0286b..2d1475ad1 100644 --- a/pulp_deb/app/models/repository.py +++ b/pulp_deb/app/models/repository.py @@ -1,6 +1,12 @@ from django.db import models from pulpcore.plugin.models import BaseModel, Repository -from pulpcore.plugin.repo_version_utils import remove_duplicates, validate_version_paths +from pulpcore.plugin.repo_version_utils import ( + remove_duplicates, + validate_version_paths, + validate_duplicate_content, +) + +from pulpcore.plugin.models import Content from pulp_deb.app.models import ( AptReleaseSigningService, @@ -17,6 +23,11 @@ ReleaseFile, ) +import logging +from gettext import gettext as _ + +log = logging.getLogger(__name__) + class AptRepository(Repository): """ @@ -85,17 +96,10 @@ def finalize_new_version(self, new_version): finalize. """ - from pulp_deb.app.tasks.exceptions import DuplicateDistributionException - + handle_duplicate_packages(new_version) + validate_duplicate_content(new_version) remove_duplicates(new_version) validate_version_paths(new_version) - releases = new_version.get_content(Release.objects.all()) - distributions = [] - for release in releases: - distribution = release.distribution - if distribution in distributions: - raise DuplicateDistributionException(distribution) - distributions.append(distribution) class AptRepositoryReleaseServiceOverride(BaseModel): @@ -111,3 +115,96 @@ class AptRepositoryReleaseServiceOverride(BaseModel): class Meta: unique_together = (("repository", "release_distribution"),) + + +def handle_duplicate_packages(new_version): + """ + pulpcore's remove_duplicates does not work for .deb packages, since identical duplicate + packages (same sha256) are rare, but allowed, while duplicates with different sha256 are + forbidden. As such we need our own version of this function for .deb packages. Since we are + already building our own function, we will also be combining the functionality of pulpcore's + remove_duplicates and validate_duplicate_content within this function. + """ + content_qs_added = new_version.added(base_version=new_version.base_version) + if new_version.base_version: + content_qs_existing = new_version.base_version.content + else: + try: + content_qs_existing = new_version.previous().content + except new_version.DoesNotExist: + content_qs_existing = Content.objects.none() + package_types = { + Package.get_pulp_type(): Package, + InstallerPackage.get_pulp_type(): InstallerPackage, + } + repo_key_fields = ("package", "version", "architecture") + + for pulp_type, package_obj in package_types.items(): + # First handle duplicates within the packages added to new_version + package_qs_added = package_obj.objects.filter( + pk__in=content_qs_added.filter(pulp_type=pulp_type) + ) + added_unique = package_qs_added.distinct(*repo_key_fields) + added_checksum_unique = package_qs_added.distinct(*repo_key_fields, "sha256") + + if added_unique.count() < added_checksum_unique.count(): + package_qs_added_duplicates = added_checksum_unique.difference(added_unique) + if log.isEnabledFor(logging.DEBUG): + message = _( + "New repository version contains multiple packages with '{}', but differing " + "checksum!" + ) + for package_fields in package_qs_added_duplicates.values(*repo_key_fields): + log.debug(message.format(package_fields)) + + message = _( + "Cannot create repository version since there are newly added packages with the " + "same name, version, and architecture, but a different checksum. If the log level " + "is DEBUG, you can find a list of affected packages in the Pulp log." + ) + raise ValueError(message) + + # Now remove existing packages that are duplicates of any packages added to new_version + if package_qs_added.count() and content_qs_existing.count(): + for batch in batch_qs(package_qs_added.values(*repo_key_fields, "sha256")): + find_dup_qs = models.Q() + + for content_dict in batch: + sha256 = content_dict.pop("sha256") + item_query = models.Q(**content_dict) & ~models.Q(sha256=sha256) + find_dup_qs |= item_query + + package_qs_duplicates = ( + package_obj.objects.filter(pk__in=content_qs_existing) + .filter(find_dup_qs) + .only("pk") + ) + prc_qs_duplicates = ( + PackageReleaseComponent.objects.filter(pk__in=content_qs_existing) + .filter(package__in=package_qs_duplicates) + .only("pk") + ) + if package_qs_duplicates.count(): + message = _("Removing duplicates for type {} from new repo version.") + log.warning(message.format(pulp_type)) + new_version.remove_content(package_qs_duplicates) + new_version.remove_content(prc_qs_duplicates) + + +# The following helper function is copy and pasted from pulpcore. As soon as +# https://github.com/pulp/pulpcore/issues/4607 is ready, we should replace it with an import! +def batch_qs(qs, batch_size=1000): + """ + Returns a queryset batch in the given queryset. + + Usage: + # Make sure to order your querset + article_qs = Article.objects.order_by('id') + for qs in batch_qs(article_qs): + for article in qs: + print article.body + """ + total = qs.count() + for start in range(0, total, batch_size): + end = min(start + batch_size, total) + yield qs[start:end] diff --git a/pulp_deb/app/tasks/exceptions.py b/pulp_deb/app/tasks/exceptions.py deleted file mode 100644 index 470bfab60..000000000 --- a/pulp_deb/app/tasks/exceptions.py +++ /dev/null @@ -1,18 +0,0 @@ -from gettext import gettext as _ - - -class DuplicateDistributionException(Exception): - """ - Exception to signal, that we are creating a repository versions containing multiple Release - content with the same distribution. - """ - - def __init__(self, distribution, *args, **kwargs): - message = ( - "Cannot create the new repository version, since it contains multiple Release content " - "with the same distribution '{}'.\n" - "This known issue is tracked here: https://github.com/pulp/pulp_deb/issues/599\n" - "You can check the issue for known workarounds. Please also consider posting what you " - "did before getting this error, to help us to fix the underlying problem more quickly." - ) - super().__init__(_(message).format(distribution), *args, **kwargs) diff --git a/pulp_deb/tests/functional/api/test_duplicate_packages.py b/pulp_deb/tests/functional/api/test_duplicate_packages.py new file mode 100644 index 000000000..d9e918c0a --- /dev/null +++ b/pulp_deb/tests/functional/api/test_duplicate_packages.py @@ -0,0 +1,132 @@ +"""Tests relating to duplicate package handling. + +By "duplicate package" we mean two packages with the same Package, Version, and Architecture fields, +but different checksums. By contrast we refer to two packages with the same checksum (but stored at +a different relative_path within some repo) as two "identical packages". So defined, an APT repo may +contain identical packages, but may not contain any duplicates. + +To ensure this is the case we use the handle_duplicate_packages function. As such, these tests are +primarily intended to test this function. +""" +import pytest +from uuid import uuid4 + +from pulpcore.tests.functional.utils import PulpTaskError + +from pulp_deb.tests.functional.constants import DEB_PACKAGE_RELPATH +from pulp_deb.tests.functional.utils import ( + get_counts_from_content_summary, + get_local_package_absolute_path, +) + +DUPLICATE_PACKAGE_DIR = "data/packages/duplicates/" # below pulp_deb/tests/functional/ + + +def test_upload_package_and_duplicate( + apt_package_api, + deb_get_content_summary, + deb_get_repository_by_href, + deb_package_factory, + deb_repository_factory, +): + """Test uploading a package to a repo, and then uploading it's duplicate. + + The expectation is that uploading the duplicate will kick the older duplicate (along with the + associated PackageReleaseComponent) out of the repo. Only the newer duplicate and its PRC will + remain. + """ + # Generate an empty test repo. + repository = deb_repository_factory() + assert repository.latest_version_href.endswith("/0/") + repository_href = repository.pulp_href + + # Upload a test package to a component in the repo. + package_upload_params = { + "file": get_local_package_absolute_path(DEB_PACKAGE_RELPATH), + "relative_path": DEB_PACKAGE_RELPATH, + "distribution": str(uuid4()), + "component": str(uuid4()), + "repository": repository_href, + } + deb_package_factory(**package_upload_params) + + # Assert that the uploaded package has arrived in the repo. + repository = deb_get_repository_by_href(repository_href) + assert repository.latest_version_href.endswith("/1/") + content_counts = get_counts_from_content_summary(deb_get_content_summary(repository).added) + assert content_counts == { + "deb.package": 1, + "deb.package_release_component": 1, + "deb.release_architecture": 1, + "deb.release_component": 1, + } + package1_sha256 = ( + apt_package_api.list( + repository_version_added=repository.latest_version_href, fields=["sha256"] + ) + .results[0] + .sha256 + ) + + # Upload a duplicate of the first package into the repo. + package_upload_params["file"] = get_local_package_absolute_path( + package_name=DEB_PACKAGE_RELPATH, relative_path=DUPLICATE_PACKAGE_DIR + ) + deb_package_factory(**package_upload_params) + + # Assert that only the newer duplicate is now in the repo. + repository = deb_get_repository_by_href(repository_href) + assert repository.latest_version_href.endswith("/2/") + content_summary = deb_get_content_summary(repository) + content_counts_added = get_counts_from_content_summary(content_summary.added) + content_counts_removed = get_counts_from_content_summary(content_summary.removed) + assert content_counts_added == { + "deb.package": 1, + "deb.package_release_component": 1, + } + assert content_counts_removed == { + "deb.package": 1, + "deb.package_release_component": 1, + } + package2_sha256 = ( + apt_package_api.list( + repository_version_added=repository.latest_version_href, fields=["sha256"] + ) + .results[0] + .sha256 + ) + assert package1_sha256 != package2_sha256 + + +def test_add_duplicates_to_repo( + deb_modify_repository, + deb_package_factory, + deb_repository_factory, +): + """Test adding two duplicate packages to a repository in a single modify action. + + The expectation is that this will raise a ValueError. + """ + # Upload two duplicate packages. + package_upload_params = { + "file": get_local_package_absolute_path( + package_name=DEB_PACKAGE_RELPATH, relative_path=DUPLICATE_PACKAGE_DIR + ), + "relative_path": DEB_PACKAGE_RELPATH, + } + href1 = deb_package_factory(**package_upload_params).pulp_href + package_upload_params["file"] = get_local_package_absolute_path(DEB_PACKAGE_RELPATH) + href2 = deb_package_factory(**package_upload_params).pulp_href + + # Generate an empty test repo. + repository = deb_repository_factory() + assert repository.latest_version_href.endswith("/0/") + + # Add the duplicates to the repository. + with pytest.raises(PulpTaskError) as exception: + deb_modify_repository(repository, {"add_content_units": [href1, href2]}) + + # Assert the error message. + assert "Cannot create repository version since there are newly added packages with" in str( + exception.value + ) diff --git a/pulp_deb/tests/functional/data/packages/duplicates/README.md b/pulp_deb/tests/functional/data/packages/duplicates/README.md new file mode 100644 index 000000000..ceb2c75a2 --- /dev/null +++ b/pulp_deb/tests/functional/data/packages/duplicates/README.md @@ -0,0 +1 @@ +This is a duplicate of the package with a different checksum! diff --git a/pulp_deb/tests/functional/data/packages/duplicates/frigg_1.0_ppc64.deb b/pulp_deb/tests/functional/data/packages/duplicates/frigg_1.0_ppc64.deb new file mode 100644 index 0000000000000000000000000000000000000000..ab424443b27efbb93a9b4c5c2932c01fe43e29dc GIT binary patch literal 2120 zcmbuv=1g6G(xl(856{0zwGk zx&a~K1QJD2QPBWlW^87FFh(F06=6H`|9%4)3;~BLD(<}h1}|)=2NMpA2#+L1gy}^R zNP5xd|9Rio$ndZIEtB)!H~>Isf%VWvsEa8W=c)vh2K*-%I_^}@sBZ%Yjsavu+T||- zv%3yBpea&41HY9X-%s_ph!Lcg2coV0!{X_ zf&){!zEVQR{xGG{`_1 z=MBWp@rGFq6Thjo{HiN(|8BSlgvmmpMm>+`D-mJzR%3)lhSZ=(^imy${ zp(Fd6s^DeE8WQ?)+DHA!koRAsSWc$6-U#9?pBF=FShsSI?xEqst4C&_vFvhrZ9UAMFk^dnftiGDIPWFB1a?;wMnW0h zd8j;YvYvA7C0-0%njL^q(|CL7A;K2_gA>c%lLx+##5sm?p2>xFdTv-vz1F03=nq;w zcU!!axLO5X#LD8n-4w{|Z+wNm9Us_xp%AK2F#r{t@fzjTS7r4b#V72uP-P6>qgvDy z-WxtJ;F4ru+Q{{2{Z&)d=(8G_Vbi#g^rO3O~b%*Q;j7@Y#}w^U{lP=O^$ z62N+&ng>c`RJ!hk=`Jz}c;U6&WQB#vFqTZ3WM+oQEN*VhD`^&0k@RNf3=_b8HqCpq z)PV}!NQE(~k}Wrs#ChF#D`c)O>41k*h5AO**NVzp(?V?|9V+k~dOE}HoxC#jfvB-s zSh|9Jc#PJPu+jebQ*2pQh7hpUOZjXexD?huoo}+vqw2o|`cq$@OO76Q<_JEYP zmW6!QsJQOay5vfZjtm=RXn+BVHR-JfTQtTtND3TFD{g(|xQ`sU*Sp>y`1oMAsJmjn zX-2oz{mhZu1%e-H1ZxVX(I#qx&RbKb7isxkA-rp>+4C2p)3W`K;I2aK>;}IqbcH-M zQ2vAZx;hm+qFKsSaBUJ4Ngm_ zmEOAm&E{PcN8-9#Wvk`3+-)Uf^2)?7B-@-Vo8fN89j~|>trQ;HLEpOf#XBPcPTI|| zl#Qh;<2RqIm6j-WsJ%o0;}SG=Ciz}%dHU!29$p8I_?8pDCVsDK&GK#d9-{;s>tC`7 zUYP^w2^h;*ly#ggvA+L0n5QyqtI<|I88NBZV{DWck(Fztf+|o65w=HWoNE@3YG7`_ zK9WsPN8a!0d?Mds<&W9l6V_~jmtdEFopq^E!lypQjx9l_sV@x^Th7bW8OnLTgbGE} zjQg}oCq(;xjJtfjtR0NeFmp3L&h|og9b`b(rx;h@=gLGHyWE%(1%;{T??vxBmXCON zhv=}M{7}j(&y;Wso*#Eb&`Tz!-y-+bJ!*_ke6t-r3c0tr8c-bKpqq7Le^*ou-5V4z zebtBIYORI3@!T^V?DU9r1L!Scx?-16T*My_I*y_j<8rxOD<4uZ5Z56pd>($aonOd) zjDes0MD^-&fsfy!4~IBJnJiYvLHOErmeo1x&>E*4w&(1yOMV|NlFJ_-uMg>=K}+la z)GGtk$K5adAWjMi9rbjw>?tD((^n?on=NG-Q<(mg!Nsi8zzQZ~@)f6{OvdzKko@!h z^1vb%n3>lq|!DnYyMp1oUiwdhtLEUCHB1dHZHI#>Dk>@Kej=GNBH_+g3rQbJ4a zYS=@_LCWTmBO3A7_G#6l)K@#5^CDwX)1X)MTnR*90o78eOlYg0gzd9Hw*R$X^0RfT mkWaoBlpKU;hb#=*lwy literal 0 HcmV?d00001 diff --git a/pulp_deb/tests/functional/utils.py b/pulp_deb/tests/functional/utils.py index 6e3eddd3d..f1af09549 100644 --- a/pulp_deb/tests/functional/utils.py +++ b/pulp_deb/tests/functional/utils.py @@ -63,15 +63,16 @@ def gen_deb_remote_verbose(url=None, remove_policy=False): return data -def get_local_package_absolute_path(package_name): +def get_local_package_absolute_path(package_name, relative_path="data/packages/"): """Looks up the local package of the given name under the relative path 'data/packages/' and returns the absolute path. :param package_name: Name of the package to look up. + :param relative_path: The relative path of the directory below pulp_deb/tests/functional/. :returns: The absolute path to the package. """ p = Path(__file__).parent.absolute() - return p.joinpath(f"data/packages/{package_name}") + return p.joinpath(relative_path).joinpath(package_name) def gen_distribution(**kwargs):