Skip to content

Commit

Permalink
Made orphan clean up more tolerant when other tasks are running in pa…
Browse files Browse the repository at this point in the history
…rallel.

closes #4209
Orphan clean up can fail when other tasks like sync or content upload
might be running in parallel.
  • Loading branch information
ipanova committed Aug 17, 2023
1 parent 89d37d1 commit 11fcecc
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGES/4209.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Made orphan clean up more tolerant when other tasks are running in parallel.
28 changes: 22 additions & 6 deletions pulpcore/app/tasks/orphan.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import gc

from logger import getLogger

from django.conf import settings
from django.db.models.deletion import ProtectedError
from django.utils import timezone

from pulpcore.app.models import (
Expand All @@ -12,6 +15,8 @@
Upload,
)

log = getLogger(__name__)


def queryset_iterator(qs, batchsize=2000, gc_collect=True):
"""
Expand Down Expand Up @@ -53,14 +58,20 @@ def orphan_cleanup(content_pks=None, orphan_protection_time=settings.ORPHAN_PROT
content = Content.objects.orphaned(orphan_protection_time, content_pks).exclude(
pulp_type=PublishedMetadata.get_pulp_type()
)
content_count = content.count()
if not content_count:
if not content.exists():
break

# delete the content
for c in queryset_iterator(content):
progress_bar.increase_by(c.count())
c.delete()
count = c.count()
try:
c.delete()
except ProtectedError as e:
# some orphan content might have been picked up by another task running in parallel,
# e.g. sync
log.info(e)
else:
progress_bar.increase_by(count)

# delete the artifacts that don't belong to any content
artifacts = Artifact.objects.orphaned(orphan_protection_time)
Expand All @@ -71,8 +82,13 @@ def orphan_cleanup(content_pks=None, orphan_protection_time=settings.ORPHAN_PROT
code="clean-up.artifacts",
) as progress_bar:
for artifact in progress_bar.iter(artifacts.iterator()):
# we need to manually call delete() because it cleans up the file on the filesystem
artifact.delete()
try:
# we need to manually call delete() because it cleans up the file on the filesystem
artifact.delete()
except ProtectedError as e:
# Rarely, an artifact could be shared between two different content units.
# Just log and skip the artifact deletion in this case
log.info(e)


def upload_cleanup():
Expand Down

0 comments on commit 11fcecc

Please sign in to comment.