Skip to content

Commit

Permalink
Add option to preserve trailing slash on deletes
Browse files Browse the repository at this point in the history
Most object storages support having a slash in the object
name and rohmu should be able to delete such objects.

Let the default behaviour the same but introduce an option
`preserve_trailing_slash` which allows to control whether the trailing
slash in the key are stripped or not.
  • Loading branch information
giacomo-alzetta-aiven committed Jan 16, 2025
1 parent 7ed9186 commit 8ff4cc0
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 23 deletions.
6 changes: 4 additions & 2 deletions rohmu/object_storage/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,10 @@ def _iter_key(self, *, path: str, with_metadata: bool, deep: bool) -> Iterator[I
},
)

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key, remove_slash_prefix=True)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(
key, remove_slash_prefix=True, trailing_slash=preserve_trailing_slash and key.endswith("/")
)
self.log.debug("Deleting key: %r", path)
try:
blob_client = self.get_blob_service_client().get_blob_client(container=self.container_name, blob=path)
Expand Down
10 changes: 5 additions & 5 deletions rohmu/object_storage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,20 +255,20 @@ def format_key_from_backend(self, key: str) -> str:
raise StorageError(f"Key {repr(key)} does not start with expected prefix {repr(self.prefix)}")
return key[len(self.prefix) :]

def delete_key(self, key: str) -> None:
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
raise NotImplementedError

def delete_keys(self, keys: Collection[str]) -> None:
def delete_keys(self, keys: Collection[str], preserve_trailing_slash: bool = False) -> None:
"""Delete specified keys"""
for key in keys:
self.delete_key(key)
self.delete_key(key, preserve_trailing_slash=preserve_trailing_slash)

def delete_tree(self, key: str) -> None:
def delete_tree(self, key: str, preserve_trailing_slash: bool = False) -> None:
"""Delete all keys under given root key. Basic implementation works by just listing all available
keys and deleting them individually but storage providers can implement more efficient logic."""
self.log.debug("Deleting tree: %r", key)
names = [item["name"] for item in self.list_path(key, with_metadata=False, deep=True)]
self.delete_keys(names)
self.delete_keys(names, preserve_trailing_slash=preserve_trailing_slash)

def get_contents_to_file(
self, key: str, filepath_to_store_to: AnyPath, *, progress_callback: ProgressProportionCallbackType = None
Expand Down
4 changes: 2 additions & 2 deletions rohmu/object_storage/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,8 @@ def initial_op(domain: Any) -> HttpRequest:
else:
raise NotImplementedError(property_name)

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(key, trailing_slash=preserve_trailing_slash and key.endswith("/"))
self.log.debug("Deleting key: %r", path)
with self._object_client(not_found=path) as clob:
# https://googleapis.github.io/google-api-python-client/docs/dyn/storage_v1.objects.html#delete
Expand Down
4 changes: 2 additions & 2 deletions rohmu/object_storage/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _filter_metadata(self, metadata: Metadata) -> Metadata:
def get_metadata_for_key(self, key: str) -> Metadata:
return self._filter_metadata(self._get_metadata_for_key(key))

def delete_key(self, key: str) -> None:
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
self.log.debug("Deleting key: %r", key)
target_path = self.format_key_for_backend(key.strip("/"))
if not os.path.exists(target_path):
Expand All @@ -137,7 +137,7 @@ def delete_key(self, key: str) -> None:
os.unlink(metadata_path)
self.notifier.object_deleted(key=key)

def delete_tree(self, key: str) -> None:
def delete_tree(self, key: str, preserve_trailing_slash: bool = False) -> None:
self.log.debug("Deleting tree: %r", key)
target_path = self.format_key_for_backend(key.strip("/"))
if not os.path.isdir(target_path):
Expand Down
17 changes: 13 additions & 4 deletions rohmu/object_storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,20 +331,29 @@ def _metadata_for_key(self, key: str) -> Metadata:

return response["Metadata"]

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key, remove_slash_prefix=True)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(
key, remove_slash_prefix=True, trailing_slash=preserve_trailing_slash and key.endswith("/")
)
self.log.debug("Deleting key: %r", path)
self._metadata_for_key(path) # check that key exists
self.stats.operation(StorageOperation.delete_key)
self.get_client().delete_object(Bucket=self.bucket_name, Key=path)
self.notifier.object_deleted(key=key)

def delete_keys(self, keys: Collection[str]) -> None:
def delete_keys(self, keys: Collection[str], preserve_trailing_slash: bool = False) -> None:
self.stats.operation(StorageOperation.delete_key, count=len(keys))
for batch in batched(keys, 1000): # Cannot delete more than 1000 objects at a time
formatted_keys = [
self.format_key_for_backend(
k,
remove_slash_prefix=True,
trailing_slash=preserve_trailing_slash and k.endswith("/"),
) for k in batch
]
self.get_client().delete_objects(
Bucket=self.bucket_name,
Delete={"Objects": [{"Key": self.format_key_for_backend(key, remove_slash_prefix=True)} for key in batch]},
Delete={"Objects": [{"Key": key} for key in formatted_keys]},
)
# Note: `tree_deleted` is not used here because the operation on S3 is not atomic, i.e.
# it is possible for a new object to be created after `list_objects` above
Expand Down
2 changes: 1 addition & 1 deletion rohmu/object_storage/sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def copy_file(
) -> None:
raise NotImplementedError

def delete_key(self, key: str) -> None:
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
target_path = self.format_key_for_backend(key.strip("/"))
self.log.info("Removing path: %r", target_path)

Expand Down
4 changes: 2 additions & 2 deletions rohmu/object_storage/swift.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ def _delete_object_segments(self, key: str, manifest: str) -> None:
with suppress(FileNotFoundFromStorageError):
self._delete_object_plain(item["name"])

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(key, trailing_slash=preserve_trailing_slash and key.endswith("/"))
self.log.debug("Deleting key: %r", path)
try:
headers = self.conn.head_object(self.container_name, path)
Expand Down
37 changes: 32 additions & 5 deletions test/object_storage/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,40 @@ def test_operations_reporting(infra: S3Infra) -> None:
infra.operation.assert_called_once_with(StorageOperation.head_request)


def test_deletion(infra: S3Infra) -> None:
infra.transfer.delete_keys(["2", "3"])
@pytest.mark.parametrize("preserve_trailing_slash", [True, False, None])
def test_delete_keys(infra: S3Infra, preserve_trailing_slash: bool | None) -> None:
if preserve_trailing_slash is None:
infra.transfer.delete_keys(["2", "3", "4/"])
else:
infra.transfer.delete_keys(["2", "3", "4/"], preserve_trailing_slash=preserve_trailing_slash)
infra.s3_client.delete_objects.assert_called_once_with(
Bucket="test-bucket", Delete={"Objects": [{"Key": "test-prefix/2"}, {"Key": "test-prefix/3"}]}
Bucket="test-bucket",
Delete={
"Objects": [
{"Key": "test-prefix/2"},
{"Key": "test-prefix/3"},
{"Key": "test-prefix/4/" if preserve_trailing_slash else "test-prefix/4"},
],
},
)
infra.transfer.delete_key("1")
infra.s3_client.delete_object.assert_called_once_with(Bucket="test-bucket", Key="test-prefix/1")

@pytest.mark.parametrize(
("key", "preserve_trailing_slash", "expected_key"),
[
("1", True, "test-prefix/1"),
("2/", True, "test-prefix/2/"),
("1", False, "test-prefix/1"),
("2/", False, "test-prefix/2"),
("1", None, "test-prefix/1"),
("2/", None, "test-prefix/2"),
],
)
def test_delete_key(infra: S3Infra, key: str, preserve_trailing_slash: bool | None, expected_key: str) -> None:
if preserve_trailing_slash is None:
infra.transfer.delete_key(key)
else:
infra.transfer.delete_key(key, preserve_trailing_slash=preserve_trailing_slash)
infra.s3_client.delete_object.assert_called_once_with(Bucket="test-bucket", Key=expected_key)


def test_get_contents_to_fileobj_raises_error_on_invalid_byte_range(infra: S3Infra) -> None:
Expand Down

0 comments on commit 8ff4cc0

Please sign in to comment.