diff --git a/rohmu/object_storage/azure.py b/rohmu/object_storage/azure.py index fcc89a15..0281e456 100644 --- a/rohmu/object_storage/azure.py +++ b/rohmu/object_storage/azure.py @@ -301,8 +301,10 @@ def _iter_key(self, *, path: str, with_metadata: bool, deep: bool) -> Iterator[I }, ) - def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key, remove_slash_prefix=True) + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: + path = self.format_key_for_backend( + key, remove_slash_prefix=True, trailing_slash=preserve_trailing_slash and key.endswith("/") + ) self.log.debug("Deleting key: %r", path) try: blob_client = self.get_blob_service_client().get_blob_client(container=self.container_name, blob=path) diff --git a/rohmu/object_storage/base.py b/rohmu/object_storage/base.py index 95766929..ec301115 100644 --- a/rohmu/object_storage/base.py +++ b/rohmu/object_storage/base.py @@ -255,20 +255,20 @@ def format_key_from_backend(self, key: str) -> str: raise StorageError(f"Key {repr(key)} does not start with expected prefix {repr(self.prefix)}") return key[len(self.prefix) :] - def delete_key(self, key: str) -> None: + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: raise NotImplementedError - def delete_keys(self, keys: Collection[str]) -> None: + def delete_keys(self, keys: Collection[str], preserve_trailing_slash: bool = False) -> None: """Delete specified keys""" for key in keys: - self.delete_key(key) + self.delete_key(key, preserve_trailing_slash=preserve_trailing_slash) - def delete_tree(self, key: str) -> None: + def delete_tree(self, key: str, preserve_trailing_slash: bool = False) -> None: """Delete all keys under given root key. Basic implementation works by just listing all available keys and deleting them individually but storage providers can implement more efficient logic.""" self.log.debug("Deleting tree: %r", key) names = [item["name"] for item in self.list_path(key, with_metadata=False, deep=True)] - self.delete_keys(names) + self.delete_keys(names, preserve_trailing_slash=preserve_trailing_slash) def get_contents_to_file( self, key: str, filepath_to_store_to: AnyPath, *, progress_callback: ProgressProportionCallbackType = None diff --git a/rohmu/object_storage/google.py b/rohmu/object_storage/google.py index c6e05043..e93f6fa3 100644 --- a/rohmu/object_storage/google.py +++ b/rohmu/object_storage/google.py @@ -452,8 +452,8 @@ def initial_op(domain: Any) -> HttpRequest: else: raise NotImplementedError(property_name) - def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key) + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: + path = self.format_key_for_backend(key, trailing_slash=preserve_trailing_slash and key.endswith("/")) self.log.debug("Deleting key: %r", path) with self._object_client(not_found=path) as clob: # https://googleapis.github.io/google-api-python-client/docs/dyn/storage_v1.objects.html#delete diff --git a/rohmu/object_storage/local.py b/rohmu/object_storage/local.py index 84775aea..01e56591 100644 --- a/rohmu/object_storage/local.py +++ b/rohmu/object_storage/local.py @@ -123,7 +123,7 @@ def _filter_metadata(self, metadata: Metadata) -> Metadata: def get_metadata_for_key(self, key: str) -> Metadata: return self._filter_metadata(self._get_metadata_for_key(key)) - def delete_key(self, key: str) -> None: + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: self.log.debug("Deleting key: %r", key) target_path = self.format_key_for_backend(key.strip("/")) if not os.path.exists(target_path): @@ -137,7 +137,7 @@ def delete_key(self, key: str) -> None: os.unlink(metadata_path) self.notifier.object_deleted(key=key) - def delete_tree(self, key: str) -> None: + def delete_tree(self, key: str, preserve_trailing_slash: bool = False) -> None: self.log.debug("Deleting tree: %r", key) target_path = self.format_key_for_backend(key.strip("/")) if not os.path.isdir(target_path): diff --git a/rohmu/object_storage/s3.py b/rohmu/object_storage/s3.py index e19fd693..1a9f62af 100644 --- a/rohmu/object_storage/s3.py +++ b/rohmu/object_storage/s3.py @@ -331,20 +331,29 @@ def _metadata_for_key(self, key: str) -> Metadata: return response["Metadata"] - def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key, remove_slash_prefix=True) + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: + path = self.format_key_for_backend( + key, remove_slash_prefix=True, trailing_slash=preserve_trailing_slash and key.endswith("/") + ) self.log.debug("Deleting key: %r", path) self._metadata_for_key(path) # check that key exists self.stats.operation(StorageOperation.delete_key) self.get_client().delete_object(Bucket=self.bucket_name, Key=path) self.notifier.object_deleted(key=key) - def delete_keys(self, keys: Collection[str]) -> None: + def delete_keys(self, keys: Collection[str], preserve_trailing_slash: bool = False) -> None: self.stats.operation(StorageOperation.delete_key, count=len(keys)) for batch in batched(keys, 1000): # Cannot delete more than 1000 objects at a time + formatted_keys = [ + self.format_key_for_backend( + k, + remove_slash_prefix=True, + trailing_slash=preserve_trailing_slash and k.endswith("/"), + ) for k in batch + ] self.get_client().delete_objects( Bucket=self.bucket_name, - Delete={"Objects": [{"Key": self.format_key_for_backend(key, remove_slash_prefix=True)} for key in batch]}, + Delete={"Objects": [{"Key": key} for key in formatted_keys]}, ) # Note: `tree_deleted` is not used here because the operation on S3 is not atomic, i.e. # it is possible for a new object to be created after `list_objects` above diff --git a/rohmu/object_storage/sftp.py b/rohmu/object_storage/sftp.py index 8695ad50..1afe5333 100644 --- a/rohmu/object_storage/sftp.py +++ b/rohmu/object_storage/sftp.py @@ -210,7 +210,7 @@ def copy_file( ) -> None: raise NotImplementedError - def delete_key(self, key: str) -> None: + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: target_path = self.format_key_for_backend(key.strip("/")) self.log.info("Removing path: %r", target_path) diff --git a/rohmu/object_storage/swift.py b/rohmu/object_storage/swift.py index 81efa12c..e108afcc 100644 --- a/rohmu/object_storage/swift.py +++ b/rohmu/object_storage/swift.py @@ -225,8 +225,8 @@ def _delete_object_segments(self, key: str, manifest: str) -> None: with suppress(FileNotFoundFromStorageError): self._delete_object_plain(item["name"]) - def delete_key(self, key: str) -> None: - path = self.format_key_for_backend(key) + def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None: + path = self.format_key_for_backend(key, trailing_slash=preserve_trailing_slash and key.endswith("/")) self.log.debug("Deleting key: %r", path) try: headers = self.conn.head_object(self.container_name, path) diff --git a/test/object_storage/test_s3.py b/test/object_storage/test_s3.py index e39d6fa2..0336b306 100644 --- a/test/object_storage/test_s3.py +++ b/test/object_storage/test_s3.py @@ -168,13 +168,40 @@ def test_operations_reporting(infra: S3Infra) -> None: infra.operation.assert_called_once_with(StorageOperation.head_request) -def test_deletion(infra: S3Infra) -> None: - infra.transfer.delete_keys(["2", "3"]) +@pytest.mark.parametrize("preserve_trailing_slash", [True, False, None]) +def test_delete_keys(infra: S3Infra, preserve_trailing_slash: bool | None) -> None: + if preserve_trailing_slash is None: + infra.transfer.delete_keys(["2", "3", "4/"]) + else: + infra.transfer.delete_keys(["2", "3", "4/"], preserve_trailing_slash=preserve_trailing_slash) infra.s3_client.delete_objects.assert_called_once_with( - Bucket="test-bucket", Delete={"Objects": [{"Key": "test-prefix/2"}, {"Key": "test-prefix/3"}]} + Bucket="test-bucket", + Delete={ + "Objects": [ + {"Key": "test-prefix/2"}, + {"Key": "test-prefix/3"}, + {"Key": "test-prefix/4/" if preserve_trailing_slash else "test-prefix/4"}, + ], + }, ) - infra.transfer.delete_key("1") - infra.s3_client.delete_object.assert_called_once_with(Bucket="test-bucket", Key="test-prefix/1") + +@pytest.mark.parametrize( + ("key", "preserve_trailing_slash", "expected_key"), + [ + ("1", True, "test-prefix/1"), + ("2/", True, "test-prefix/2/"), + ("1", False, "test-prefix/1"), + ("2/", False, "test-prefix/2"), + ("1", None, "test-prefix/1"), + ("2/", None, "test-prefix/2"), + ], +) +def test_delete_key(infra: S3Infra, key: str, preserve_trailing_slash: bool | None, expected_key: str) -> None: + if preserve_trailing_slash is None: + infra.transfer.delete_key(key) + else: + infra.transfer.delete_key(key, preserve_trailing_slash=preserve_trailing_slash) + infra.s3_client.delete_object.assert_called_once_with(Bucket="test-bucket", Key=expected_key) def test_get_contents_to_fileobj_raises_error_on_invalid_byte_range(infra: S3Infra) -> None: