From 6fcf46093bab4ee52fbe90241b8d85d58f1b6a29 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Mon, 23 Oct 2023 12:19:11 +0200 Subject: [PATCH 1/2] [SPO] Fix handling drive_items in incremental syncs --- connectors/sources/sharepoint_online.py | 30 ++++++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/connectors/sources/sharepoint_online.py b/connectors/sources/sharepoint_online.py index 039b2b562..157cdcdb9 100644 --- a/connectors/sources/sharepoint_online.py +++ b/connectors/sources/sharepoint_online.py @@ -1579,16 +1579,31 @@ async def _drive_items_batch_with_permissions( return - ids_to_items = { - drive_item["id"]: drive_item for drive_item in drive_items_batch + def _is_item_deleted(drive_item): + return self.drive_item_operation(drive_item) == OP_DELETE + + # Don't fetch access controls for deleted drive items + deleted_drive_items = [ + drive_item + for drive_item in drive_items_batch + if _is_item_deleted(drive_item) + ] + for drive_item in deleted_drive_items: + yield drive_item + + # Fetch access controls only for upserts + upsert_ids_to_items = { + drive_item["id"]: drive_item + for drive_item in drive_items_batch + if not _is_item_deleted(drive_item) } - drive_items_ids = list(ids_to_items.keys()) + upsert_drive_items_ids = list(upsert_ids_to_items.keys()) async for permissions_response in self.client.drive_items_permissions_batch( - drive_id, drive_items_ids + drive_id, upsert_drive_items_ids ): drive_item_id = permissions_response.get("id") - drive_item = ids_to_items.get(drive_item_id) + drive_item = upsert_ids_to_items.get(drive_item_id) permissions = permissions_response.get("body", {}).get("value", []) if drive_item: @@ -1716,7 +1731,10 @@ async def get_docs_incrementally(self, sync_cursor, filtering=None): site, site_access_control ), None, OP_INDEX - async for site_drive in self.site_drives(site, check_timestamp=True): + # Edit operation on a drive_item doesn't update the + # lastModifiedDateTime of the parent site_drive. Therfore, we + # set check_timestamp to False when iterating over site_drives. + async for site_drive in self.site_drives(site, check_timestamp=False): yield self._decorate_with_access_control( site_drive, site_access_control ), None, OP_INDEX From bb9247b2d9e966320d15e82d713ccf18633fb240 Mon Sep 17 00:00:00 2001 From: Jedr Blaszyk Date: Mon, 23 Oct 2023 12:29:46 +0200 Subject: [PATCH 2/2] Add unittests --- tests/sources/test_sharepoint_online.py | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/sources/test_sharepoint_online.py b/tests/sources/test_sharepoint_online.py index 91ccadbd4..303941733 100644 --- a/tests/sources/test_sharepoint_online.py +++ b/tests/sources/test_sharepoint_online.py @@ -2646,6 +2646,44 @@ async def test_drive_items_batch_with_permissions_when_fetch_drive_item_permissi for drive_item in drive_items_without_permissions ) + @pytest.mark.asyncio + async def test_drive_items_batch_with_permissions_for_delta_delete_operation( + self, patch_sharepoint_client + ): + async with create_spo_source(use_document_level_security=True) as source: + drive_id = 1 + drive_item_ids = ["1", "2"] + drive_items_batch = [ + {"id": "1", "deleted": {"state": "deleted"}}, + {"id": "2"}, + ] + + permissions_responses = [ + { + "id": drive_item_id, + "body": { + "value": [{"grantedToV2": {"user": {"id": "some user id"}}}] + }, + } + for drive_item_id in drive_item_ids + ] + + patch_sharepoint_client.drive_items_permissions_batch = AsyncIterator( + permissions_responses + ) + + drive_items_with_permissions = [] + + async for drive_item_with_permission in source._drive_items_batch_with_permissions( + drive_id, drive_items_batch, "dummy_site_web_url" + ): + drive_items_with_permissions.append(drive_item_with_permission) + + assert len(drive_items_with_permissions) == len(drive_item_ids) + # Item with id 1 was deleted, so not trying to fetch permissions + assert ACCESS_CONTROL not in drive_items_with_permissions[0] + assert ACCESS_CONTROL in drive_items_with_permissions[1] + @pytest.mark.asyncio @patch( "connectors.sources.sharepoint_online.ACCESS_CONTROL",