Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPO] Fix bugs when handling drive_items in incremental sync #1827

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions connectors/sources/sharepoint_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -1579,16 +1579,31 @@ async def _drive_items_batch_with_permissions(

return

ids_to_items = {
drive_item["id"]: drive_item for drive_item in drive_items_batch
def _is_item_deleted(drive_item):
return self.drive_item_operation(drive_item) == OP_DELETE

# Don't fetch access controls for deleted drive items
deleted_drive_items = [
drive_item
for drive_item in drive_items_batch
if _is_item_deleted(drive_item)
]
for drive_item in deleted_drive_items:
yield drive_item

# Fetch access controls only for upserts
upsert_ids_to_items = {
drive_item["id"]: drive_item
for drive_item in drive_items_batch
if not _is_item_deleted(drive_item)
}
drive_items_ids = list(ids_to_items.keys())
upsert_drive_items_ids = list(upsert_ids_to_items.keys())

async for permissions_response in self.client.drive_items_permissions_batch(
drive_id, drive_items_ids
drive_id, upsert_drive_items_ids
):
drive_item_id = permissions_response.get("id")
drive_item = ids_to_items.get(drive_item_id)
drive_item = upsert_ids_to_items.get(drive_item_id)
permissions = permissions_response.get("body", {}).get("value", [])

if drive_item:
Expand Down Expand Up @@ -1716,7 +1731,10 @@ async def get_docs_incrementally(self, sync_cursor, filtering=None):
site, site_access_control
), None, OP_INDEX

async for site_drive in self.site_drives(site, check_timestamp=True):
# Edit operation on a drive_item doesn't update the
# lastModifiedDateTime of the parent site_drive. Therfore, we
# set check_timestamp to False when iterating over site_drives.
async for site_drive in self.site_drives(site, check_timestamp=False):
yield self._decorate_with_access_control(
site_drive, site_access_control
), None, OP_INDEX
Expand Down
38 changes: 38 additions & 0 deletions tests/sources/test_sharepoint_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -2646,6 +2646,44 @@ async def test_drive_items_batch_with_permissions_when_fetch_drive_item_permissi
for drive_item in drive_items_without_permissions
)

@pytest.mark.asyncio
async def test_drive_items_batch_with_permissions_for_delta_delete_operation(
self, patch_sharepoint_client
):
async with create_spo_source(use_document_level_security=True) as source:
drive_id = 1
drive_item_ids = ["1", "2"]
drive_items_batch = [
{"id": "1", "deleted": {"state": "deleted"}},
{"id": "2"},
]

permissions_responses = [
{
"id": drive_item_id,
"body": {
"value": [{"grantedToV2": {"user": {"id": "some user id"}}}]
},
}
for drive_item_id in drive_item_ids
]

patch_sharepoint_client.drive_items_permissions_batch = AsyncIterator(
permissions_responses
)

drive_items_with_permissions = []

async for drive_item_with_permission in source._drive_items_batch_with_permissions(
drive_id, drive_items_batch, "dummy_site_web_url"
):
drive_items_with_permissions.append(drive_item_with_permission)

assert len(drive_items_with_permissions) == len(drive_item_ids)
# Item with id 1 was deleted, so not trying to fetch permissions
assert ACCESS_CONTROL not in drive_items_with_permissions[0]
assert ACCESS_CONTROL in drive_items_with_permissions[1]

@pytest.mark.asyncio
@patch(
"connectors.sources.sharepoint_online.ACCESS_CONTROL",
Expand Down
Loading