From 9705b88d3da9b4f53f7f96172caaa3b52d130e3c Mon Sep 17 00:00:00 2001 From: Rahul Bhargava Date: Fri, 15 Dec 2023 09:53:28 -0500 Subject: [PATCH] add full paging test to investigate paging bug --- mcnews/tests/test_fixtures.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mcnews/tests/test_fixtures.py b/mcnews/tests/test_fixtures.py index ea434a9..868812e 100644 --- a/mcnews/tests/test_fixtures.py +++ b/mcnews/tests/test_fixtures.py @@ -1,7 +1,5 @@ from unittest import TestCase import datetime as dt -import os -import pytest import mcnews.searchapi as searchapi INTEGRATION_TEST_COLLECTION = "mediacloud_test" @@ -17,7 +15,7 @@ def setUp(self) -> None: def test_count(self): results = self._api.count("*", dt.datetime(2023, 1, 1), dt.datetime(2024, 1, 1)) assert results > 0 - assert results < 5000 + assert results < 20000 def test_count_over_time(self): results = self._api.count_over_time("*", dt.datetime(2020, 1, 1), dt.datetime(2025, 1, 1)) @@ -39,12 +37,12 @@ def test_count_date_filter(self): def test_paged_articles(self): query = "*" - start_date = dt.datetime(2023, 10, 1) + start_date = dt.datetime(2023, 1, 1) end_date = dt.datetime(2023, 12, 31) story_count = self._api.count(query, start_date, end_date) # make sure test case is reasonable size (ie. more than one page, but not too many pages assert story_count > 1000 - assert story_count < 10000 + assert story_count < 20000 # fetch first page page1, next_token1 = self._api.paged_articles(query, start_date, end_date) assert len(page1) > 0 @@ -58,3 +56,22 @@ def test_paged_articles(self): page2_urls = [s['url'] for s in page2] assert page1_url1 not in page2_urls # verify pages don't overlap + def test_page_all(self): + query = "*" + start_date = dt.datetime(2023, 1, 1) + end_date = dt.datetime(2023, 12, 31) + story_count = self._api.count(query, start_date, end_date) + # fetch first page + more_stories = True + stories = [] + next_page_token = None + page_count = 0 + while more_stories: + page, next_page_token = self._api.paged_articles(query, start_date, end_date, + pagination_token=next_page_token) + assert len(page) > 0 + stories += page + more_stories = next_page_token is not None + page_count += 1 + assert len(stories) > story_count * 0.9 # why doesn't this match :-( + assert page_count == (1 + int(story_count / 1000))