Skip to content
This repository has been archived by the owner on Oct 10, 2024. It is now read-only.

add full paging test to investigate paging bug #2

Merged
merged 1 commit into from
Dec 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions mcnews/tests/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from unittest import TestCase
import datetime as dt
import os
import pytest
import mcnews.searchapi as searchapi

INTEGRATION_TEST_COLLECTION = "mediacloud_test"
Expand All @@ -17,7 +15,7 @@ def setUp(self) -> None:
def test_count(self):
results = self._api.count("*", dt.datetime(2023, 1, 1), dt.datetime(2024, 1, 1))
assert results > 0
assert results < 5000
assert results < 20000

def test_count_over_time(self):
results = self._api.count_over_time("*", dt.datetime(2020, 1, 1), dt.datetime(2025, 1, 1))
Expand All @@ -39,12 +37,12 @@ def test_count_date_filter(self):

def test_paged_articles(self):
query = "*"
start_date = dt.datetime(2023, 10, 1)
start_date = dt.datetime(2023, 1, 1)
end_date = dt.datetime(2023, 12, 31)
story_count = self._api.count(query, start_date, end_date)
# make sure test case is reasonable size (ie. more than one page, but not too many pages
assert story_count > 1000
assert story_count < 10000
assert story_count < 20000
# fetch first page
page1, next_token1 = self._api.paged_articles(query, start_date, end_date)
assert len(page1) > 0
Expand All @@ -58,3 +56,22 @@ def test_paged_articles(self):
page2_urls = [s['url'] for s in page2]
assert page1_url1 not in page2_urls # verify pages don't overlap

def test_page_all(self):
query = "*"
start_date = dt.datetime(2023, 1, 1)
end_date = dt.datetime(2023, 12, 31)
story_count = self._api.count(query, start_date, end_date)
# fetch first page
more_stories = True
stories = []
next_page_token = None
page_count = 0
while more_stories:
page, next_page_token = self._api.paged_articles(query, start_date, end_date,
pagination_token=next_page_token)
assert len(page) > 0
stories += page
more_stories = next_page_token is not None
page_count += 1
assert len(stories) > story_count * 0.9 # why doesn't this match :-(
assert page_count == (1 + int(story_count / 1000))
Loading