Skip to content
This repository has been archived by the owner on Oct 10, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' of github.com:mediacloud/mediacloud-news-client
Browse files Browse the repository at this point in the history
  • Loading branch information
Paige Gulley committed Dec 15, 2023
2 parents 38253de + 0b12e3c commit 372f791
Showing 1 changed file with 22 additions and 5 deletions.
27 changes: 22 additions & 5 deletions mcnews/tests/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from unittest import TestCase
import datetime as dt
import os
import pytest
import mcnews.searchapi as searchapi

INTEGRATION_TEST_COLLECTION = "mediacloud_test"
Expand All @@ -17,7 +15,7 @@ def setUp(self) -> None:
def test_count(self):
# Count every story matching the wildcard query "*" between 2023-01-01 and 2024-01-01
# and check that the total falls inside a plausible range.
results = self._api.count("*", dt.datetime(2023, 1, 1), dt.datetime(2024, 1, 1))
# at least one story must match
assert results > 0
# NOTE(review): two upper bounds appear back to back here; this looks like both sides of a
# diff (5000 replaced by 20000) rather than two intended checks -- confirm which one is live.
assert results < 5000
assert results < 20000

def test_count_over_time(self):
results = self._api.count_over_time("*", dt.datetime(2020, 1, 1), dt.datetime(2025, 1, 1))
Expand All @@ -39,12 +37,12 @@ def test_count_date_filter(self):

def test_paged_articles(self):
query = "*"
start_date = dt.datetime(2023, 10, 1)
start_date = dt.datetime(2023, 1, 1)
end_date = dt.datetime(2023, 12, 31)
story_count = self._api.count(query, start_date, end_date)
# make sure the test case is a reasonable size (i.e. more than one page, but not too many pages)
assert story_count > 1000
assert story_count < 10000
assert story_count < 20000
# fetch first page
page1, next_token1 = self._api.paged_articles(query, start_date, end_date)
assert len(page1) > 0
Expand All @@ -58,3 +56,22 @@ def test_paged_articles(self):
page2_urls = [s['url'] for s in page2]
assert page1_url1 not in page2_urls # verify pages don't overlap

def test_page_all(self):
    """Page through every result for 2023 and sanity-check the totals.

    Calls ``self._api.paged_articles`` repeatedly, feeding each returned
    pagination token into the next call, until a ``None`` token comes back.
    It then checks that the number of stories fetched is close to what
    ``self._api.count`` reported for the same query and date range, and that
    the number of pages fetched is consistent with that count.
    """
    # assumed number of stories in a full page -- TODO confirm against the API's page size
    page_size = 1000
    query = "*"
    start_date = dt.datetime(2023, 1, 1)
    end_date = dt.datetime(2023, 12, 31)
    story_count = self._api.count(query, start_date, end_date)
    # fetch every page, following the pagination token until it runs out
    stories = []
    page_count = 0
    next_page_token = None
    while True:
        page, next_page_token = self._api.paged_articles(query, start_date, end_date,
                                                         pagination_token=next_page_token)
        # an empty page would mean the API handed out a token with nothing behind it
        assert len(page) > 0
        stories.extend(page)
        page_count += 1
        if next_page_token is None:
            break
    # the paged total is only required to be within 10% of the reported count;
    # the two numbers have been observed not to match exactly (cause unknown)
    assert len(stories) > story_count * 0.9
    # ceiling division: a count that is an exact multiple of the page size must not
    # expect an extra (empty) page, which the previous `1 + int(count / 1000)` did
    expected_pages = -(-story_count // page_size)
    assert page_count == expected_pages

0 comments on commit 372f791

Please sign in to comment.