feat(updater): cache youtube requests (#167)
ReenigneArcher authored Jan 5, 2025
1 parent 351555f commit d47f422
Showing 4 changed files with 104 additions and 68 deletions.
20 changes: 13 additions & 7 deletions .github/workflows/update-db.yml
@@ -31,14 +31,21 @@ jobs:
persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of the personal token
fetch-depth: 0 # otherwise, will fail to push refs to dest repo

- name: Cache
id: cache
uses: actions/cache@v4
with:
path: cache
key: none

- name: Get current date
id: date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.12'

- name: Install Python dependencies
run: |
@@ -54,12 +61,11 @@ jobs:
cp -f -r ./gh-pages-template/* ./gh-pages/
- name: Update
working-directory: gh-pages
run: |
python -u ../src/update_db.py \
--twitch_client_id ${{ secrets.TWITCH_CLIENT_ID }} \
--twitch_client_secret ${{ secrets.TWITCH_CLIENT_SECRET }} \
--youtube_api_key ${{ secrets.YOUTUBE_API_KEY }}
env:
TWITCH_CLIENT_ID: ${{ secrets.TWITCH_CLIENT_ID }}
TWITCH_CLIENT_SECRET: ${{ secrets.TWITCH_CLIENT_SECRET }}
YOUTUBE_API_KEY: ${{ secrets.YOUTUBE_API_KEY }}
run: python -u src/update_db.py ${{ github.event_name == 'pull_request' && '-t' }}

- name: Prepare Artifacts # uploading artifacts will fail if not zipped due to very large quantity of files
shell: bash
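Note on the new cache step: `actions/cache` only saves a fresh entry when the primary `key` misses at restore time, so the constant `key: none` means the cache is written once and then restored unchanged on every subsequent run (until GitHub evicts entries not accessed for about a week). That is general `actions/cache` behavior rather than anything stated in this diff; the `expire_after` setting in `requests-cache` below is what actually keeps individual cached responses fresh.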
5 changes: 5 additions & 0 deletions .gitignore
@@ -158,3 +158,8 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

# Project specific
gh-pages/
cache/
*.sqlite
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
igdb-api-v4==0.3.3
python-dotenv==1.0.1
requests==2.32.3
requests-cache==1.2.1
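
The new `requests-cache` dependency backs the YouTube caching in `update_db.py` below. A minimal sketch of the behavior the commit relies on, with an illustrative URL: a `CachedSession` persists responses to SQLite and, by default, matches later requests on method and full URL, serving the stored response until `expire_after` elapses.

```python
from datetime import timedelta

import requests_cache

# Mirrors the session configured in update_db.py: responses land in
# cache/youtube_cache.sqlite and expire after one day.
session = requests_cache.CachedSession(
    cache_name='cache/youtube_cache',
    backend='sqlite',
    expire_after=timedelta(days=1),
)

first = session.get('https://example.com')   # network hit, stored in SQLite
second = session.get('https://example.com')  # served from the cache
print(first.from_cache, second.from_cache)   # False True
```

Because the cache key is derived from the URL, the exact string built in `get_youtube()` (including the order of the joined video ids) determines whether a run hits the cache, which is what the grouping logic later in the diff protects.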
146 changes: 85 additions & 61 deletions src/update_db.py
@@ -1,5 +1,6 @@
# standard imports
import argparse
from datetime import timedelta
import pathlib
import json
import os
@@ -8,6 +9,7 @@

# lib imports
import requests
import requests_cache
from dotenv import load_dotenv
from igdb.wrapper import IGDBWrapper

@@ -44,48 +46,8 @@ def igdb_authorization(client_id: str, client_secret: str) -> dict:

token_url = 'https://id.twitch.tv/oauth2/token'

authorization = post_json(url=token_url, headers=auth_headers)
return authorization


def get_json(url: str, headers: dict) -> dict:
"""
Send a GET request and return a json formatted dictionary.
Parameters
----------
url : str
The url to request.
headers
The headers to use in the request.
Returns
-------
dict
Dictionary of json data.
"""
result = requests.get(url=url, headers=headers).json()
return result


def post_json(url: str, headers: dict) -> dict:
"""
Send a POST request and return a json formatted dictionary.
Parameters
----------
url : str
The url to request.
headers
The headers to use in the request.
Returns
-------
dict
Dictionary of json data.
"""
result = requests.post(url=url, data=headers).json()
return result
authorization = requests.post(url=token_url, data=auth_headers)
return authorization.json()


def write_json_files(file_path: str, data: dict):
@@ -129,8 +91,13 @@ def get_youtube(video_ids: list) -> dict:
url = f'{uri}?id={videos}&key={args.youtube_api_key}&part=snippet&fields={fields}'
headers = dict(Accept='application/json')

result = get_json(url=url, headers=headers)
return result
session = requests_cache.CachedSession(
cache_name='cache/youtube_cache',
backend='sqlite',
expire_after=timedelta(days=1),
)
response = session.get(url=url, headers=headers)
return response.json()


def get_data():
@@ -293,7 +260,7 @@ def get_data():

if end_point_dict['write_all']:
# write the end_point file
file_path = os.path.join(end_point, 'all')
file_path = os.path.join(args.out_dir, end_point, 'all')
write_json_files(file_path=file_path, data=full_dict[end_point])

print(f'{len(full_dict[end_point])} items processed in endpoint: {end_point}')
@@ -368,7 +335,7 @@ def get_data():

# write the full game index
for bucket, bucket_data in buckets.items():
file_path = os.path.join('buckets', str(bucket))
file_path = os.path.join(args.out_dir, 'buckets', str(bucket))
write_json_files(file_path=file_path, data=bucket_data)

# get data for videos
@@ -378,8 +345,33 @@ def get_data():
end_point = 'videos'
full_dict[end_point] = dict()

all_videos = [all_videos[x:x + 50] for x in range(0, len(all_videos), 50)]
for video_group in all_videos:
all_videos.sort()

cache_file = 'cache/video_groups.json'
os.makedirs(os.path.dirname(cache_file), exist_ok=True)

group_size = 50
if not os.path.isfile(cache_file):
all_video_groups = [all_videos[x:x + group_size] for x in range(0, len(all_videos), group_size)]
else:
with open(cache_file, 'r') as f:
cached_video_groups = json.load(f)

# Filter cached video groups to include only those where all videos are in all_videos
all_video_groups = [x for x in cached_video_groups if all(video in all_videos for video in x)]

# Find videos that are not in any cached video group
uncached_videos = [video for video in all_videos if not any(video in group for group in cached_video_groups)]

# Append uncached videos in groups of up to 50 to all_video_groups
uncached_video_groups = [uncached_videos[x:x + group_size] for x in range(0, len(uncached_videos), group_size)]
all_video_groups.extend(uncached_video_groups)

# write the new video groups to cache
with open(cache_file, 'w') as f:
json.dump(all_video_groups, f)

for video_group in all_video_groups:
json_result = get_youtube(video_ids=video_group)

try:
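
The video-group cache above exists because the cached response is keyed on the request URL: the YouTube URL embeds a comma-joined group of up to 50 ids, so an id drifting into a different group between runs would change the URL and defeat the HTTP cache. Sorting `all_videos` and persisting the previous grouping keeps those URLs stable. A condensed sketch of the idea (function and constant names here are illustrative, not from the diff):

```python
import json
import os

GROUP_SIZE = 50  # the YouTube videos endpoint accepts at most 50 ids per request


def stable_groups(ids: list, cache_file: str) -> list:
    """Chunk ids into groups of up to GROUP_SIZE, reusing the previous run's
    groups so the request URLs (and therefore the cache keys) stay stable."""
    ids = sorted(ids)
    groups = []
    if os.path.isfile(cache_file):
        with open(cache_file) as f:
            cached = json.load(f)
        # keep only groups whose members are all still wanted
        groups = [g for g in cached if all(i in ids for i in g)]
    # ids not covered by a surviving group are chunked into new groups
    # (unlike the diff, this also re-chunks ids whose old group was dropped)
    covered = {i for g in groups for i in g}
    new_ids = [i for i in ids if i not in covered]
    groups += [new_ids[x:x + GROUP_SIZE] for x in range(0, len(new_ids), GROUP_SIZE)]
    os.makedirs(os.path.dirname(cache_file) or '.', exist_ok=True)
    with open(cache_file, 'w') as f:
        json.dump(groups, f)
    return groups
```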
@@ -424,7 +416,7 @@ def get_data():
for end_point, end_point_dict in full_dict.items():
print(f'writing individual files for {end_point}')
for item_id, data in end_point_dict.items():
file_path = os.path.join(end_point, str(item_id))
file_path = os.path.join(args.out_dir, end_point, str(item_id))
write_json_files(file_path=file_path, data=data)


@@ -434,14 +426,14 @@ def get_igdb_enums():
"""
end_point = 'enums'
for enum, values in enums.items():
file_path = os.path.join(end_point, enum)
file_path = os.path.join(args.out_dir, end_point, enum)
write_json_files(file_path=file_path, data=values)

if args.test_mode:
break

# write the end_point file
file_path = os.path.join(end_point, 'all')
file_path = os.path.join(args.out_dir, end_point, 'all')
write_json_files(file_path=file_path, data=enums)


@@ -452,22 +444,54 @@ def get_platform_cross_reference():
end_point = 'platforms'

# write the end_point file
file_path = os.path.join(end_point, 'cross-reference')
file_path = os.path.join(args.out_dir, end_point, 'cross-reference')
write_json_files(file_path=file_path, data=platforms.cross_reference)


if __name__ == '__main__':
# setup arguments using argparse
parser = argparse.ArgumentParser(description="Download entire igdb database.")
parser.add_argument('--twitch_client_id', type=str, required=False, default=os.getenv('TWITCH_CLIENT_ID'),
help='Twitch developer client id')
parser.add_argument('--twitch_client_secret', type=str, required=False, default=os.getenv('TWITCH_CLIENT_SECRET'),
help='Twitch developer client secret')
parser.add_argument('--youtube_api_key', type=str, required=False, default=os.getenv('YOUTUBE_API_KEY'),
help='Youtube API key')
parser.add_argument('-t', '--test_mode', action='store_true',
help='Only write one item file per end point, per request.')
parser.add_argument('-i', '--indent_json', action='store_true', help='Indent json files.')
parser.add_argument(
'-o',
'--out_dir',
type=str,
required=False,
default='gh-pages',
help='Output directory for json files.',
)
parser.add_argument(
'--twitch_client_id',
type=str,
required=False,
default=os.getenv('TWITCH_CLIENT_ID'),
help='Twitch developer client id',
)
parser.add_argument(
'--twitch_client_secret',
type=str,
required=False,
default=os.getenv('TWITCH_CLIENT_SECRET'),
help='Twitch developer client secret',
)
parser.add_argument(
'--youtube_api_key',
type=str,
required=False,
default=os.getenv('YOUTUBE_API_KEY'),
help='Youtube API key',
)
parser.add_argument(
'-t',
'--test_mode',
action='store_true',
help='Only write one item file per end point, per request.',
)
parser.add_argument(
'-i',
'--indent_json',
action='store_true',
help='Indent json files.',
)

args = parser.parse_args()
args.indent = 4 if args.indent_json else None
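For local runs, the reworked argument parser means everything can be driven from flags or a `.env` file; a hypothetical invocation such as `python -u src/update_db.py -t -i -o gh-pages` would write indented test output under `gh-pages/`, while the workflow above passes `-t` only on pull-request runs.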
