Skip to content

Commit

Permalink
Configure pyalex settings
Browse files Browse the repository at this point in the history
  • Loading branch information
lwrubel committed Jun 25, 2024
1 parent 485e4bb commit 03d2cd7
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ x-airflow-common:
AIRFLOW_VAR_DEV_LIMIT: ${AIRFLOW_VAR_DEV_LIMIT}
AIRFLOW_VAR_DATA_DIR: /opt/airflow/data
AIRFLOW_VAR_OPENALEX_EMAIL: ${AIRFLOW_VAR_OPENALEX_EMAIL}
AIRFLOW_VAR_OPENALEX_MAX_RETRIES: ${AIRFLOW_VAR_OPENALEX_MAX_RETRIES}
volumes:
- ${AIRFLOW_PROJ_DIR:-.}/rialto_airflow:/opt/airflow/rialto_airflow
# TODO: we may want to put logs and data outside of the project directory so
Expand Down
4 changes: 2 additions & 2 deletions rialto_airflow/harvest/openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from rialto_airflow.utils import invert_dict

# Module-level pyalex client configuration, applied once at import time.
config.email = os.environ.get("AIRFLOW_VAR_OPENALEX_EMAIL")
# Environment variables always arrive as strings, and docker-compose forwards
# ${AIRFLOW_VAR_OPENALEX_MAX_RETRIES} without a default, so an unset host
# variable can reach us as "" rather than being absent. Treat both "missing"
# and "empty" as "use the default of 5", and coerce to int so the retry count
# is numeric. A non-numeric value raises ValueError at import, surfacing the
# misconfiguration immediately instead of failing later inside a request.
config.max_retries = int(os.environ.get("AIRFLOW_VAR_OPENALEX_MAX_RETRIES") or 5)
config.retry_backoff_factor = 0.1
config.retry_http_codes = [429, 500, 503]

Expand Down Expand Up @@ -115,7 +115,7 @@ def publications_csv(dois: list, csv_file: str) -> None:

def publications_from_dois(dois: list, batch_size=75):
"""
Look up works by DOI in batches that fit within OpenAlex request size limits
Look up works by DOI in batches that fit within request length limit of 4096
"""
for doi_batch in batched(dois, batch_size):
doi_list = "|".join([doi for doi in doi_batch])
Expand Down

0 comments on commit 03d2cd7

Please sign in to comment.