From 95b3a8c5f58cd20bc58f3716f8708b4ebe639f89 Mon Sep 17 00:00:00 2001 From: jacobthill Date: Fri, 21 Jun 2024 15:13:02 -0400 Subject: [PATCH] rename tasks and fix return type --- rialto_airflow/dags/harvest.py | 20 ++++++++++---------- rialto_airflow/harvest/doi_set.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rialto_airflow/dags/harvest.py b/rialto_airflow/dags/harvest.py index bc31e4e..f4575d3 100644 --- a/rialto_airflow/dags/harvest.py +++ b/rialto_airflow/dags/harvest.py @@ -42,7 +42,7 @@ def find_authors_csv(): return rialto_authors_file(data_dir) @task() - def dimensions_harvest_orcid(authors_csv, snapshot_dir): + def dimensions_harvest_dois(authors_csv, snapshot_dir): """ Fetch the data by ORCID from Dimensions. """ @@ -51,7 +51,7 @@ def dimensions_harvest_orcid(authors_csv, snapshot_dir): return str(pickle_file) @task() - def openalex_harvest_orcid(authors_csv, snapshot_dir): + def openalex_harvest_dois(authors_csv, snapshot_dir): """ Fetch the data by ORCID from OpenAlex. """ @@ -78,14 +78,14 @@ def doi_set(dimensions, openalex, sul_pub): return create_doi_set(dimensions, openalex, sul_pub) @task() - def dimensions_harvest_doi(dois): + def dimensions_harvest_pubs(dois): """ Harvest publication metadata from Dimensions using the dois from doi_set. """ return True @task() - def openalex_harvest_doi(dois): + def openalex_harvest_pubs(dois): """ Harvest publication metadata from OpenAlex using the dois from doi_set. """ @@ -125,17 +125,17 @@ def publish(dataset): sul_pub = sul_pub_harvest(snapshot_dir) - dimensions_orcid = dimensions_harvest_orcid(authors_csv, snapshot_dir) + dimensions_dois = dimensions_harvest_dois(authors_csv, snapshot_dir) - openalex_orcid = openalex_harvest_orcid(authors_csv, snapshot_dir) + openalex_dois = openalex_harvest_dois(authors_csv, snapshot_dir) - dois = doi_set(dimensions_orcid, openalex_orcid, sul_pub) + dois = doi_set(dimensions_dois, openalex_dois, sul_pub) - dimensions_doi = dimensions_harvest_doi(dois) + dimensions_pubs = dimensions_harvest_pubs(dois) - openalex_doi = openalex_harvest_doi(dois) + openalex_pubs = openalex_harvest_pubs(dois) - pubs = merge_publications(sul_pub, dimensions_doi, openalex_doi) + pubs = merge_publications(sul_pub, dimensions_pubs, openalex_pubs) pubs_authors = join_authors(pubs, authors_csv) diff --git a/rialto_airflow/harvest/doi_set.py b/rialto_airflow/harvest/doi_set.py index 49cabc7..5f5f43e 100644 --- a/rialto_airflow/harvest/doi_set.py +++ b/rialto_airflow/harvest/doi_set.py @@ -12,7 +12,7 @@ def create_doi_set(dimensions: str, openalex: str, sul_pub_csv: str) -> list: return unique_dois -def dois_from_pickle(pickle_file: str) -> dict: +def dois_from_pickle(pickle_file: str) -> list: """Load a pickled dictionary of DOIs and ORCIDs from file.""" with open(pickle_file, "rb") as handle: data = pickle.load(handle)