Skip to content

Commit

Permalink
Merge pull request #46 from sul-dlss-labs/cleanup
Browse files: browse the repository at this point in the history
rename tasks and fix return type
  • Loading branch information
edsu authored Jun 21, 2024
2 parents 8b616e2 + 95b3a8c commit a0c3380
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
20 changes: 10 additions & 10 deletions rialto_airflow/dags/harvest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def find_authors_csv():
return rialto_authors_file(data_dir)

@task()
def dimensions_harvest_orcid(authors_csv, snapshot_dir):
def dimensions_harvest_dois(authors_csv, snapshot_dir):
"""
Fetch the data by ORCID from Dimensions.
"""
Expand All @@ -51,7 +51,7 @@ def dimensions_harvest_orcid(authors_csv, snapshot_dir):
return str(pickle_file)

@task()
def openalex_harvest_orcid(authors_csv, snapshot_dir):
def openalex_harvest_dois(authors_csv, snapshot_dir):
"""
Fetch the data by ORCID from OpenAlex.
"""
Expand All @@ -78,14 +78,14 @@ def doi_set(dimensions, openalex, sul_pub):
return create_doi_set(dimensions, openalex, sul_pub)

@task()
def dimensions_harvest_doi(dois):
def dimensions_harvest_pubs(dois):
"""
Harvest publication metadata from Dimensions using the dois from doi_set.
"""
return True

@task()
def openalex_harvest_doi(dois):
def openalex_harvest_pubs(dois):
"""
Harvest publication metadata from OpenAlex using the dois from doi_set.
"""
Expand Down Expand Up @@ -125,17 +125,17 @@ def publish(dataset):

sul_pub = sul_pub_harvest(snapshot_dir)

dimensions_orcid = dimensions_harvest_orcid(authors_csv, snapshot_dir)
dimensions_dois = dimensions_harvest_dois(authors_csv, snapshot_dir)

openalex_orcid = openalex_harvest_orcid(authors_csv, snapshot_dir)
openalex_dois = openalex_harvest_dois(authors_csv, snapshot_dir)

dois = doi_set(dimensions_orcid, openalex_orcid, sul_pub)
dois = doi_set(dimensions_dois, openalex_dois, sul_pub)

dimensions_doi = dimensions_harvest_doi(dois)
dimensions_pubs = dimensions_harvest_pubs(dois)

openalex_doi = openalex_harvest_doi(dois)
openalex_pubs = openalex_harvest_pubs(dois)

pubs = merge_publications(sul_pub, dimensions_doi, openalex_doi)
pubs = merge_publications(sul_pub, dimensions_pubs, openalex_pubs)

pubs_authors = join_authors(pubs, authors_csv)

Expand Down
2 changes: 1 addition & 1 deletion rialto_airflow/harvest/doi_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def create_doi_set(dimensions: str, openalex: str, sul_pub_csv: str) -> list:
return unique_dois


def dois_from_pickle(pickle_file: str) -> dict:
def dois_from_pickle(pickle_file: str) -> list:
"""Load a pickled dictionary of DOIs and ORCIDs from file."""
with open(pickle_file, "rb") as handle:
data = pickle.load(handle)
Expand Down

0 comments on commit a0c3380

Please sign in to comment.