Skip to content

Commit

Permalink
Merge pull request #38 from sul-dlss-labs/t22-rialto-orgs-orcids
Browse files Browse the repository at this point in the history
Get rialto orgs author ORCIDs
  • Loading branch information
edsu authored Jun 19, 2024
2 parents 3376e2b + f2f26a9 commit c19679e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 2 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ for i in `vault kv list -format yaml puppet/application/rialto-airflow/dev | sed
done
```

5. The harvest DAG requires a CSV file of authors from rialto-orgs. The file is not yet provided automatically, so for local set-up download it from
<https://sul-rialto-dev.stanford.edu/authors?action=index&commit=Search&controller=authors&format=csv&orcid_filter=&q=> and put the resulting `authors.csv` file in the `data/` directory.

## Development

### Set-up
Expand All @@ -76,6 +79,7 @@ This will create the virtual environment at the default location of `.venv/`. `u
source .venv/bin/activate
```


### Install dependencies
```
uv pip install -r requirements.txt
Expand Down
21 changes: 20 additions & 1 deletion rialto_airflow/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import csv
import datetime

from pathlib import Path


Expand All @@ -14,3 +14,22 @@ def create_snapshot_dir(data_dir):
snapshot_dir.mkdir()

return str(snapshot_dir)


def rialto_authors_file(data_dir):
    """Return the location of the rialto-orgs authors.csv inside data_dir.

    The result is a ``pathlib.Path``; this function only builds the path and
    does not check that the file exists.
    """
    return Path(data_dir).joinpath("authors.csv")


def rialto_authors_orcids(rialto_authors_file):
    """Extract the orcidid column from the authors.csv file.

    Args:
        rialto_authors_file: path to the rialto-orgs authors CSV. Its first
            row must be a header that contains an ``orcidid`` column.

    Returns:
        A list holding the ``orcidid`` cell of every data row, in file order.
        Cells are returned verbatim, so an empty cell yields an empty string.
        Rows too short to contain the column (e.g. blank lines) are skipped.

    Raises:
        ValueError: if the file has no header row, or the header has no
            ``orcidid`` column.
    """
    # newline="" hands newline handling to the csv module, as its
    # documentation requires; the explicit encoding keeps the result
    # independent of the machine's locale.
    with open(rialto_authors_file, "r", newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        header = next(reader, None)
        if header is None:
            # A bare StopIteration from next() would be a confusing failure.
            raise ValueError(f"{rialto_authors_file} is empty: no header row found")
        orcidid = header.index("orcidid")
        # csv.reader yields [] for blank lines; indexing those would raise
        # IndexError, so only rows that actually reach the column are used.
        return [row[orcidid] for row in reader if len(row) > orcidid]
21 changes: 20 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,27 @@
import csv
from pathlib import Path
import pytest
from rialto_airflow.utils import create_snapshot_dir, rialto_authors_orcids

from rialto_airflow.utils import create_snapshot_dir

@pytest.fixture
def authors_csv(tmp_path):
    """Write a two-author authors.csv under tmp_path and return its path."""
    rows = [
        ["sunetid", "orcidid"],
        ["author1", "https://orcid.org/0000-0000-0000-0001"],
        ["author2", "https://orcid.org/0000-0000-0000-0002"],
    ]
    fixture_file = tmp_path / "authors.csv"
    # newline="" so the csv writer controls the line endings itself.
    with fixture_file.open("w", newline="") as handle:
        csv.writer(handle).writerows(rows)
    return fixture_file


def test_create_snapshot_dir(tmpdir):
    """create_snapshot_dir returns a path that exists as a directory."""
    created = create_snapshot_dir(tmpdir)
    assert Path(created).is_dir()


def test_rialto_authors_orcids(tmp_path, authors_csv):
    """The ORCIDs written by the authors_csv fixture are read back."""
    extracted = rialto_authors_orcids(authors_csv)
    # The fixture file holds exactly two author rows.
    assert len(extracted) == 2
    assert "https://orcid.org/0000-0000-0000-0001" in extracted

0 comments on commit c19679e

Please sign in to comment.