Skip to content

Commit

Permalink
Add ROR organization classifications (#326)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Jan 19, 2025
1 parent 89c6f6c commit 669f730
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 19 deletions.
50 changes: 31 additions & 19 deletions src/pyobo/sources/ror.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import json
import logging
import zipfile
from collections.abc import Iterable
from typing import Any
Expand All @@ -12,7 +13,7 @@
from tqdm.auto import tqdm

from pyobo.struct import Obo, Reference, Term
from pyobo.struct.struct import acronym
from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, acronym
from pyobo.struct.typedef import (
has_homepage,
has_part,
Expand All @@ -23,6 +24,7 @@
see_also,
)

logger = logging.getLogger(__name__)
PREFIX = "ror"
ROR_ZENODO_RECORD_ID = "10086202"

Expand Down Expand Up @@ -81,26 +83,35 @@ def __post_init__(self):

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
return iterate_ror_terms(force=force)


ROR_ORGANIZATION_TYPE_TO_OBI = {
"Education": ...,
"Facility": ...,
"Company": ...,
"Government": ...,
"Healthcare": ...,
"Other": ...,
"Archive": ...,
yield CHARLIE_TERM
yield HUMAN_TERM
yield Term(reference=ORG_CLASS)
yield Term(reference=CITY_CLASS)
yield from ROR_ORGANIZATION_TYPE_TO_OBI.values()
yield from iterate_ror_terms(force=force)


# Maps a ROR organization type label to the class term that is injected for it.
# Each term is created with ``Term.default`` under PREFIX, so the second argument
# is the local identifier of the class and must be unique per entry.
# "Other" has no entry here: the record loop in ``iterate_ror_terms`` parents
# such records directly on ORG_CLASS instead of looking them up in this mapping.
ROR_ORGANIZATION_TYPE_TO_OBI: dict[str, Term] = {
    "Education": Term.default(PREFIX, "education", "educational organization"),
    "Facility": Term.default(PREFIX, "facility", "facility"),
    "Company": Term.default(PREFIX, "company", "company"),
    "Government": Term.default(PREFIX, "government", "government organization"),
    "Healthcare": Term.default(PREFIX, "healthcare", "healthcare organization"),
    "Archive": Term.default(PREFIX, "archive", "archival organization"),
    # This entry previously reused the "healthcare" identifier, which made the
    # nonprofit class collide with the healthcare class above.
    "Nonprofit": (
        Term.default(PREFIX, "nonprofit", "nonprofit organization")
        .append_xref(Reference(prefix="ICO", identifier="0000048"))
        .append_xref(Reference(prefix="GSSO", identifier="004615"))
    ),
}
# Give every injected organization-type class the same bookkeeping: ORG_CLASS as
# its parent, CHARLIE_TERM as its contributor, and the PYOBO_INJECTED comment.
# Only the terms are needed here, so the mapping's keys are not iterated.
for organization_type_term in ROR_ORGANIZATION_TYPE_TO_OBI.values():
    organization_type_term.append_parent(ORG_CLASS)
    organization_type_term.append_contributor(CHARLIE_TERM)
    organization_type_term.append_comment(PYOBO_INJECTED)

_MISSED_ORG_TYPES: set[str] = set()


def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in ROR."""
yield Term(reference=ORG_CLASS)
yield Term(reference=CITY_CLASS)

_version, _source_uri, records = get_latest(force=force)
unhandled_xref_prefixes = set()

Expand All @@ -120,10 +131,11 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
type="Instance",
definition=description,
)
term.append_parent(ORG_CLASS)
# TODO replace term.append_parent(ORG_CLASS) with:
# for organization_type in organization_types:
# term.append_parent(ORG_PARENTS[organization_type])
for organization_type in organization_types:
if organization_type == "Other":
term.append_parent(ORG_CLASS)
else:
term.append_parent(ROR_ORGANIZATION_TYPE_TO_OBI[organization_type])

for link in record.get("links", []):
term.annotate_uri(has_homepage, link)
Expand Down
5 changes: 5 additions & 0 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,11 @@ def auto(
definition=get_definition(prefix, identifier),
)

@classmethod
def default(cls, prefix, identifier, name=None) -> Self:
    """Create a term from a default reference.

    The reference is built by ``default_reference`` from the given prefix,
    identifier, and optional name, and is then wrapped in a new instance
    of this class.

    :param prefix: The prefix passed through to ``default_reference``
    :param identifier: The identifier passed through to ``default_reference``
    :param name: An optional name passed through to ``default_reference``
    :returns: A new instance of this class that holds the default reference
    """
    reference = default_reference(prefix=prefix, identifier=identifier, name=name)
    return cls(reference=reference)

def append_see_also_uri(self, uri: str) -> Self:
"""Add a see also property."""
return self.annotate_uri(v.see_also, uri)
Expand Down

0 comments on commit 669f730

Please sign in to comment.