From 669f7301a550c6ef6fc529d7c3a7f27106c6cd3a Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sun, 19 Jan 2025 14:34:25 -0500
Subject: [PATCH] Add ROR organization classifications (#326)

---
Review note: the "Nonprofit" entry of ROR_ORGANIZATION_TYPE_TO_OBI reused the
local identifier "healthcare" (same as the "Healthcare" entry); it now uses
"nonprofit" so the two classes no longer share an identifier. Hunk line
counts are unchanged by this fix.

 src/pyobo/sources/ror.py   | 50 +++++++++++++++++++++++---------------
 src/pyobo/struct/struct.py |  5 ++++
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/src/pyobo/sources/ror.py b/src/pyobo/sources/ror.py
index 3c003a47..7aac9175 100644
--- a/src/pyobo/sources/ror.py
+++ b/src/pyobo/sources/ror.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import json
+import logging
 import zipfile
 from collections.abc import Iterable
 from typing import Any
@@ -12,7 +13,7 @@
 from tqdm.auto import tqdm
 
 from pyobo.struct import Obo, Reference, Term
-from pyobo.struct.struct import acronym
+from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, acronym
 from pyobo.struct.typedef import (
     has_homepage,
     has_part,
@@ -23,6 +24,7 @@
     see_also,
 )
 
+logger = logging.getLogger(__name__)
 PREFIX = "ror"
 ROR_ZENODO_RECORD_ID = "10086202"
 
@@ -81,26 +83,35 @@ def __post_init__(self):
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-        return iterate_ror_terms(force=force)
-
-
-ROR_ORGANIZATION_TYPE_TO_OBI = {
-    "Education": ...,
-    "Facility": ...,
-    "Company": ...,
-    "Government": ...,
-    "Healthcare": ...,
-    "Other": ...,
-    "Archive": ...,
+        yield CHARLIE_TERM
+        yield HUMAN_TERM
+        yield Term(reference=ORG_CLASS)
+        yield Term(reference=CITY_CLASS)
+        yield from ROR_ORGANIZATION_TYPE_TO_OBI.values()
+        yield from iterate_ror_terms(force=force)
+
+
+ROR_ORGANIZATION_TYPE_TO_OBI: dict[str, Term] = {
+    "Education": Term.default(PREFIX, "education", "educational organization"),
+    "Facility": Term.default(PREFIX, "facility", "facility"),
+    "Company": Term.default(PREFIX, "company", "company"),
+    "Government": Term.default(PREFIX, "government", "government organization"),
+    "Healthcare": Term.default(PREFIX, "healthcare", "healthcare organization"),
+    "Archive": Term.default(PREFIX, "archive", "archival organization"),
+    "Nonprofit": Term.default(PREFIX, "nonprofit", "nonprofit organization")
+    .append_xref(Reference(prefix="ICO", identifier="0000048"))
+    .append_xref(Reference(prefix="GSSO", identifier="004615")),
 }
+for _k, v in ROR_ORGANIZATION_TYPE_TO_OBI.items():
+    v.append_parent(ORG_CLASS)
+    v.append_contributor(CHARLIE_TERM)
+    v.append_comment(PYOBO_INJECTED)
+
 _MISSED_ORG_TYPES: set[str] = set()
 
 
 def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
     """Iterate over terms in ROR."""
-    yield Term(reference=ORG_CLASS)
-    yield Term(reference=CITY_CLASS)
-
     _version, _source_uri, records = get_latest(force=force)
     unhandled_xref_prefixes = set()
 
@@ -120,10 +131,11 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
             type="Instance",
             definition=description,
         )
-        term.append_parent(ORG_CLASS)
-        # TODO replace term.append_parent(ORG_CLASS) with:
-        # for organization_type in organization_types:
-        #     term.append_parent(ORG_PARENTS[organization_type])
+        for organization_type in organization_types:
+            if organization_type == "Other":
+                term.append_parent(ORG_CLASS)
+            else:
+                term.append_parent(ROR_ORGANIZATION_TYPE_TO_OBI[organization_type])
 
         for link in record.get("links", []):
             term.annotate_uri(has_homepage, link)
diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
index 40472bec..92aebefa 100644
--- a/src/pyobo/struct/struct.py
+++ b/src/pyobo/struct/struct.py
@@ -323,6 +323,11 @@ def auto(
             definition=get_definition(prefix, identifier),
         )
 
+    @classmethod
+    def default(cls, prefix, identifier, name=None) -> Self:
+        """Create a default term."""
+        return cls(reference=default_reference(prefix=prefix, identifier=identifier, name=name))
+
     def append_see_also_uri(self, uri: str) -> Self:
         """Add a see also property."""
         return self.annotate_uri(v.see_also, uri)