Skip to content

Commit

Permalink
Refactor API code (#312)
Browse files Browse the repository at this point in the history
1. Reuse typed dictionaries to reduce reused kwargs
2. Add simplified function for prefix/identifier arguments
3. Create edges module
4. Switch graph generation to use all edges
  • Loading branch information
cthoyt authored Jan 15, 2025
1 parent 3c153e8 commit a9abf86
Show file tree
Hide file tree
Showing 12 changed files with 480 additions and 332 deletions.
12 changes: 12 additions & 0 deletions src/pyobo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
get_children,
get_definition,
get_descendants,
get_edges,
get_edges_df,
get_filtered_properties_df,
get_filtered_properties_mapping,
get_filtered_properties_multimapping,
Expand All @@ -20,11 +22,15 @@
get_id_synonyms_mapping,
get_id_to_alts,
get_ids,
get_literal_properties,
get_literal_properties_df,
get_mappings_df,
get_metadata,
get_name,
get_name_by_curie,
get_name_id_mapping,
get_object_properties,
get_object_properties_df,
get_obsolete,
get_primary_curie,
get_primary_identifier,
Expand Down Expand Up @@ -80,6 +86,8 @@
"get_children",
"get_definition",
"get_descendants",
"get_edges",
"get_edges_df",
"get_filtered_properties_df",
"get_filtered_properties_mapping",
"get_filtered_properties_multimapping",
Expand All @@ -94,11 +102,15 @@
"get_id_synonyms_mapping",
"get_id_to_alts",
"get_ids",
"get_literal_properties",
"get_literal_properties_df",
"get_mappings_df",
"get_metadata",
"get_name",
"get_name_by_curie",
"get_name_id_mapping",
"get_object_properties",
"get_object_properties_df",
"get_obsolete",
"get_ontology",
"get_primary_curie",
Expand Down
12 changes: 11 additions & 1 deletion src/pyobo/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
get_primary_curie,
get_primary_identifier,
)
from .edges import get_edges, get_edges_df, get_graph
from .hierarchy import (
get_ancestors,
get_children,
Expand All @@ -32,13 +33,16 @@
get_filtered_properties_df,
get_filtered_properties_mapping,
get_filtered_properties_multimapping,
get_literal_properties,
get_literal_properties_df,
get_object_properties,
get_object_properties_df,
get_properties,
get_properties_df,
get_property,
)
from .relations import (
get_filtered_relations_df,
get_graph,
get_id_multirelations_mapping,
get_relation,
get_relation_mapping,
Expand All @@ -61,6 +65,8 @@
"get_children",
"get_definition",
"get_descendants",
"get_edges",
"get_edges_df",
"get_equivalent",
"get_filtered_properties_df",
"get_filtered_properties_mapping",
Expand All @@ -76,11 +82,15 @@
"get_id_synonyms_mapping",
"get_id_to_alts",
"get_ids",
"get_literal_properties",
"get_literal_properties_df",
"get_mappings_df",
"get_metadata",
"get_name",
"get_name_by_curie",
"get_name_id_mapping",
"get_object_properties",
"get_object_properties_df",
"get_obsolete",
"get_ontology",
"get_primary_curie",
Expand Down
68 changes: 68 additions & 0 deletions src/pyobo/api/edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""High-level API for edges."""

import networkx as nx
import pandas as pd
from tqdm import tqdm
from typing_extensions import Unpack

from pyobo.api.names import get_ids
from pyobo.api.utils import get_version_from_kwargs
from pyobo.constants import (
GetOntologyKwargs,
check_should_cache,
check_should_force,
check_should_use_tqdm,
)
from pyobo.getters import get_ontology
from pyobo.struct import Reference
from pyobo.utils.path import prefix_cache_join

from ..utils.cache import cached_df

# Names exported by this module on ``from ... import *``.
__all__ = [
"get_edges",
"get_edges_df",
"get_graph",
]


def get_graph(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> nx.MultiDiGraph:
    """Get the relation graph for an ontology.

    Every identifier returned by :func:`get_ids` is added as a node labeled
    ``{prefix}:{identifier}``, so identifiers that take part in no edge are
    still present in the graph. Each row of the dataframe returned by
    :func:`get_edges_df` is then added as one directed edge.

    :param prefix: The ontology prefix; used to build the node labels and
        passed on to :func:`get_ids` and :func:`get_edges_df`.
    :param kwargs: Keyword arguments passed through unchanged to
        :func:`get_ids` and :func:`get_edges_df`.
    :returns: A :class:`networkx.MultiDiGraph` in which each edge points from
        the subject to the object of a triple and is keyed by the predicate,
        so parallel edges with different predicates between the same pair of
        nodes are kept apart.
    """
    rv = nx.MultiDiGraph()
    rv.add_nodes_from(f"{prefix}:{identifier}" for identifier in get_ids(prefix, **kwargs))
    df = get_edges_df(prefix=prefix, **kwargs)
    # Rows are unpacked as (subject, predicate, object). The edge has to run
    # subject -> object with the predicate as the multigraph key; passing the
    # predicate as the target node would turn predicates into nodes and lose
    # the actual subject/object connectivity.
    for s, p, o in df.values:
        rv.add_edge(s, o, key=p)
    return rv


def get_edges_df(prefix, **kwargs: Unpack[GetOntologyKwargs]) -> pd.DataFrame:
    """Get a dataframe of edge triples for the given ontology.

    The dataframe is produced by calling ``get_edges_df`` on the ontology
    object returned by :func:`get_ontology`, wrapped in :func:`cached_df` so
    the ``force`` and ``cache`` settings derived from ``kwargs`` are honored.

    :param prefix: The ontology prefix, used to resolve the version, the cache
        path, and the ontology itself.
    :param kwargs: Keyword arguments used to resolve the version and the
        force/cache/progress-bar settings; also passed to :func:`get_ontology`.
    :returns: The dataframe returned by the cached getter.
    """
    # NOTE(review): the cache file is called "object_properties.tsv" even
    # though it stores edges -- confirm it cannot collide with a cache file
    # written for object properties elsewhere.
    cache_path = prefix_cache_join(
        prefix,
        name="object_properties.tsv",
        version=get_version_from_kwargs(prefix, kwargs),
    )

    def _df_getter() -> pd.DataFrame:
        ontology = get_ontology(prefix, **kwargs)
        return ontology.get_edges_df(use_tqdm=check_should_use_tqdm(kwargs))

    # Apply the caching decorator explicitly instead of with ``@`` syntax.
    with_cache = cached_df(
        path=cache_path,
        dtype=str,
        force=check_should_force(kwargs),
        cache=check_should_cache(kwargs),
    )
    return with_cache(_df_getter)()


def get_edges(
    prefix, **kwargs: Unpack[GetOntologyKwargs]
) -> list[tuple[Reference, Reference, Reference]]:
    """Get the edge triples for the given ontology as parsed references.

    :param prefix: The ontology prefix, passed to :func:`get_edges_df` and
        shown in the progress bar description.
    :param kwargs: Keyword arguments passed to :func:`get_edges_df`; also
        used to decide whether the progress bar is displayed.
    :returns: One ``(subject, predicate, object)`` tuple per row of the edges
        dataframe, with each cell parsed by :meth:`Reference.from_curie`.
    """
    df = get_edges_df(prefix, **kwargs)
    rows = tqdm(
        df.values,
        desc=f"[{prefix}] parsing edges",
        unit="edge",
        unit_scale=True,
        disable=not check_should_use_tqdm(kwargs),
    )
    triples = []
    for subject, predicate, obj in rows:
        triples.append(
            (
                Reference.from_curie(subject),
                Reference.from_curie(predicate),
                Reference.from_curie(obj),
            )
        )
    return triples
Loading

0 comments on commit a9abf86

Please sign in to comment.