From e4a1d67ea7ffcfad952fc2a382458ddf26a753ba Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Tue, 5 Dec 2023 16:30:00 +0100
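Subject: [PATCH 1/4] Add evaluate_prediction.py to score Gilda-predicted mappings

Generate lexical mappings between MeSH and several ontologies with Gilda
grounders and score them against the Biomappings positive and negative
curations (completion, accuracy, precision, recall, F1).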

---
 src/semra/evaluate_prediction.py | 139 +++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 src/semra/evaluate_prediction.py

diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py
new file mode 100644
index 0000000..88cc666
--- /dev/null
+++ b/src/semra/evaluate_prediction.py
@@ -0,0 +1,139 @@
+import itertools as itt
+from collections import defaultdict
+from typing import TYPE_CHECKING, Iterable, Tuple
+
+from tqdm import tqdm
+
+from .api import assemble_evidences, get_index
+from .rules import EXACT_MATCH, LEXICAL_MAPPING, MANUAL_MAPPING
+from .struct import Mapping, MappingSet, Reference, SimpleEvidence
+
+if TYPE_CHECKING:
+    import gilda
+
+
+def evaluate_predictions(*, positive: list[Mapping], negative: list[Mapping], predicted: list[Mapping], tag: str):
+    positive_index = get_index(positive, progress=False)
+    negative_index = get_index(negative, progress=False)
+    predicted_index = get_index(predicted, progress=False)
+
+    positive_set = set(positive_index)
+    negative_set = set(negative_index)
+    predicted_set = set(predicted_index)
+
+    union_len = len(positive_set.union(predicted_set).union(negative_set))
+    tp = len(positive_set.intersection(predicted_set))  # true positives
+    fp = len(negative_set.intersection(predicted_set))  # false positives
+    fn = len(positive_set - predicted_set)  # false negatives
+    tn = len(negative_set - predicted_set)  # true negatives
+    predicted_only = len(predicted_set - positive_set - negative_set)
+    print(f"[{tag}] union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}")
+
+    accuracy = (tp + tn) / (tp + tn + fp + fn)
+    recall = tp / (tp + fn)
+    precision = tp / (tp + fp)
+    f1 = 2 * tp / (2 * tp + fp + fn)
+    completion = 1 - predicted_only / union_len
+
+    # print(f"[{tag}] {completion=:.1%}")
+    # print(f"[{tag}] {accuracy=:.1%}, {precision=:.1%} {recall=:.1%}, {f1=:.1%}")
+    return (completion, accuracy, precision, recall, f1)
+
+
+def _index_text(grounder: "gilda.Grounder"):
+    dd = defaultdict(list)
+    for terms in grounder.entries.values():
+        for term in terms:
+            dd[term.text].append(term)
+    return dict(dd)
+
+
+def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[Tuple["gilda.Term", "gilda.Term"]]:
+    terms = {prefix: _index_text(grounder) for prefix, grounder in tqdm(grounders.items(), desc="Indexing texts")}
+    for (p1, g1), (p2, _g2) in tqdm(
+        itt.combinations(grounders.items(), 2), unit_scale=True, desc="Generating mappings"
+    ):
+        text_to_terms = terms[p2]
+        for text, terms in tqdm(text_to_terms.items(), unit_scale=True, desc=f"{p1}-{p2} lexical"):
+            scored_matches = g1.ground(text)
+            # there are lots of ways to do this, now we do all-by-all
+            match_terms = [sm.term for sm in scored_matches]
+            yield from itt.product(terms, match_terms)
+
+
+def grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> list[Mapping]:
+    xx = ", ".join(sorted(grounders))
+    mapping_set = MappingSet(name=f"Gilda predicted mappings for {xx}")
+    mappings = []
+    for subject_term, object_term in _grounder_to_mappings(grounders):
+        mapping = Mapping(
+            s=Reference(prefix=subject_term.db, identifier=subject_term.id),
+            p=EXACT_MATCH,
+            o=Reference(prefix=object_term.db, identifier=object_term.id),
+            evidence=[
+                # TODO annotate confidence
+                SimpleEvidence(justification=LEXICAL_MAPPING, mapping_set=mapping_set)
+            ],
+        )
+        mappings.append(mapping)
+    mappings = assemble_evidences(mappings, progress=False)
+    return mappings
+
+
+def main():
+    import click
+    import pyobo.gilda_utils
+    import pystow
+    from tabulate import tabulate
+
+    from semra.api import infer_reversible, keep_prefixes
+    from semra.io import from_sssom, write_sssom
+    from semra.sources import from_biomappings_negative, get_biomappings_positive_mappings
+
+    positive_mappings = get_biomappings_positive_mappings()
+    positive_mappings = infer_reversible(positive_mappings, progress=False)
+    click.echo(f"Got {len(positive_mappings):,} positive mappings")
+
+    negative_mappings = from_biomappings_negative()
+    negative_mappings = infer_reversible(negative_mappings, progress=False)
+    click.echo(f"Got {len(negative_mappings):,} negative mappings")
+
+    rows = []
+    for p in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]:
+        path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{p}.tsv")
+        prefixes = ["mesh", p]
+        versions = ["2023", None]
+
+        if path.is_file():
+            predicted_mappings = from_sssom(path, mapping_set_name="gilda predictions")
+        else:
+            grounders = {
+                prefix: pyobo.gilda_utils.get_grounder(prefix, versions=version)
+                for prefix, version in zip(prefixes, versions)
+            }
+            predicted_mappings = grounder_to_mappings(grounders)
+            click.echo(f"Got {len(predicted_mappings):,} predicted mappings")
+            predicted_mappings = infer_reversible(predicted_mappings, progress=False)
+            write_sssom(predicted_mappings, path)
+
+        positive_mappings_subset = keep_prefixes(positive_mappings, prefixes, progress=False)
+        negative_mappings_subset = keep_prefixes(negative_mappings, prefixes, progress=False)
+        t = evaluate_predictions(
+            positive=positive_mappings_subset,
+            negative=negative_mappings_subset,
+            predicted=predicted_mappings,
+            tag=p,
+        )
+        rows.append((p, *t))
+
+    print(
+        tabulate(
+            rows,
+            headers=["prefix", "completion", "accuracy", "precision", "recall", "f1"],
+            floatfmt=".1%",
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()

From 9f0de4eb2553300148fc983112c5cb1b1a7ab648 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Tue, 5 Dec 2023 16:59:12 +0100
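Subject: [PATCH 2/4] Add ontology-curated MeSH mappings to the positive set

* Chain the mappings returned by from_pyobo(prefix, "mesh") onto the
  Biomappings positives before evaluating.
* Compute completion relative to the predicted set instead of the union
  of the positive, negative, and predicted sets.
* Load the MeSH grounder once, outside the per-prefix loop.
* Stop forwarding `standardize` from from_pyobo() to _from_pyobo_pair().
* Render the results table with tabulate's "github" format.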

---
 src/semra/evaluate_prediction.py | 39 +++++++++++++++++---------------
 src/semra/io.py                  |  2 +-
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py
index 88cc666..1593d29 100644
--- a/src/semra/evaluate_prediction.py
+++ b/src/semra/evaluate_prediction.py
@@ -5,14 +5,15 @@
 from tqdm import tqdm
 
 from .api import assemble_evidences, get_index
-from .rules import EXACT_MATCH, LEXICAL_MAPPING, MANUAL_MAPPING
+from .rules import EXACT_MATCH, LEXICAL_MAPPING
+from .io import from_pyobo
 from .struct import Mapping, MappingSet, Reference, SimpleEvidence
 
 if TYPE_CHECKING:
     import gilda
 
 
-def evaluate_predictions(*, positive: list[Mapping], negative: list[Mapping], predicted: list[Mapping], tag: str):
+def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str):
     positive_index = get_index(positive, progress=False)
     negative_index = get_index(negative, progress=False)
     predicted_index = get_index(predicted, progress=False)
@@ -33,11 +34,9 @@ def evaluate_predictions(*, positive: list[Mapping], negative: list[Mapping], pr
     recall = tp / (tp + fn)
     precision = tp / (tp + fp)
     f1 = 2 * tp / (2 * tp + fp + fn)
-    completion = 1 - predicted_only / union_len
+    completion = 1 - predicted_only / len(predicted_set)
 
-    # print(f"[{tag}] {completion=:.1%}")
-    # print(f"[{tag}] {accuracy=:.1%}, {precision=:.1%} {recall=:.1%}, {f1=:.1%}")
-    return (completion, accuracy, precision, recall, f1)
+    return completion, accuracy, precision, recall, f1
 
 
 def _index_text(grounder: "gilda.Grounder"):
@@ -99,38 +98,42 @@ def main():
     click.echo(f"Got {len(negative_mappings):,} negative mappings")
 
     rows = []
-    for p in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]:
-        path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{p}.tsv")
-        prefixes = ["mesh", p]
-        versions = ["2023", None]
+    mesh_grounder = pyobo.gilda_utils.get_grounder("mesh", versions="2023")
+    for prefix in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]:
+        path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{prefix}.tsv")
 
         if path.is_file():
             predicted_mappings = from_sssom(path, mapping_set_name="gilda predictions")
         else:
             grounders = {
-                prefix: pyobo.gilda_utils.get_grounder(prefix, versions=version)
-                for prefix, version in zip(prefixes, versions)
+                "mesh": mesh_grounder,
+                prefix: pyobo.gilda_utils.get_grounder(prefix)
             }
             predicted_mappings = grounder_to_mappings(grounders)
             click.echo(f"Got {len(predicted_mappings):,} predicted mappings")
             predicted_mappings = infer_reversible(predicted_mappings, progress=False)
             write_sssom(predicted_mappings, path)
 
-        positive_mappings_subset = keep_prefixes(positive_mappings, prefixes, progress=False)
-        negative_mappings_subset = keep_prefixes(negative_mappings, prefixes, progress=False)
-        t = evaluate_predictions(
-            positive=positive_mappings_subset,
+        ontology_mappings = from_pyobo(prefix, "mesh")
+        ontology_mappings = infer_reversible(ontology_mappings, progress=False)
+        click.echo(f"[{prefix}] got {len(ontology_mappings):,} mappings from the ontology")
+
+        positive_mappings_subset = keep_prefixes(positive_mappings, [prefix, "mesh"], progress=False)
+        negative_mappings_subset = keep_prefixes(negative_mappings, [prefix, "mesh"], progress=False)
+        evaluation_row = evaluate_predictions(
+            positive=itt.chain(positive_mappings_subset, ontology_mappings),
             negative=negative_mappings_subset,
             predicted=predicted_mappings,
-            tag=p,
+            tag=prefix,
         )
-        rows.append((p, *t))
+        rows.append((prefix, *evaluation_row))
 
     print(
         tabulate(
             rows,
             headers=["prefix", "completion", "accuracy", "precision", "recall", "f1"],
             floatfmt=".1%",
+            tablefmt="github",
         )
     )
 
diff --git a/src/semra/io.py b/src/semra/io.py
index f5aad28..acc0fe5 100644
--- a/src/semra/io.py
+++ b/src/semra/io.py
@@ -194,7 +194,7 @@ def _from_df(
 
 def from_pyobo(prefix: str, target_prefix: str | None = None, *, standardize: bool = False, **kwargs) -> list[Mapping]:
     if target_prefix:
-        return _from_pyobo_pair(prefix, target_prefix, standardize=standardize, **kwargs)
+        return _from_pyobo_pair(prefix, target_prefix, **kwargs)
     return _from_pyobo_prefix(prefix, standardize=standardize, **kwargs)
 
 

From 969b74954e0f8ef846bc63b01aca1e757893bab6 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Tue, 5 Dec 2023 17:37:13 +0100
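Subject: [PATCH 3/4] Extract confusion counts and use dedicated CLO mappings

* Move the TP/FP/FN/TN computation into the _get_v1() helper.
* For CLO, take the ontology mappings from get_clo_mappings(), filtered
  to the clo and mesh prefixes, instead of from_pyobo().
* Iterate over the prefixes in sorted order and link each one to the
  Bioregistry in the results table.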

---
 src/semra/evaluate_prediction.py | 41 ++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py
index 1593d29..3d8f049 100644
--- a/src/semra/evaluate_prediction.py
+++ b/src/semra/evaluate_prediction.py
@@ -5,15 +5,25 @@
 from tqdm import tqdm
 
 from .api import assemble_evidences, get_index
-from .rules import EXACT_MATCH, LEXICAL_MAPPING
 from .io import from_pyobo
+from .rules import EXACT_MATCH, LEXICAL_MAPPING
 from .struct import Mapping, MappingSet, Reference, SimpleEvidence
 
 if TYPE_CHECKING:
     import gilda
 
 
-def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str):
+def _get_v1(positive_set, negative_set, predicted_set):
+    tp = len(positive_set.intersection(predicted_set))  # true positives
+    fp = len(negative_set.intersection(predicted_set))  # false positives
+    fn = len(positive_set - predicted_set)  # false negatives
+    tn = len(negative_set - predicted_set)  # true negatives
+    return tp, fp, fn, tn
+
+
+def evaluate_predictions(
+    *, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str
+):
     positive_index = get_index(positive, progress=False)
     negative_index = get_index(negative, progress=False)
     predicted_index = get_index(predicted, progress=False)
@@ -22,12 +32,10 @@ def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapp
     negative_set = set(negative_index)
     predicted_set = set(predicted_index)
 
-    union_len = len(positive_set.union(predicted_set).union(negative_set))
-    tp = len(positive_set.intersection(predicted_set))  # true positives
-    fp = len(negative_set.intersection(predicted_set))  # false positives
-    fn = len(positive_set - predicted_set)  # false negatives
-    tn = len(negative_set - predicted_set)  # true negatives
+    tp, fp, fn, tn = _get_v1(positive_set, negative_set, predicted_set)
+
     predicted_only = len(predicted_set - positive_set - negative_set)
+    union_len = len(positive_set.union(predicted_set).union(negative_set))
     print(f"[{tag}] union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}")
 
     accuracy = (tp + tn) / (tp + tn + fp + fn)
@@ -36,6 +44,8 @@ def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapp
     f1 = 2 * tp / (2 * tp + fp + fn)
     completion = 1 - predicted_only / len(predicted_set)
 
+    # what is the percentage of curated examples that are positive?
+    # positive_percentage = len(positive_set) / (len(positive_set) + len(negative_set))
     return completion, accuracy, precision, recall, f1
 
 
@@ -87,7 +97,7 @@ def main():
 
     from semra.api import infer_reversible, keep_prefixes
     from semra.io import from_sssom, write_sssom
-    from semra.sources import from_biomappings_negative, get_biomappings_positive_mappings
+    from semra.sources import from_biomappings_negative, get_biomappings_positive_mappings, get_clo_mappings
 
     positive_mappings = get_biomappings_positive_mappings()
     positive_mappings = infer_reversible(positive_mappings, progress=False)
@@ -99,22 +109,23 @@ def main():
 
     rows = []
     mesh_grounder = pyobo.gilda_utils.get_grounder("mesh", versions="2023")
-    for prefix in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]:
+    for prefix in sorted(["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]):
         path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{prefix}.tsv")
 
         if path.is_file():
             predicted_mappings = from_sssom(path, mapping_set_name="gilda predictions")
         else:
-            grounders = {
-                "mesh": mesh_grounder,
-                prefix: pyobo.gilda_utils.get_grounder(prefix)
-            }
+            grounders = {"mesh": mesh_grounder, prefix: pyobo.gilda_utils.get_grounder(prefix)}
             predicted_mappings = grounder_to_mappings(grounders)
             click.echo(f"Got {len(predicted_mappings):,} predicted mappings")
             predicted_mappings = infer_reversible(predicted_mappings, progress=False)
             write_sssom(predicted_mappings, path)
 
-        ontology_mappings = from_pyobo(prefix, "mesh")
+        if prefix == "clo":
+            ontology_mappings = get_clo_mappings()
+            ontology_mappings = keep_prefixes(ontology_mappings, [prefix, "mesh"], progress=False)
+        else:
+            ontology_mappings = from_pyobo(prefix, "mesh")
         ontology_mappings = infer_reversible(ontology_mappings, progress=False)
         click.echo(f"[{prefix}] got {len(ontology_mappings):,} mappings from the ontology")
 
@@ -126,7 +137,7 @@ def main():
             predicted=predicted_mappings,
             tag=prefix,
         )
-        rows.append((prefix, *evaluation_row))
+        rows.append((f"[{prefix}](https://bioregistry.io/{prefix})", *evaluation_row))
 
     print(
         tabulate(

From 008d4570be80f4ed5b4d7534b082afdf2158ade2 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 11:42:41 +0100
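Subject: [PATCH 4/4] Record Gilda scores as confidence and log evaluation stats

* Yield each match's score from _grounder_to_mappings() and store it as
  the evidence confidence; give the mapping set a default confidence of
  0.9 (GILDA_CONFIDENCE).
* Make `tag` optional in evaluate_predictions() and emit the summary via
  logger.info() instead of print().
* Turn main() into a click command and print the table with click.echo().
* Switch from relative to absolute `semra.*` imports.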

---
 src/semra/evaluate_prediction.py | 55 ++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py
index 3d8f049..80953b3 100644
--- a/src/semra/evaluate_prediction.py
+++ b/src/semra/evaluate_prediction.py
@@ -1,17 +1,22 @@
 import itertools as itt
+import logging
 from collections import defaultdict
-from typing import TYPE_CHECKING, Iterable, Tuple
+from collections.abc import Iterable
+from typing import TYPE_CHECKING
 
+import click
 from tqdm import tqdm
 
-from .api import assemble_evidences, get_index
-from .io import from_pyobo
-from .rules import EXACT_MATCH, LEXICAL_MAPPING
-from .struct import Mapping, MappingSet, Reference, SimpleEvidence
+from semra.api import assemble_evidences, get_index
+from semra.io import from_pyobo
+from semra.rules import EXACT_MATCH, LEXICAL_MAPPING
+from semra.struct import Mapping, MappingSet, Reference, SimpleEvidence
 
 if TYPE_CHECKING:
     import gilda
 
+logger = logging.getLogger(__name__)
+
 
 def _get_v1(positive_set, negative_set, predicted_set):
     tp = len(positive_set.intersection(predicted_set))  # true positives
@@ -22,8 +27,13 @@ def _get_v1(positive_set, negative_set, predicted_set):
 
 
 def evaluate_predictions(
-    *, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str
+    *,
+    positive: Iterable[Mapping],
+    negative: Iterable[Mapping],
+    predicted: Iterable[Mapping],
+    tag: str | None = None,
 ):
+    """Evaluate predicted mappings using ground truth positive and negative mappings."""
     positive_index = get_index(positive, progress=False)
     negative_index = get_index(negative, progress=False)
     predicted_index = get_index(predicted, progress=False)
@@ -36,7 +46,11 @@ def evaluate_predictions(
 
     predicted_only = len(predicted_set - positive_set - negative_set)
     union_len = len(positive_set.union(predicted_set).union(negative_set))
-    print(f"[{tag}] union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}")
+
+    msg = f"union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}"
+    if tag is not None:
+        msg = f"[{tag}] {msg}"
+    logger.info(msg)
 
     accuracy = (tp + tn) / (tp + tn + fp + fn)
     recall = tp / (tp + fn)
@@ -49,7 +63,7 @@ def evaluate_predictions(
     return completion, accuracy, precision, recall, f1
 
 
-def _index_text(grounder: "gilda.Grounder"):
+def _index_text(grounder: "gilda.Grounder") -> dict[str, list["gilda.Term"]]:
     dd = defaultdict(list)
     for terms in grounder.entries.values():
         for term in terms:
@@ -57,7 +71,7 @@ def _index_text(grounder: "gilda.Grounder"):
     return dict(dd)
 
 
-def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[Tuple["gilda.Term", "gilda.Term"]]:
+def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[tuple["gilda.Term", "gilda.Term", float]]:
     terms = {prefix: _index_text(grounder) for prefix, grounder in tqdm(grounders.items(), desc="Indexing texts")}
     for (p1, g1), (p2, _g2) in tqdm(
         itt.combinations(grounders.items(), 2), unit_scale=True, desc="Generating mappings"
@@ -66,31 +80,32 @@ def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[Tu
         for text, terms in tqdm(text_to_terms.items(), unit_scale=True, desc=f"{p1}-{p2} lexical"):
             scored_matches = g1.ground(text)
             # there are lots of ways to do this, now we do all-by-all
-            match_terms = [sm.term for sm in scored_matches]
-            yield from itt.product(terms, match_terms)
+            for subject_term, scored_match in itt.product(terms, scored_matches):
+                yield subject_term, scored_match.term, scored_match.score
+
+
+#: A default confidence for mappings generated by Gilda
+GILDA_CONFIDENCE = 0.9
 
 
 def grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> list[Mapping]:
-    xx = ", ".join(sorted(grounders))
-    mapping_set = MappingSet(name=f"Gilda predicted mappings for {xx}")
+    prefix_list_str = ", ".join(sorted(grounders))
+    mapping_set = MappingSet(name=f"Gilda predicted mappings for {prefix_list_str}", confidence=GILDA_CONFIDENCE)
     mappings = []
-    for subject_term, object_term in _grounder_to_mappings(grounders):
+    for subject_term, object_term, confidence in _grounder_to_mappings(grounders):
         mapping = Mapping(
             s=Reference(prefix=subject_term.db, identifier=subject_term.id),
             p=EXACT_MATCH,
             o=Reference(prefix=object_term.db, identifier=object_term.id),
-            evidence=[
-                # TODO annotate confidence
-                SimpleEvidence(justification=LEXICAL_MAPPING, mapping_set=mapping_set)
-            ],
+            evidence=[SimpleEvidence(justification=LEXICAL_MAPPING, mapping_set=mapping_set, confidence=confidence)],
         )
         mappings.append(mapping)
     mappings = assemble_evidences(mappings, progress=False)
     return mappings
 
 
+@click.command()
 def main():
-    import click
     import pyobo.gilda_utils
     import pystow
     from tabulate import tabulate
@@ -139,7 +154,7 @@ def main():
         )
         rows.append((f"[{prefix}](https://bioregistry.io/{prefix})", *evaluation_row))
 
-    print(
+    click.echo(
         tabulate(
             rows,
             headers=["prefix", "completion", "accuracy", "precision", "recall", "f1"],