From e4a1d67ea7ffcfad952fc2a382458ddf26a753ba Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 5 Dec 2023 16:30:00 +0100 Subject: [PATCH 1/4] Create evaluate_prediction.py --- src/semra/evaluate_prediction.py | 139 +++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/semra/evaluate_prediction.py diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py new file mode 100644 index 0000000..88cc666 --- /dev/null +++ b/src/semra/evaluate_prediction.py @@ -0,0 +1,139 @@ +import itertools as itt +from collections import defaultdict +from typing import TYPE_CHECKING, Iterable, Tuple + +from tqdm import tqdm + +from .api import assemble_evidences, get_index +from .rules import EXACT_MATCH, LEXICAL_MAPPING, MANUAL_MAPPING +from .struct import Mapping, MappingSet, Reference, SimpleEvidence + +if TYPE_CHECKING: + import gilda + + +def evaluate_predictions(*, positive: list[Mapping], negative: list[Mapping], predicted: list[Mapping], tag: str): + positive_index = get_index(positive, progress=False) + negative_index = get_index(negative, progress=False) + predicted_index = get_index(predicted, progress=False) + + positive_set = set(positive_index) + negative_set = set(negative_index) + predicted_set = set(predicted_index) + + union_len = len(positive_set.union(predicted_set).union(negative_set)) + tp = len(positive_set.intersection(predicted_set)) # true positives + fp = len(negative_set.intersection(predicted_set)) # false positives + fn = len(positive_set - predicted_set) # false negatives + tn = len(negative_set - predicted_set) # true negatives + predicted_only = len(predicted_set - positive_set - negative_set) + print(f"[{tag}] union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}") + + accuracy = (tp + tn) / (tp + tn + fp + fn) + recall = tp / (tp + fn) + precision = tp / (tp + fp) + f1 = 2 * tp / (2 * tp + fp + fn) + completion = 1 - predicted_only / union_len + + # print(f"[{tag}] {completion=:.1%}") + # print(f"[{tag}] {accuracy=:.1%}, {precision=:.1%} {recall=:.1%}, {f1=:.1%}") + return (completion, accuracy, precision, recall, f1) + + +def _index_text(grounder: "gilda.Grounder"): + dd = defaultdict(list) + for terms in grounder.entries.values(): + for term in terms: + dd[term.text].append(term) + return dict(dd) + + +def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[Tuple["gilda.Term", "gilda.Term"]]: + terms = {prefix: _index_text(grounder) for prefix, grounder in tqdm(grounders.items(), desc="Indexing texts")} + for (p1, g1), (p2, _g2) in tqdm( + itt.combinations(grounders.items(), 2), unit_scale=True, desc="Generating mappings" + ): + text_to_terms = terms[p2] + for text, terms in tqdm(text_to_terms.items(), unit_scale=True, desc=f"{p1}-{p2} lexical"): + scored_matches = g1.ground(text) + # there are lots of ways to do this, now we do all-by-all + match_terms = [sm.term for sm in scored_matches] + yield from itt.product(terms, match_terms) + + +def grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> list[Mapping]: + xx = ", ".join(sorted(grounders)) + mapping_set = MappingSet(name=f"Gilda predicted mappings for {xx}") + mappings = [] + for subject_term, object_term in _grounder_to_mappings(grounders): + mapping = Mapping( + s=Reference(prefix=subject_term.db, identifier=subject_term.id), + p=EXACT_MATCH, + o=Reference(prefix=object_term.db, identifier=object_term.id), + evidence=[ + # TODO annotate confidence + SimpleEvidence(justification=LEXICAL_MAPPING, mapping_set=mapping_set) + ], + ) + mappings.append(mapping) + mappings = assemble_evidences(mappings, progress=False) + return mappings + + +def main(): + import click + import pyobo.gilda_utils + import pystow + from tabulate import tabulate + + from semra.api import infer_reversible, keep_prefixes + from semra.io import from_sssom, write_sssom + from semra.sources import from_biomappings_negative, get_biomappings_positive_mappings + + positive_mappings = get_biomappings_positive_mappings() + positive_mappings = infer_reversible(positive_mappings, progress=False) + click.echo(f"Got {len(positive_mappings):,} positive mappings") + + negative_mappings = from_biomappings_negative() + negative_mappings = infer_reversible(negative_mappings, progress=False) + click.echo(f"Got {len(negative_mappings):,} negative mappings") + + rows = [] + for p in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]: + path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{p}.tsv") + prefixes = ["mesh", p] + versions = ["2023", None] + + if path.is_file(): + predicted_mappings = from_sssom(path, mapping_set_name="gilda predictions") + else: + grounders = { + prefix: pyobo.gilda_utils.get_grounder(prefix, versions=version) + for prefix, version in zip(prefixes, versions) + } + predicted_mappings = grounder_to_mappings(grounders) + click.echo(f"Got {len(predicted_mappings):,} predicted mappings") + predicted_mappings = infer_reversible(predicted_mappings, progress=False) + write_sssom(predicted_mappings, path) + + positive_mappings_subset = keep_prefixes(positive_mappings, prefixes, progress=False) + negative_mappings_subset = keep_prefixes(negative_mappings, prefixes, progress=False) + t = evaluate_predictions( + positive=positive_mappings_subset, + negative=negative_mappings_subset, + predicted=predicted_mappings, + tag=p, + ) + rows.append((p, *t)) + + print( + tabulate( + rows, + headers=["prefix", "completion", "accuracy", "precision", "recall", "f1"], + floatfmt=".1%", + ) + ) + + +if __name__ == "__main__": + main() From 9f0de4eb2553300148fc983112c5cb1b1a7ab648 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 5 Dec 2023 16:59:12 +0100 Subject: [PATCH 2/4] Incorporate upstream curated stuff --- src/semra/evaluate_prediction.py | 39 +++++++++++++++++--------------- src/semra/io.py | 2 +- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py index 88cc666..1593d29 100644 --- a/src/semra/evaluate_prediction.py +++ b/src/semra/evaluate_prediction.py @@ -5,14 +5,15 @@ from tqdm import tqdm from .api import assemble_evidences, get_index -from .rules import EXACT_MATCH, LEXICAL_MAPPING, MANUAL_MAPPING +from .rules import EXACT_MATCH, LEXICAL_MAPPING +from .io import from_pyobo from .struct import Mapping, MappingSet, Reference, SimpleEvidence if TYPE_CHECKING: import gilda -def evaluate_predictions(*, positive: list[Mapping], negative: list[Mapping], predicted: list[Mapping], tag: str): +def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str): positive_index = get_index(positive, progress=False) negative_index = get_index(negative, progress=False) predicted_index = get_index(predicted, progress=False) @@ -33,11 +34,9 @@ def evaluate_predictions(*, positive: list[Mapping], negative: list[Mapping], pr recall = tp / (tp + fn) precision = tp / (tp + fp) f1 = 2 * tp / (2 * tp + fp + fn) - completion = 1 - predicted_only / union_len + completion = 1 - predicted_only / len(predicted_set) - # print(f"[{tag}] {completion=:.1%}") - # print(f"[{tag}] {accuracy=:.1%}, {precision=:.1%} {recall=:.1%}, {f1=:.1%}") - return (completion, accuracy, precision, recall, f1) + return completion, accuracy, precision, recall, f1 def _index_text(grounder: "gilda.Grounder"): @@ -99,38 +98,42 @@ def main(): click.echo(f"Got {len(negative_mappings):,} negative mappings") rows = [] - for p in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]: - path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{p}.tsv") - prefixes = ["mesh", p] - versions = ["2023", None] + mesh_grounder = pyobo.gilda_utils.get_grounder("mesh", versions="2023") + for prefix in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]: + path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{prefix}.tsv") if path.is_file(): predicted_mappings = from_sssom(path, mapping_set_name="gilda predictions") else: grounders = { - prefix: pyobo.gilda_utils.get_grounder(prefix, versions=version) - for prefix, version in zip(prefixes, versions) + "mesh": mesh_grounder, + prefix: pyobo.gilda_utils.get_grounder(prefix) } predicted_mappings = grounder_to_mappings(grounders) click.echo(f"Got {len(predicted_mappings):,} predicted mappings") predicted_mappings = infer_reversible(predicted_mappings, progress=False) write_sssom(predicted_mappings, path) - positive_mappings_subset = keep_prefixes(positive_mappings, prefixes, progress=False) - negative_mappings_subset = keep_prefixes(negative_mappings, prefixes, progress=False) - t = evaluate_predictions( - positive=positive_mappings_subset, + ontology_mappings = from_pyobo(prefix, "mesh") + ontology_mappings = infer_reversible(ontology_mappings, progress=False) + click.echo(f"[{prefix}] got {len(ontology_mappings):,} mappings from the ontology") + + positive_mappings_subset = keep_prefixes(positive_mappings, [prefix, "mesh"], progress=False) + negative_mappings_subset = keep_prefixes(negative_mappings, [prefix, "mesh"], progress=False) + evaluation_row = evaluate_predictions( + positive=itt.chain(positive_mappings_subset, ontology_mappings), negative=negative_mappings_subset, predicted=predicted_mappings, - tag=p, + tag=prefix, ) - rows.append((p, *t)) + rows.append((prefix, *evaluation_row)) print( tabulate( rows, headers=["prefix", "completion", "accuracy", "precision", "recall", "f1"], floatfmt=".1%", + tablefmt="github", ) ) diff --git a/src/semra/io.py b/src/semra/io.py index f5aad28..acc0fe5 100644 --- a/src/semra/io.py +++ b/src/semra/io.py @@ -194,7 +194,7 @@ def _from_df( def from_pyobo(prefix: str, target_prefix: str | None = None, *, standardize: bool = False, **kwargs) -> list[Mapping]: if target_prefix: - return _from_pyobo_pair(prefix, target_prefix, standardize=standardize, **kwargs) + return _from_pyobo_pair(prefix, target_prefix, **kwargs) return _from_pyobo_prefix(prefix, standardize=standardize, **kwargs) From 969b74954e0f8ef846bc63b01aca1e757893bab6 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 5 Dec 2023 17:37:13 +0100 Subject: [PATCH 3/4] Update evaluate_prediction.py --- src/semra/evaluate_prediction.py | 41 ++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py index 1593d29..3d8f049 100644 --- a/src/semra/evaluate_prediction.py +++ b/src/semra/evaluate_prediction.py @@ -5,15 +5,25 @@ from tqdm import tqdm from .api import assemble_evidences, get_index -from .rules import EXACT_MATCH, LEXICAL_MAPPING from .io import from_pyobo +from .rules import EXACT_MATCH, LEXICAL_MAPPING from .struct import Mapping, MappingSet, Reference, SimpleEvidence if TYPE_CHECKING: import gilda -def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str): +def _get_v1(positive_set, negative_set, predicted_set): + tp = len(positive_set.intersection(predicted_set)) # true positives + fp = len(negative_set.intersection(predicted_set)) # false positives + fn = len(positive_set - predicted_set) # false negatives + tn = len(negative_set - predicted_set) # true negatives + return tp, fp, fn, tn + + +def evaluate_predictions( + *, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str +): positive_index = get_index(positive, progress=False) negative_index = get_index(negative, progress=False) predicted_index = get_index(predicted, progress=False) @@ -22,12 +32,10 @@ def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapp negative_set = set(negative_index) predicted_set = set(predicted_index) - union_len = len(positive_set.union(predicted_set).union(negative_set)) - tp = len(positive_set.intersection(predicted_set)) # true positives - fp = len(negative_set.intersection(predicted_set)) # false positives - fn = len(positive_set - predicted_set) # false negatives - tn = len(negative_set - predicted_set) # true negatives + tp, fp, fn, tn = _get_v1(positive_set, negative_set, predicted_set) + predicted_only = len(predicted_set - positive_set - negative_set) + union_len = len(positive_set.union(predicted_set).union(negative_set)) print(f"[{tag}] union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}") accuracy = (tp + tn) / (tp + tn + fp + fn) @@ -36,6 +44,8 @@ def evaluate_predictions(*, positive: Iterable[Mapping], negative: Iterable[Mapp f1 = 2 * tp / (2 * tp + fp + fn) completion = 1 - predicted_only / len(predicted_set) + # what is the percentage of curated examples that are positive? + # positive_percentage = len(positive_set) / (len(positive_set) + len(negative_set)) return completion, accuracy, precision, recall, f1 @@ -87,7 +97,7 @@ def main(): from semra.api import infer_reversible, keep_prefixes from semra.io import from_sssom, write_sssom - from semra.sources import from_biomappings_negative, get_biomappings_positive_mappings + from semra.sources import from_biomappings_negative, get_biomappings_positive_mappings, get_clo_mappings positive_mappings = get_biomappings_positive_mappings() positive_mappings = infer_reversible(positive_mappings, progress=False) @@ -99,22 +109,23 @@ def main(): rows = [] mesh_grounder = pyobo.gilda_utils.get_grounder("mesh", versions="2023") - for prefix in ["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]: + for prefix in sorted(["chebi", "maxo", "cl", "doid", "go", "uberon", "vo", "clo"]): path = pystow.join("semra", "evaluation_prediction", name=f"evaluation_prediction_sample_{prefix}.tsv") if path.is_file(): predicted_mappings = from_sssom(path, mapping_set_name="gilda predictions") else: - grounders = { - "mesh": mesh_grounder, - prefix: pyobo.gilda_utils.get_grounder(prefix) - } + grounders = {"mesh": mesh_grounder, prefix: pyobo.gilda_utils.get_grounder(prefix)} predicted_mappings = grounder_to_mappings(grounders) click.echo(f"Got {len(predicted_mappings):,} predicted mappings") predicted_mappings = infer_reversible(predicted_mappings, progress=False) write_sssom(predicted_mappings, path) - ontology_mappings = from_pyobo(prefix, "mesh") + if prefix == "clo": + ontology_mappings = get_clo_mappings() + ontology_mappings = keep_prefixes(ontology_mappings, [prefix, "mesh"], progress=False) + else: + ontology_mappings = from_pyobo(prefix, "mesh") ontology_mappings = infer_reversible(ontology_mappings, progress=False) click.echo(f"[{prefix}] got {len(ontology_mappings):,} mappings from the ontology") @@ -126,7 +137,7 @@ def main(): predicted=predicted_mappings, tag=prefix, ) - rows.append((prefix, *evaluation_row)) + rows.append((f"[{prefix}](https://bioregistry.io/{prefix})", *evaluation_row)) print( tabulate( From 008d4570be80f4ed5b4d7534b082afdf2158ade2 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 22 Jan 2024 11:42:41 +0100 Subject: [PATCH 4/4] Update evaluate_prediction.py --- src/semra/evaluate_prediction.py | 55 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/src/semra/evaluate_prediction.py b/src/semra/evaluate_prediction.py index 3d8f049..80953b3 100644 --- a/src/semra/evaluate_prediction.py +++ b/src/semra/evaluate_prediction.py @@ -1,17 +1,22 @@ import itertools as itt +import logging from collections import defaultdict -from typing import TYPE_CHECKING, Iterable, Tuple +from collections.abc import Iterable +from typing import TYPE_CHECKING +import click from tqdm import tqdm -from .api import assemble_evidences, get_index -from .io import from_pyobo -from .rules import EXACT_MATCH, LEXICAL_MAPPING -from .struct import Mapping, MappingSet, Reference, SimpleEvidence +from semra.api import assemble_evidences, get_index +from semra.io import from_pyobo +from semra.rules import EXACT_MATCH, LEXICAL_MAPPING +from semra.struct import Mapping, MappingSet, Reference, SimpleEvidence if TYPE_CHECKING: import gilda +logger = logging.getLogger(__name__) + def _get_v1(positive_set, negative_set, predicted_set): tp = len(positive_set.intersection(predicted_set)) # true positives @@ -22,8 +27,13 @@ def _get_v1(positive_set, negative_set, predicted_set): def evaluate_predictions( - *, positive: Iterable[Mapping], negative: Iterable[Mapping], predicted: Iterable[Mapping], tag: str + *, + positive: Iterable[Mapping], + negative: Iterable[Mapping], + predicted: Iterable[Mapping], + tag: str | None = None, ): + """Evaluate predicted mappings using ground truth positive and negative mappings.""" positive_index = get_index(positive, progress=False) negative_index = get_index(negative, progress=False) predicted_index = get_index(predicted, progress=False) @@ -36,7 +46,11 @@ def evaluate_predictions( predicted_only = len(predicted_set - positive_set - negative_set) union_len = len(positive_set.union(predicted_set).union(negative_set)) - print(f"[{tag}] union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}") + + msg = f"union={union_len:,}, intersection={tp:,}, curated={fn:,}, predicted={predicted_only:,}" + if tag is not None: + msg = f"[{tag}] {msg}" + logger.info(msg) accuracy = (tp + tn) / (tp + tn + fp + fn) recall = tp / (tp + fn) @@ -49,7 +63,7 @@ def evaluate_predictions( return completion, accuracy, precision, recall, f1 -def _index_text(grounder: "gilda.Grounder"): +def _index_text(grounder: "gilda.Grounder") -> dict[str, list["gilda.Term"]]: dd = defaultdict(list) for terms in grounder.entries.values(): for term in terms: @@ -57,7 +71,7 @@ def _index_text(grounder: "gilda.Grounder"): return dict(dd) -def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[Tuple["gilda.Term", "gilda.Term"]]: +def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[tuple["gilda.Term", "gilda.Term", float]]: terms = {prefix: _index_text(grounder) for prefix, grounder in tqdm(grounders.items(), desc="Indexing texts")} for (p1, g1), (p2, _g2) in tqdm( itt.combinations(grounders.items(), 2), unit_scale=True, desc="Generating mappings" @@ -66,31 +80,32 @@ def _grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> Iterable[Tu for text, terms in tqdm(text_to_terms.items(), unit_scale=True, desc=f"{p1}-{p2} lexical"): scored_matches = g1.ground(text) # there are lots of ways to do this, now we do all-by-all - match_terms = [sm.term for sm in scored_matches] - yield from itt.product(terms, match_terms) + for subject_term, scored_match in itt.product(terms, scored_matches): + yield subject_term, scored_match.term, scored_match.score + + +#: A default confidence for mappings generated by Gilda +GILDA_CONFIDENCE = 0.9 def grounder_to_mappings(grounders: dict[str, "gilda.Grounder"]) -> list[Mapping]: - xx = ", ".join(sorted(grounders)) - mapping_set = MappingSet(name=f"Gilda predicted mappings for {xx}") + prefix_list_str = ", ".join(sorted(grounders)) + mapping_set = MappingSet(name=f"Gilda predicted mappings for {prefix_list_str}", confidence=GILDA_CONFIDENCE) mappings = [] - for subject_term, object_term in _grounder_to_mappings(grounders): + for subject_term, object_term, confidence in _grounder_to_mappings(grounders): mapping = Mapping( s=Reference(prefix=subject_term.db, identifier=subject_term.id), p=EXACT_MATCH, o=Reference(prefix=object_term.db, identifier=object_term.id), - evidence=[ - # TODO annotate confidence - SimpleEvidence(justification=LEXICAL_MAPPING, mapping_set=mapping_set) - ], + evidence=[SimpleEvidence(justification=LEXICAL_MAPPING, mapping_set=mapping_set, confidence=confidence)], ) mappings.append(mapping) mappings = assemble_evidences(mappings, progress=False) return mappings +@click.command() def main(): - import click import pyobo.gilda_utils import pystow from tabulate import tabulate @@ -139,7 +154,7 @@ def main(): ) rows.append((f"[{prefix}](https://bioregistry.io/{prefix})", *evaluation_row)) - print( + click.echo( tabulate( rows, headers=["prefix", "completion", "accuracy", "precision", "recall", "f1"],