From 09eeb590be5d047fa0aff2c50619159e8e5061ef Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Wed, 18 Sep 2024 12:12:42 -0700 Subject: [PATCH 1/7] Add type attribute to CX2 notes --- src/gocam/translation/cx2/main.py | 8 +- tests/input/Model-5ce58dde00001215.yaml | 279 ++++++++++++++++++++++++ tests/test_translation/test_cx2.py | 53 ++++- 3 files changed, 330 insertions(+), 10 deletions(-) create mode 100644 tests/input/Model-5ce58dde00001215.yaml diff --git a/src/gocam/translation/cx2/main.py b/src/gocam/translation/cx2/main.py index 2fc6042..131fd83 100644 --- a/src/gocam/translation/cx2/main.py +++ b/src/gocam/translation/cx2/main.py @@ -1,6 +1,6 @@ from ndex2.cx2 import CX2Network -from gocam.datamodel import Model +from gocam.datamodel import EnabledByProteinComplexAssociation, Model from gocam.translation.cx2.style import ( RELATIONS, VISUAL_EDITOR_PROPERTIES, @@ -27,9 +27,15 @@ def _get_object_label(object_id: str) -> str: if activity.enabled_by is None: continue + if isinstance(activity.enabled_by, EnabledByProteinComplexAssociation): + node_type = "complex" + else: + node_type = "gene" + node_attributes = { "name": _get_object_label(activity.enabled_by.term), "represents": activity.enabled_by.term, + "type": node_type, } if activity.molecular_function: diff --git a/tests/input/Model-5ce58dde00001215.yaml b/tests/input/Model-5ce58dde00001215.yaml new file mode 100644 index 0000000..ca51aa2 --- /dev/null +++ b/tests/input/Model-5ce58dde00001215.yaml @@ -0,0 +1,279 @@ +--- +id: gomodel:5ce58dde00001215 +title: Mouse-Aatf-antiapoptosis +taxon: NCBITaxon:10090 +status: production +comments: +- 'Automated change 2023-03-16: RO:0002213 replaced by RO:0002629' +activities: +- id: gomodel:5ce58dde00001215/5ce58dde00001232 + enabled_by: + term: MGI:MGI:1328355 + molecular_function: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + occurs_in: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: CL:0000169 + part_of: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2022-11-01' + term: GO:0030968 + causal_associations: + - evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0002304 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001288 +- id: gomodel:5ce58dde00001215/5ce58dde00001317 + enabled_by: + term: MGI:MGI:1930134 + molecular_function: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + part_of: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0030968 + causal_associations: + - evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0002304 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001288 +- id: gomodel:5ce58dde00001215/5ce58dde00001288 + enabled_by: + term: GO:0005667 + members: + - MGI:MGI:1929608 + - MGI:MGI:103038 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003700 + occurs_in: + evidence: + - term: ECO:0000314 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0005634 + part_of: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2022-11-01' + term: GO:0030968 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0002629 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001231 + - evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0002304 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001311 +- id: gomodel:5ce58dde00001215/5ce58dde00001301 + enabled_by: + term: MGI:MGI:1098684 + molecular_function: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + part_of: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2022-11-01' + term: GO:0030968 + causal_associations: + - evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0004047 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001288 +- id: gomodel:5ce58dde00001215/5ce58dde00001311 + enabled_by: + term: MGI:MGI:1328355 + molecular_function: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + part_of: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2022-11-01' + term: GO:0030968 +- id: gomodel:5ce58dde00001215/5ce58dde00001231 + enabled_by: + term: MGI:MGI:87986 + molecular_function: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + part_of: + evidence: + - term: ECO:0000315 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:2001243 +- id: gomodel:5ce58dde00001215/5ce58dde00001298 + enabled_by: + term: MGI:MGI:98970 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0002304 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001232 +- id: gomodel:5ce58dde00001215/5ce58dde00001302 + enabled_by: + term: MGI:MGI:1341830 + molecular_function: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + term: GO:0003674 + part_of: + evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2022-11-01' + term: GO:0030968 + causal_associations: + - evidence: + - term: ECO:0000304 + reference: PMID:19911006 + provenances: + - contributor: https://orcid.org/0000-0001-7476-6306 + date: '2019-06-25' + predicate: RO:0002304 + downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001301 +objects: +- id: MGI:MGI:87986 + label: Akt1 Mmus +- id: GO:0030968 + label: endoplasmic reticulum unfolded protein response +- id: GO:0097193 + label: intrinsic apoptotic signaling pathway +- id: GO:0036503 + label: ERAD pathway +- id: GO:0061077 + label: chaperone-mediated protein folding +- id: ECO:0000304 + label: author statement supported by traceable reference used in manual assertion +- id: GO:0006412 + label: translation +- id: MGI:MGI:1328355 + label: Wfs1 Mmus +- id: MGI:MGI:1929608 + label: Aatf Mmus +- id: GO:0003674 + label: molecular_function +- id: ECO:0000315 + label: mutant phenotype evidence used in manual assertion +- id: CL:0000169 + label: type B pancreatic cell +- id: MGI:MGI:1341830 + label: Eif2ak3 Mmus +- id: MGI:MGI:1098684 + label: Eif2a Mmus +- id: MGI:MGI:1930134 + label: Ern1 Mmus +- id: MGI:MGI:98970 + label: Xbp1 Mmus +- id: GO:2001243 + label: negative regulation of intrinsic apoptotic signaling pathway +- id: ECO:0000314 + label: direct assay evidence used in manual assertion +- id: MGI:MGI:103038 + label: Stat3 Mmus +- id: GO:0005667 + label: transcription regulator complex +- id: GO:0003700 + label: DNA-binding transcription factor activity +- id: GO:0005634 + label: nucleus + diff --git a/tests/test_translation/test_cx2.py b/tests/test_translation/test_cx2.py index 82a1b75..ae7cdda 100644 --- a/tests/test_translation/test_cx2.py +++ b/tests/test_translation/test_cx2.py @@ -4,20 +4,39 @@ from gocam.datamodel import Model from gocam.translation.cx2 import model_to_cx2 -from tests import EXAMPLES_DIR +from tests import EXAMPLES_DIR, INPUT_DIR @pytest.fixture -def model(): - model_file = EXAMPLES_DIR / "Model-663d668500002178.yaml" - with open(model_file, "r") as f: - deserialized = yaml.safe_load(f) - model = Model.model_validate(deserialized) - return model +def get_model(): + def _get_model(model_path): + with open(model_path, "r") as f: + deserialized = yaml.safe_load(f) + model = Model.model_validate(deserialized) + return model + return _get_model -def test_model_to_cx2(model): + +@pytest.fixture +def example_model(get_model): + def _get_example_model(example_name): + return get_model(EXAMPLES_DIR / f"{example_name}.yaml") + + return _get_example_model + + +@pytest.fixture +def input_model(get_model): + def _get_input_model(model_name): + return get_model(INPUT_DIR / f"{model_name}.yaml") + + return _get_input_model + + +def test_model_to_cx2(example_model): """Test the model_to_cx2 function.""" + model = example_model("Model-663d668500002178") cx2 = model_to_cx2(model) assert isinstance(cx2, list) @@ -31,8 +50,9 @@ def test_model_to_cx2(model): assert len(edge_aspect["edges"]) == 14, "Incorrect number of edges in CX2" -def test_load_cx2_to_ndex(model): +def test_load_cx2_to_ndex(example_model): """Test loading generated CX2 file by NDEx library.""" + model = example_model("Model-663d668500002178") cx2 = model_to_cx2(model) factory = RawCX2NetworkFactory() @@ -41,3 +61,18 @@ def test_load_cx2_to_ndex(model): assert isinstance(cx2_network, CX2Network) assert len(cx2_network.get_nodes()) == 10, "Incorrect number of nodes in CX2" assert len(cx2_network.get_edges()) == 14, "Incorrect number of edges in CX2" + + +def test_node_type_attribute(input_model): + """Test that the `type` attribute is correctly set for nodes.""" + model = input_model("Model-5ce58dde00001215") + cx2 = model_to_cx2(model) + + node_aspect = next((aspect for aspect in cx2 if "nodes" in aspect), None) + assert node_aspect is not None + for node in node_aspect["nodes"]: + node_attrs = node["v"] + if node_attrs["name"] == "transcription regulator complex": + assert node_attrs["type"] == "complex" + else: + assert node_attrs["type"] == "gene" From 35fec87a25be2d78cd4458b7ec82285ad3890bd7 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Wed, 18 Sep 2024 13:52:02 -0700 Subject: [PATCH 2/7] Remove species code from gene node labels --- src/gocam/translation/cx2/main.py | 50 ++++++++++++++++++++++++++++++- tests/test_cli.py | 12 ++++---- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/gocam/translation/cx2/main.py b/src/gocam/translation/cx2/main.py index 131fd83..b7babd3 100644 --- a/src/gocam/translation/cx2/main.py +++ b/src/gocam/translation/cx2/main.py @@ -1,3 +1,6 @@ +import logging +import re + from ndex2.cx2 import CX2Network from gocam.datamodel import EnabledByProteinComplexAssociation, Model @@ -7,6 +10,43 @@ VISUAL_PROPERTIES, ) +logger = logging.getLogger(__name__) + +# Derived from https://github.com/geneontology/wc-gocam-viz/blob/6ef1fcaddfef97ece94d04b7c23ac09c33ace168/src/globals/%40noctua.form/data/taxon-dataset.json +# TODO: Can this not be hardcoded? Consider just splitting the label on space and keeping the first +# part? Could also go into the MinervaWrapper class. +SPECIES_CODES = [ + "Atal", + "Btau", + "Cele", + "Cfam", + "Ddis", + "Dmel", + "Drer", + "Ggal", + "Hsap", + "Mmus", + "Pseudomonas", + "Rnor", + "Scer", + "Sjap", + "Solanaceae", + "Spom", + "Sscr", + "Xenopus", +] + + +def _remove_species_code_suffix(label: str) -> str: + for code in SPECIES_CODES: + label = label.removesuffix(code).strip() + return label + + +# Regex from +# https://github.com/ndexbio/ndex-enrichment-rest/wiki/Enrichment-network-structure#via-node-attributes-preferred-method +IQUERY_GENE_SYMBOL_PATTERN = re.compile("(^[A-Z][A-Z0-9-]*$)|(^C[0-9]+orf[0-9]+$)") + def model_to_cx2(gocam: Model) -> list: @@ -32,8 +72,16 @@ def _get_object_label(object_id: str) -> str: else: node_type = "gene" + node_name = _remove_species_code_suffix( + _get_object_label(activity.enabled_by.term) + ) + if node_type == "gene" and IQUERY_GENE_SYMBOL_PATTERN.match(node_name) is None: + logger.warning( + f"Name for gene node does not match expected pattern: {node_name}" + ) + node_attributes = { - "name": _get_object_label(activity.enabled_by.term), + "name": node_name, "represents": activity.enabled_by.term, "type": node_type, } diff --git a/tests/test_cli.py b/tests/test_cli.py index 293e416..0f11826 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -11,7 +11,7 @@ @pytest.fixture def runner(): - return CliRunner() + return CliRunner(mix_stderr=False) @pytest.fixture @@ -28,7 +28,7 @@ def test_fetch_yaml(runner, api_mock): result = runner.invoke(cli, ["fetch", "--format", "yaml", "5b91dbd100002057"]) assert result.exit_code == 0 - parsed_output = yaml.safe_load(result.output) + parsed_output = yaml.safe_load(result.stdout) assert parsed_output["id"] == "gomodel:5b91dbd100002057" @@ -36,14 +36,14 @@ def test_fetch_json(runner, api_mock): result = runner.invoke(cli, ["fetch", "--format", "json", "5b91dbd100002057"]) assert result.exit_code == 0 - parsed_output = json.loads(result.output) + parsed_output = json.loads(result.stdout) assert parsed_output["id"] == "gomodel:5b91dbd100002057" def test_version(runner): result = runner.invoke(cli, ["--version"]) assert result.exit_code == 0 - assert __version__ in result.output + assert __version__ in result.stdout @pytest.mark.parametrize("format", ["json", "yaml"]) @@ -58,7 +58,7 @@ def test_convert_to_cx2_from_file(runner, format): ], ) assert result.exit_code == 0 - cx2 = json.loads(result.output) + cx2 = json.loads(result.stdout) assert isinstance(cx2, list) @@ -69,7 +69,7 @@ def test_convert_to_cx2_from_stdin(runner, format): cli, ["convert", "-O", "cx2", "-I", format], input=f.read() ) assert result.exit_code == 0 - cx2 = json.loads(result.output) + cx2 = json.loads(result.stdout) assert isinstance(cx2, list) From 1db6ed09fdbf6e7f1f84efd601d9f7a6bc51e34b Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Wed, 18 Sep 2024 16:27:47 -0700 Subject: [PATCH 3/7] Populate `member` node attribute for complex type --- src/gocam/translation/cx2/main.py | 34 ++- tests/input/Model-5ce58dde00001215.yaml | 279 ------------------------ tests/input/Model-6606056e00002011.yaml | 210 ++++++++++++++++++ tests/test_translation/test_cx2.py | 21 +- 4 files changed, 256 insertions(+), 288 deletions(-) delete mode 100644 tests/input/Model-5ce58dde00001215.yaml create mode 100644 tests/input/Model-6606056e00002011.yaml diff --git a/src/gocam/translation/cx2/main.py b/src/gocam/translation/cx2/main.py index b7babd3..36d8f7f 100644 --- a/src/gocam/translation/cx2/main.py +++ b/src/gocam/translation/cx2/main.py @@ -1,5 +1,6 @@ import logging import re +from enum import Enum from ndex2.cx2 import CX2Network @@ -12,9 +13,10 @@ logger = logging.getLogger(__name__) -# Derived from https://github.com/geneontology/wc-gocam-viz/blob/6ef1fcaddfef97ece94d04b7c23ac09c33ace168/src/globals/%40noctua.form/data/taxon-dataset.json -# TODO: Can this not be hardcoded? Consider just splitting the label on space and keeping the first -# part? Could also go into the MinervaWrapper class. +# Derived from +# https://github.com/geneontology/wc-gocam-viz/blob/6ef1fcaddfef97ece94d04b7c23ac09c33ace168/src/globals/%40noctua.form/data/taxon-dataset.json +# If maintaining this list becomes onerous, consider splitting the label on a space and taking only +# the first part SPECIES_CODES = [ "Atal", "Btau", @@ -48,6 +50,11 @@ def _remove_species_code_suffix(label: str) -> str: IQUERY_GENE_SYMBOL_PATTERN = re.compile("(^[A-Z][A-Z0-9-]*$)|(^C[0-9]+orf[0-9]+$)") +class NODE_TYPE(str, Enum): + GENE = "gene" + COMPLEX = "complex" + + def model_to_cx2(gocam: Model) -> list: def _get_object_label(object_id: str) -> str: @@ -68,14 +75,17 @@ def _get_object_label(object_id: str) -> str: continue if isinstance(activity.enabled_by, EnabledByProteinComplexAssociation): - node_type = "complex" + node_type = NODE_TYPE.COMPLEX else: - node_type = "gene" + node_type = NODE_TYPE.GENE node_name = _remove_species_code_suffix( _get_object_label(activity.enabled_by.term) ) - if node_type == "gene" and IQUERY_GENE_SYMBOL_PATTERN.match(node_name) is None: + if ( + node_type == NODE_TYPE.GENE + and IQUERY_GENE_SYMBOL_PATTERN.match(node_name) is None + ): logger.warning( f"Name for gene node does not match expected pattern: {node_name}" ) @@ -83,9 +93,19 @@ def _get_object_label(object_id: str) -> str: node_attributes = { "name": node_name, "represents": activity.enabled_by.term, - "type": node_type, + "type": node_type.value, } + if node_type == NODE_TYPE.COMPLEX: + node_attributes["member"] = [] + for member in activity.enabled_by.members: + member_name = _remove_species_code_suffix(_get_object_label(member)) + if IQUERY_GENE_SYMBOL_PATTERN.match(member_name) is None: + logger.warning( + f"Name for complex member does not match expected pattern: {member_name}" + ) + node_attributes["member"].append(member_name) + if activity.molecular_function: node_attributes["molecular_function_id"] = activity.molecular_function.term node_attributes["molecular_function_label"] = _get_object_label( diff --git a/tests/input/Model-5ce58dde00001215.yaml b/tests/input/Model-5ce58dde00001215.yaml deleted file mode 100644 index ca51aa2..0000000 --- a/tests/input/Model-5ce58dde00001215.yaml +++ /dev/null @@ -1,279 +0,0 @@ ---- -id: gomodel:5ce58dde00001215 -title: Mouse-Aatf-antiapoptosis -taxon: NCBITaxon:10090 -status: production -comments: -- 'Automated change 2023-03-16: RO:0002213 replaced by RO:0002629' -activities: -- id: gomodel:5ce58dde00001215/5ce58dde00001232 - enabled_by: - term: MGI:MGI:1328355 - molecular_function: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - occurs_in: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: CL:0000169 - part_of: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2022-11-01' - term: GO:0030968 - causal_associations: - - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0002304 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001288 -- id: gomodel:5ce58dde00001215/5ce58dde00001317 - enabled_by: - term: MGI:MGI:1930134 - molecular_function: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - part_of: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0030968 - causal_associations: - - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0002304 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001288 -- id: gomodel:5ce58dde00001215/5ce58dde00001288 - enabled_by: - term: GO:0005667 - members: - - MGI:MGI:1929608 - - MGI:MGI:103038 - molecular_function: - evidence: - - term: ECO:0000314 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003700 - occurs_in: - evidence: - - term: ECO:0000314 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0005634 - part_of: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2022-11-01' - term: GO:0030968 - causal_associations: - - evidence: - - term: ECO:0000314 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0002629 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001231 - - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0002304 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001311 -- id: gomodel:5ce58dde00001215/5ce58dde00001301 - enabled_by: - term: MGI:MGI:1098684 - molecular_function: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - part_of: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2022-11-01' - term: GO:0030968 - causal_associations: - - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0004047 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001288 -- id: gomodel:5ce58dde00001215/5ce58dde00001311 - enabled_by: - term: MGI:MGI:1328355 - molecular_function: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - part_of: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2022-11-01' - term: GO:0030968 -- id: gomodel:5ce58dde00001215/5ce58dde00001231 - enabled_by: - term: MGI:MGI:87986 - molecular_function: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - part_of: - evidence: - - term: ECO:0000315 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:2001243 -- id: gomodel:5ce58dde00001215/5ce58dde00001298 - enabled_by: - term: MGI:MGI:98970 - molecular_function: - evidence: - - term: ECO:0000314 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - causal_associations: - - evidence: - - term: ECO:0000314 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0002304 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001232 -- id: gomodel:5ce58dde00001215/5ce58dde00001302 - enabled_by: - term: MGI:MGI:1341830 - molecular_function: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - term: GO:0003674 - part_of: - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2022-11-01' - term: GO:0030968 - causal_associations: - - evidence: - - term: ECO:0000304 - reference: PMID:19911006 - provenances: - - contributor: https://orcid.org/0000-0001-7476-6306 - date: '2019-06-25' - predicate: RO:0002304 - downstream_activity: gomodel:5ce58dde00001215/5ce58dde00001301 -objects: -- id: MGI:MGI:87986 - label: Akt1 Mmus -- id: GO:0030968 - label: endoplasmic reticulum unfolded protein response -- id: GO:0097193 - label: intrinsic apoptotic signaling pathway -- id: GO:0036503 - label: ERAD pathway -- id: GO:0061077 - label: chaperone-mediated protein folding -- id: ECO:0000304 - label: author statement supported by traceable reference used in manual assertion -- id: GO:0006412 - label: translation -- id: MGI:MGI:1328355 - label: Wfs1 Mmus -- id: MGI:MGI:1929608 - label: Aatf Mmus -- id: GO:0003674 - label: molecular_function -- id: ECO:0000315 - label: mutant phenotype evidence used in manual assertion -- id: CL:0000169 - label: type B pancreatic cell -- id: MGI:MGI:1341830 - label: Eif2ak3 Mmus -- id: MGI:MGI:1098684 - label: Eif2a Mmus -- id: MGI:MGI:1930134 - label: Ern1 Mmus -- id: MGI:MGI:98970 - label: Xbp1 Mmus -- id: GO:2001243 - label: negative regulation of intrinsic apoptotic signaling pathway -- id: ECO:0000314 - label: direct assay evidence used in manual assertion -- id: MGI:MGI:103038 - label: Stat3 Mmus -- id: GO:0005667 - label: transcription regulator complex -- id: GO:0003700 - label: DNA-binding transcription factor activity -- id: GO:0005634 - label: nucleus - diff --git a/tests/input/Model-6606056e00002011.yaml b/tests/input/Model-6606056e00002011.yaml new file mode 100644 index 0000000..e68cfdd --- /dev/null +++ b/tests/input/Model-6606056e00002011.yaml @@ -0,0 +1,210 @@ +--- +id: gomodel:6606056e00002011 +title: CD72 and BCR co-stimulation by sn/RNP self antigen (Human) +taxon: NCBITaxon:9606 +status: production +activities: +- id: gomodel:6606056e00002011/662af8fa00002857 + enabled_by: + term: GO:0019815 + members: + - UniProtKB:P40259 + - UniProtKB:P11912 + molecular_function: + evidence: + - term: ECO:0000250 + reference: GO_REF:0000024 + with_objects: + - MGI:96892 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-05-07' + term: GO:0004888 + occurs_in: + evidence: + - term: ECO:0000314 + reference: PMID:36426942 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-05-07' + term: GO:0005886 + part_of: + evidence: + - term: ECO:0000250 + reference: GO_REF:0000024 + with_objects: + - MGI:96892 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-05-07' + term: GO:0050853 + has_input: + - evidence: + - term: ECO:0000250 + reference: GO_REF:0000024 + with_objects: + - MGI:96892 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-05-07' + term: UniProtKB:P07948 + causal_associations: + - evidence: + - term: ECO:0000250 + reference: GO_REF:0000024 + with_objects: + - MGI:96892 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-05-07' + predicate: RO:0002629 + downstream_activity: gomodel:6606056e00002011/6606056e00002040 +- id: gomodel:6606056e00002011/6606056e00002049 + enabled_by: + term: UniProtKB:P29350 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:17562706 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0004725 + occurs_in: + evidence: + - term: ECO:0000314 + reference: PMID:10940933 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0005737 + part_of: + evidence: + - term: ECO:0000314 + reference: PMID:35941532 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0050859 +- id: gomodel:6606056e00002011/6606056e00002040 + enabled_by: + term: UniProtKB:P07948 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:11823534 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0004713 + occurs_in: + evidence: + - term: ECO:0000314 + reference: PMID:15173188 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0005886 + part_of: + evidence: + - term: ECO:0000250 + reference: GO_REF:0000024 + with_objects: + - UniProtKB:P25911 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0001782 + has_input: + - evidence: + - term: ECO:0000250 + reference: GO_REF:0000024 + with_objects: + - MGI:96892 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-05-07' + term: UniProtKB:P21854 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:11823534 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + predicate: RO:0002629 + downstream_activity: gomodel:6606056e00002011/6606056e00002014 +- id: gomodel:6606056e00002011/6606056e00002014 + enabled_by: + term: UniProtKB:P21854 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:27810925 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0004888 + occurs_in: + evidence: + - term: ECO:0000314 + reference: PMID:1711157 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0005886 + part_of: + evidence: + - term: ECO:0000314 + reference: PMID:1711157 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + term: GO:0050859 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:27810925 + provenances: + - contributor: https://orcid.org/0000-0001-7646-0052 + date: '2024-04-09' + predicate: RO:0002629 + downstream_activity: gomodel:6606056e00002011/6606056e00002049 +objects: +- id: GO:0005615 + label: extracellular space +- id: GO:0004888 + label: transmembrane signaling receptor activity +- id: UniProtKB:P21854 + label: CD72 Hsap +- id: GO:0050859 + label: negative regulation of B cell receptor signaling pathway +- id: GO:0005886 + label: plasma membrane +- id: ECO:0000314 + label: direct assay evidence used in manual assertion +- id: GO:0004713 + label: protein tyrosine kinase activity +- id: UniProtKB:P07948 + label: LYN Hsap +- id: GO:0001782 + label: B cell homeostasis +- id: ECO:0000250 + label: sequence similarity evidence used in manual assertion +- id: GO:0004725 + label: protein tyrosine phosphatase activity +- id: UniProtKB:P29350 + label: PTPN6 Hsap +- id: GO:0005737 + label: cytoplasm +- id: GO:0019815 + label: B cell receptor complex +- id: UniProtKB:P11912 + label: CD79A Hsap +- id: UniProtKB:P40259 + label: CD79B Hsap +- id: GO:0050853 + label: B cell receptor signaling pathway +- id: CHEBI:166824 + label: peptide antigen + diff --git a/tests/test_translation/test_cx2.py b/tests/test_translation/test_cx2.py index ae7cdda..bfd951a 100644 --- a/tests/test_translation/test_cx2.py +++ b/tests/test_translation/test_cx2.py @@ -65,14 +65,31 @@ def test_load_cx2_to_ndex(example_model): def test_node_type_attribute(input_model): """Test that the `type` attribute is correctly set for nodes.""" - model = input_model("Model-5ce58dde00001215") + model = input_model("Model-6606056e00002011") cx2 = model_to_cx2(model) node_aspect = next((aspect for aspect in cx2 if "nodes" in aspect), None) assert node_aspect is not None for node in node_aspect["nodes"]: node_attrs = node["v"] - if node_attrs["name"] == "transcription regulator complex": + if node_attrs["name"] == "B cell receptor complex": assert node_attrs["type"] == "complex" else: assert node_attrs["type"] == "gene" + + +def test_node_name_and_member_attributes(input_model): + model = input_model("Model-6606056e00002011") + cx2 = model_to_cx2(model) + + node_aspect = next((aspect for aspect in cx2 if "nodes" in aspect), None) + assert node_aspect is not None + for node in node_aspect["nodes"]: + node_attrs = node["v"] + if node_attrs["name"] == "B cell receptor complex": + assert "member" in node_attrs + assert len(node_attrs["member"]) == 2 + assert all("Hsap" not in member for member in node_attrs["member"]) + else: + assert "member" not in node_attrs + assert "Hsap" not in node_attrs["name"] From 545398725a27e434efcb115cea5705a82e080d8c Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 19 Sep 2024 10:15:32 -0700 Subject: [PATCH 4/7] Add nodes to CX2 network for input/output molecules --- src/gocam/translation/cx2/main.py | 72 +++++++++- tests/input/Model-63f809ec00000701.yaml | 176 ++++++++++++++++++++++++ tests/test_translation/test_cx2.py | 42 +++++- 3 files changed, 282 insertions(+), 8 deletions(-) create mode 100644 tests/input/Model-63f809ec00000701.yaml diff --git a/src/gocam/translation/cx2/main.py b/src/gocam/translation/cx2/main.py index 36d8f7f..25380e2 100644 --- a/src/gocam/translation/cx2/main.py +++ b/src/gocam/translation/cx2/main.py @@ -1,10 +1,15 @@ import logging import re from enum import Enum +from typing import Dict, List, Optional, Union from ndex2.cx2 import CX2Network -from gocam.datamodel import EnabledByProteinComplexAssociation, Model +from gocam.datamodel import ( + EnabledByProteinComplexAssociation, + Model, + MoleculeAssociation, +) from gocam.translation.cx2.style import ( RELATIONS, VISUAL_EDITOR_PROPERTIES, @@ -57,10 +62,46 @@ class NODE_TYPE(str, Enum): def model_to_cx2(gocam: Model) -> list: + # Internal state + input_output_nodes: Dict[str, int] = {} + activity_nodes: Dict[str, int] = {} + + # Internal helper functions that access internal state def _get_object_label(object_id: str) -> str: object = next((obj for obj in gocam.objects if obj.id == object_id), None) - return object.label if object is not None else "" + return _remove_species_code_suffix(object.label) if object is not None else "" + + def _add_input_output_nodes( + associations: Optional[Union[MoleculeAssociation, List[MoleculeAssociation]]], + edge_attributes: dict, + ) -> None: + if associations is None: + return + if not isinstance(associations, list): + associations = [associations] + for association in associations: + if association.term not in input_output_nodes: + node_attributes = { + "name": _get_object_label(association.term), + "represents": association.term, + } + + if association.provenances: + node_attributes["provenance"] = [ + p.contributor for p in association.provenances + ] + + input_output_nodes[association.term] = cx2_network.add_node( + attributes=node_attributes + ) + cx2_network.add_edge( + source=input_output_nodes[association.term], + target=activity_nodes[activity.id], + attributes=edge_attributes, + ) + + # Create the CX2 network and set network-level attributes cx2_network = CX2Network() cx2_network.set_network_attributes( { @@ -69,7 +110,7 @@ def _get_object_label(object_id: str) -> str: } ) - activity_nodes = {} + # Add nodes for activities, labeled by the activity's enabled_by object for activity in gocam.activities: if activity.enabled_by is None: continue @@ -79,9 +120,7 @@ def _get_object_label(object_id: str) -> str: else: node_type = NODE_TYPE.GENE - node_name = _remove_species_code_suffix( - _get_object_label(activity.enabled_by.term) - ) + node_name = _get_object_label(activity.enabled_by.term) if ( node_type == NODE_TYPE.GENE and IQUERY_GENE_SYMBOL_PATTERN.match(node_name) is None @@ -99,7 +138,7 @@ def _get_object_label(object_id: str) -> str: if node_type == NODE_TYPE.COMPLEX: node_attributes["member"] = [] for member in activity.enabled_by.members: - member_name = _remove_species_code_suffix(_get_object_label(member)) + member_name = _get_object_label(member) if IQUERY_GENE_SYMBOL_PATTERN.match(member_name) is None: logger.warning( f"Name for complex member does not match expected pattern: {member_name}" @@ -129,6 +168,24 @@ def _get_object_label(object_id: str) -> str: activity_nodes[activity.id] = cx2_network.add_node(attributes=node_attributes) + # Add nodes for input/output molecules and create edges to activity nodes + for activity in gocam.activities: + _add_input_output_nodes( + activity.has_input, {"name": "has input", "represents": "RO:0002233"} + ) + _add_input_output_nodes( + activity.has_output, {"name": "has output", "represents": "RO:0002234"} + ) + _add_input_output_nodes( + activity.has_primary_input, + {"name": "has primary input", "represents": "RO:0004009"}, + ) + _add_input_output_nodes( + activity.has_primary_output, + {"name": "has primary output", "represents": "RO:0004008"}, + ) + + # Add edges for causal associations between activity nodes for activity in gocam.activities: for association in activity.causal_associations: if association.downstream_activity in activity_nodes: @@ -157,6 +214,7 @@ def _get_object_label(object_id: str) -> str: attributes=edge_attributes, ) + # Set visual properties for the network cx2_network.set_visual_properties(VISUAL_PROPERTIES) cx2_network.set_opaque_aspect("visualEditorProperties", [VISUAL_EDITOR_PROPERTIES]) diff --git a/tests/input/Model-63f809ec00000701.yaml b/tests/input/Model-63f809ec00000701.yaml new file mode 100644 index 0000000..6f50398 --- /dev/null +++ b/tests/input/Model-63f809ec00000701.yaml @@ -0,0 +1,176 @@ +--- +id: gomodel:63f809ec00000701 +title: tRNA repair and recycling by ANKZF1, ELAC1 and TRNT1 following activity of + the RQC complex in response to stalled ribosomes (Human) +taxon: NCBITaxon:9606 +status: production +activities: +- id: gomodel:63f809ec00000701/63f809ec00000742 + enabled_by: + term: UniProtKB:Q96Q11 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:32075755 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0004810 + part_of: + evidence: + - term: ECO:0000314 + reference: PMID:32075755 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0001680 +- id: gomodel:63f809ec00000701/63f809ec00000726 + enabled_by: + term: UniProtKB:Q9H8Y5 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:31011209 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0004521 + part_of: + evidence: + - term: ECO:0000314 + reference: PMID:31011209 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0072344 + has_output: + - evidence: + - term: ECO:0000314 + reference: PMID:31011209 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: CHEBI:10668 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:31011209 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + predicate: RO:0002629 + downstream_activity: gomodel:63f809ec00000701/63f809ec00000735 +- id: gomodel:63f809ec00000701/63f809ec00000735 + enabled_by: + term: UniProtKB:Q9H777 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:32075755 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0004549 + part_of: + evidence: + - term: ECO:0000314 + reference: PMID:32075755 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0042780 + has_input: + - evidence: + - term: ECO:0000314 + reference: PMID:32075755 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: CHEBI:10668 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:32075755 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + predicate: RO:0002629 + downstream_activity: gomodel:63f809ec00000701/63f809ec00000742 +- id: gomodel:63f809ec00000701/63f809ec00000706 + enabled_by: + term: UniProtKB:O60524 + molecular_function: + evidence: + - term: ECO:0000314 + reference: PMID:33909987 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:1904678 + occurs_in: + evidence: + - term: ECO:0000314 + reference: PMID:33909987 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0022626 + part_of: + evidence: + - term: ECO:0000314 + reference: PMID:33909987 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: GO:0140708 + has_input: + - evidence: + - term: ECO:0000314 + reference: PMID:33909987 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-01' + term: CHEBI:17732 + causal_associations: + - evidence: + - term: ECO:0000314 + reference: PMID:33909987 + provenances: + - contributor: https://orcid.org/0000-0001-7299-6685 + date: '2023-03-02' + predicate: RO:0002304 + downstream_activity: gomodel:63f809ec00000701/63f809ec00000726 +objects: +- id: GO:1904678 + label: alpha-aminoacyl-tRNA binding +- id: UniProtKB:O60524 + label: NEMF Hsap +- id: GO:0140708 + label: CAT tailing +- id: GO:0022626 + label: cytosolic ribosome +- id: CHEBI:17732 + label: Ala-tRNA(Ala) +- id: GO:0004521 + label: RNA endonuclease activity +- id: UniProtKB:Q9H8Y5 + label: ANKZF1 Hsap +- id: GO:0072344 + label: rescue of stalled ribosome +- id: ECO:0000314 + label: direct assay evidence used in manual assertion +- id: CHEBI:10668 + label: tRNA precursor +- id: GO:0004549 + label: tRNA-specific ribonuclease activity +- id: UniProtKB:Q9H777 + label: ELAC1 Hsap +- id: GO:0042780 + label: tRNA 3'-end processing +- id: GO:0004810 + label: CCA tRNA nucleotidyltransferase activity +- id: UniProtKB:Q96Q11 + label: TRNT1 Hsap +- id: GO:0001680 + label: tRNA 3'-terminal CCA addition + diff --git a/tests/test_translation/test_cx2.py b/tests/test_translation/test_cx2.py index bfd951a..af68c23 100644 --- a/tests/test_translation/test_cx2.py +++ b/tests/test_translation/test_cx2.py @@ -72,9 +72,12 @@ def test_node_type_attribute(input_model): assert node_aspect is not None for node in node_aspect["nodes"]: node_attrs = node["v"] + # If this is the expected complex node, check that the type is set to "complex" if node_attrs["name"] == "B cell receptor complex": assert node_attrs["type"] == "complex" - else: + # Otherwise, if the node has a type attribute (nodes created by input/output + # associations won't have that attribute), check that it is set to "gene" + elif "type" in node_attrs: assert node_attrs["type"] == "gene" @@ -93,3 +96,40 @@ def test_node_name_and_member_attributes(input_model): else: assert "member" not in node_attrs assert "Hsap" not in node_attrs["name"] + + +def test_activity_input_output_notes(input_model): + model = input_model("Model-63f809ec00000701") + cx2 = model_to_cx2(model) + + node_aspect = next((aspect for aspect in cx2 if "nodes" in aspect), None) + assert node_aspect is not None + # Find the node that should be the source of both a "has input" and "has output" edge + io_node = next( + ( + node + for node in node_aspect["nodes"] + if node["v"]["name"] == "tRNA precursor" + ), + None, + ) + assert io_node is not None + + edge_aspect = next((aspect for aspect in cx2 if "edges" in aspect), None) + assert edge_aspect is not None + + # Find the edge that has the expected source node and edge named "has input" + input_edge = next( + edge + for edge in edge_aspect["edges"] + if edge["s"] == io_node["id"] and edge["v"]["name"] == "has input" + ) + assert input_edge is not None + + # Find the edge that has the expected source node and edge named "has output" + output_edge = next( + edge + for edge in edge_aspect["edges"] + if edge["s"] == io_node["id"] and edge["v"]["name"] == "has output" + ) + assert output_edge is not None From 03f9a7476fd04533e2d76e9e007524d0f5c595e5 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 19 Sep 2024 12:05:47 -0700 Subject: [PATCH 5/7] Ensure type specifiers are included in serialized output --- src/gocam/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gocam/cli.py b/src/gocam/cli.py index 93a646d..78288f7 100644 --- a/src/gocam/cli.py +++ b/src/gocam/cli.py @@ -69,7 +69,7 @@ def fetch(model_ids, format): for model_id in model_ids: model = wrapper.fetch_model(model_id) - model_dict = model.model_dump(exclude_none=True, exclude_defaults=True) + model_dict = model.model_dump(exclude_none=True) if format == "json": click.echo(json.dumps(model_dict, indent=2)) From 728ee583198147bb69b5161dce3ca17b5e37e4c4 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 19 Sep 2024 12:06:36 -0700 Subject: [PATCH 6/7] Do not add empty `member` attribute to complex nodes --- src/gocam/translation/cx2/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gocam/translation/cx2/main.py b/src/gocam/translation/cx2/main.py index 25380e2..9d5072b 100644 --- a/src/gocam/translation/cx2/main.py +++ b/src/gocam/translation/cx2/main.py @@ -135,7 +135,7 @@ def _add_input_output_nodes( "type": node_type.value, } - if node_type == NODE_TYPE.COMPLEX: + if node_type == NODE_TYPE.COMPLEX and activity.enabled_by.members: node_attributes["member"] = [] for member in activity.enabled_by.members: member_name = _get_object_label(member) From 8188db99452c649e3931ce1ef64e9400866dae99 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 19 Sep 2024 13:28:55 -0700 Subject: [PATCH 7/7] Updated serialized Models used in tests --- tests/input/Model-63f809ec00000701.yaml | 81 +++++++++++++++++++++-- tests/input/Model-6606056e00002011.yaml | 86 +++++++++++++++++++++++-- 2 files changed, 156 insertions(+), 11 deletions(-) diff --git a/tests/input/Model-63f809ec00000701.yaml b/tests/input/Model-63f809ec00000701.yaml index 6f50398..eda5ec3 100644 --- a/tests/input/Model-63f809ec00000701.yaml +++ b/tests/input/Model-63f809ec00000701.yaml @@ -7,170 +7,239 @@ status: production activities: - id: gomodel:63f809ec00000701/63f809ec00000742 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:Q96Q11 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:32075755 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0004810 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000314 reference: PMID:32075755 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0001680 + has_input: [] + has_output: [] + causal_associations: [] + provenances: [] - id: gomodel:63f809ec00000701/63f809ec00000726 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:Q9H8Y5 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:31011209 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0004521 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000314 reference: PMID:31011209 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0072344 + has_input: [] has_output: - - evidence: + - type: MoleculeAssociation + evidence: - term: ECO:0000314 reference: PMID:31011209 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: CHEBI:10668 causal_associations: - - evidence: + - type: CausalAssociation + evidence: - term: ECO:0000314 reference: PMID:31011209 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] predicate: RO:0002629 downstream_activity: gomodel:63f809ec00000701/63f809ec00000735 + provenances: [] - id: gomodel:63f809ec00000701/63f809ec00000735 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:Q9H777 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:32075755 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0004549 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000314 reference: PMID:32075755 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0042780 has_input: - - evidence: + - type: MoleculeAssociation + evidence: - term: ECO:0000314 reference: PMID:32075755 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: CHEBI:10668 + has_output: [] causal_associations: - - evidence: + - type: CausalAssociation + evidence: - term: ECO:0000314 reference: PMID:32075755 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] predicate: RO:0002629 downstream_activity: gomodel:63f809ec00000701/63f809ec00000742 + provenances: [] - id: gomodel:63f809ec00000701/63f809ec00000706 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:O60524 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:33909987 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:1904678 occurs_in: + type: CellularAnatomicalEntityAssociation evidence: - term: ECO:0000314 reference: PMID:33909987 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0022626 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000314 reference: PMID:33909987 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: GO:0140708 has_input: - - evidence: + - type: MoleculeAssociation + evidence: - term: ECO:0000314 reference: PMID:33909987 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-01' + provenances: [] term: CHEBI:17732 + has_output: [] causal_associations: - - evidence: + - type: CausalAssociation + evidence: - term: ECO:0000314 reference: PMID:33909987 provenances: - contributor: https://orcid.org/0000-0001-7299-6685 date: '2023-03-02' + provenances: [] predicate: RO:0002304 downstream_activity: gomodel:63f809ec00000701/63f809ec00000726 + provenances: [] objects: - id: GO:1904678 label: alpha-aminoacyl-tRNA binding + type: gocam:Object - id: UniProtKB:O60524 label: NEMF Hsap + type: gocam:Object - id: GO:0140708 label: CAT tailing + type: gocam:Object - id: GO:0022626 label: cytosolic ribosome + type: gocam:Object - id: CHEBI:17732 label: Ala-tRNA(Ala) + type: gocam:Object - id: GO:0004521 label: RNA endonuclease activity + type: gocam:Object - id: UniProtKB:Q9H8Y5 label: ANKZF1 Hsap + type: gocam:Object - id: GO:0072344 label: rescue of stalled ribosome + type: gocam:Object - id: ECO:0000314 label: direct assay evidence used in manual assertion + type: gocam:Object - id: CHEBI:10668 label: tRNA precursor + type: gocam:Object - id: GO:0004549 label: tRNA-specific ribonuclease activity + type: gocam:Object - id: UniProtKB:Q9H777 label: ELAC1 Hsap + type: gocam:Object - id: GO:0042780 label: tRNA 3'-end processing + type: gocam:Object - id: GO:0004810 label: CCA tRNA nucleotidyltransferase activity + type: gocam:Object - id: UniProtKB:Q96Q11 label: TRNT1 Hsap + type: gocam:Object - id: GO:0001680 label: tRNA 3'-terminal CCA addition + type: gocam:Object +provenances: [] diff --git a/tests/input/Model-6606056e00002011.yaml b/tests/input/Model-6606056e00002011.yaml index e68cfdd..4b3be1a 100644 --- a/tests/input/Model-6606056e00002011.yaml +++ b/tests/input/Model-6606056e00002011.yaml @@ -6,11 +6,15 @@ status: production activities: - id: gomodel:6606056e00002011/662af8fa00002857 enabled_by: + type: EnabledByProteinComplexAssociation + evidence: [] + provenances: [] term: GO:0019815 members: - UniProtKB:P40259 - UniProtKB:P11912 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000250 reference: GO_REF:0000024 @@ -19,16 +23,20 @@ activities: provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-05-07' + provenances: [] term: GO:0004888 occurs_in: + type: CellularAnatomicalEntityAssociation evidence: - term: ECO:0000314 reference: PMID:36426942 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-05-07' + provenances: [] term: GO:0005886 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000250 reference: GO_REF:0000024 @@ -37,9 +45,11 @@ activities: provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-05-07' + provenances: [] term: GO:0050853 has_input: - - evidence: + - type: MoleculeAssociation + evidence: - term: ECO:0000250 reference: GO_REF:0000024 with_objects: @@ -47,9 +57,12 @@ activities: provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-05-07' + provenances: [] term: UniProtKB:P07948 + has_output: [] causal_associations: - - evidence: + - type: CausalAssociation + evidence: - term: ECO:0000250 reference: GO_REF:0000024 with_objects: @@ -57,55 +70,78 @@ activities: provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-05-07' + provenances: [] predicate: RO:0002629 downstream_activity: gomodel:6606056e00002011/6606056e00002040 + provenances: [] - id: gomodel:6606056e00002011/6606056e00002049 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:P29350 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:17562706 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0004725 occurs_in: + type: CellularAnatomicalEntityAssociation evidence: - term: ECO:0000314 reference: PMID:10940933 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0005737 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000314 reference: PMID:35941532 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0050859 + has_input: [] + has_output: [] + causal_associations: [] + provenances: [] - id: gomodel:6606056e00002011/6606056e00002040 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:P07948 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:11823534 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0004713 occurs_in: + type: CellularAnatomicalEntityAssociation evidence: - term: ECO:0000314 reference: PMID:15173188 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0005886 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000250 reference: GO_REF:0000024 @@ -114,9 +150,11 @@ activities: provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0001782 has_input: - - evidence: + - type: MoleculeAssociation + evidence: - term: ECO:0000250 reference: GO_REF:0000024 with_objects: @@ -124,87 +162,125 @@ activities: provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-05-07' + provenances: [] term: UniProtKB:P21854 + has_output: [] causal_associations: - - evidence: + - type: CausalAssociation + evidence: - term: ECO:0000314 reference: PMID:11823534 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] predicate: RO:0002629 downstream_activity: gomodel:6606056e00002011/6606056e00002014 + provenances: [] - id: gomodel:6606056e00002011/6606056e00002014 enabled_by: + type: EnabledByGeneProductAssociation + evidence: [] + provenances: [] term: UniProtKB:P21854 molecular_function: + type: MolecularFunctionAssociation evidence: - term: ECO:0000314 reference: PMID:27810925 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0004888 occurs_in: + type: CellularAnatomicalEntityAssociation evidence: - term: ECO:0000314 reference: PMID:1711157 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0005886 part_of: + type: BiologicalProcessAssociation evidence: - term: ECO:0000314 reference: PMID:1711157 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] term: GO:0050859 + has_input: [] + has_output: [] causal_associations: - - evidence: + - type: CausalAssociation + evidence: - term: ECO:0000314 reference: PMID:27810925 provenances: - contributor: https://orcid.org/0000-0001-7646-0052 date: '2024-04-09' + provenances: [] predicate: RO:0002629 downstream_activity: gomodel:6606056e00002011/6606056e00002049 + provenances: [] objects: - id: GO:0005615 label: extracellular space + type: gocam:Object - id: GO:0004888 label: transmembrane signaling receptor activity + type: gocam:Object - id: UniProtKB:P21854 label: CD72 Hsap + type: gocam:Object - id: GO:0050859 label: negative regulation of B cell receptor signaling pathway + type: gocam:Object - id: GO:0005886 label: plasma membrane + type: gocam:Object - id: ECO:0000314 label: direct assay evidence used in manual assertion + type: gocam:Object - id: GO:0004713 label: protein tyrosine kinase activity + type: gocam:Object - id: UniProtKB:P07948 label: LYN Hsap + type: gocam:Object - id: GO:0001782 label: B cell homeostasis + type: gocam:Object - id: ECO:0000250 label: sequence similarity evidence used in manual assertion + type: gocam:Object - id: GO:0004725 label: protein tyrosine phosphatase activity + type: gocam:Object - id: UniProtKB:P29350 label: PTPN6 Hsap + type: gocam:Object - id: GO:0005737 label: cytoplasm + type: gocam:Object - id: GO:0019815 label: B cell receptor complex + type: gocam:Object - id: UniProtKB:P11912 label: CD79A Hsap + type: gocam:Object - id: UniProtKB:P40259 label: CD79B Hsap + type: gocam:Object - id: GO:0050853 label: B cell receptor signaling pathway + type: gocam:Object - id: CHEBI:166824 label: peptide antigen + type: gocam:Object +provenances: []