Skip to content

Commit

Permalink
Merge pull request #702 from biolink/pipeline-408-gorule-0000027-mgi-failing-go-checks
Browse files Browse the repository at this point in the history

For geneontology/pipeline#408
  • Loading branch information
mugitty authored Dec 14, 2024
2 parents 85dce88 + 69e0b37 commit 5a3da4f
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 4 deletions.
11 changes: 10 additions & 1 deletion ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,9 +800,11 @@ def _validate_id(self, id, line: SplitLine, allowed_ids=None, context=None):
return False

(id_prefix, right) = id.split(":", maxsplit=1)
mgi_id = None
if right.startswith("MGI:"):
## See ticket https://github.com/geneontology/go-site/issues/91
## For purposes of determining allowed IDs in DB XREF, MGI IDs shall look like `MGI:12345`
mgi_id = right
right = right[4:]

if id_prefix == "" or right == "":
Expand Down Expand Up @@ -830,9 +832,16 @@ def _validate_id(self, id, line: SplitLine, allowed_ids=None, context=None):
if regex.fullmatch(right):
identity_matches_pattern = True
break
if identity_matches_pattern == False:
# check syntax for mgi using id instead of internal representation
if mgi_id is not None and regex.fullmatch(mgi_id):
identity_matches_pattern = True
break
if identity_matches_pattern == False and mgi_id is None:
self.report.warning(line.line, Report.INVALID_ID, id,
"GORULE:0000027: {} does not match any id_syntax patterns for {} in dbxrefs".format(right, id_prefix), taxon=line.taxon, rule=27)
elif identity_matches_pattern == False and mgi_id is not None:
self.report.warning(line.line, Report.INVALID_ID, id,
"GORULE:0000027: {} does not match any id_syntax patterns for {} in dbxrefs".format(mgi_id, id_prefix), taxon=line.taxon, rule=27)
else:
self.report.warning(line.line, Report.INVALID_ID, id,
"GORULE:0000027: {} not found in list of database names in dbxrefs".format(id_prefix), taxon=line.taxon, rule=27)
Expand Down
23 changes: 21 additions & 2 deletions tests/test_gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,11 @@ def test_id_syntax():
database_id_syntax_lookups['PomBase'] = pombase_types

wb_ref_types = {}
database_id_syntax_lookups['WB_REF'] = wb_ref_types
database_id_syntax_lookups['WB_REF'] = wb_ref_types

mgi_types = {}
mgi_types['entity'] = re.compile('MGI:[0-9]{5,}')
database_id_syntax_lookups['MGI'] = mgi_types

p = GafParser(config=assocparser.AssocParserConfig(
ontology=OntologyFactory().create(ONT), db_type_name_regex_id_syntax=database_id_syntax_lookups))
Expand All @@ -648,7 +652,13 @@ def test_id_syntax():
assert len(assoc_result.associations) == 1
assert assoc_result.skipped == False
messages = p.report.to_report_json()["messages"]
assert "gorule-0000027" not in messages
assert "gorule-0000027" not in messages

assoc_result = p.parse_line("PomBase\tSPBC1289.03c\tspi1\t\tGO:0005515\tWB_REF:WBPaper00006408|PMID:18422602\tIPI\tMGI:MGI:1298204\tF\tRan GTPase Spi1\t\tprotein\ttaxon:4896\t20080718\tPomBase\t")
assert len(assoc_result.associations) == 1
assert assoc_result.skipped == False
messages = p.report.to_report_json()["messages"]
assert "gorule-0000027" not in messages

p = GafParser(config=assocparser.AssocParserConfig(
ontology=OntologyFactory().create(ONT), db_type_name_regex_id_syntax=database_id_syntax_lookups))
Expand Down Expand Up @@ -686,6 +696,15 @@ def test_id_syntax():
assert len(messages["gorule-0000027"]) == 1
assert messages["gorule-0000027"][0]["obj"] == "BLA:18422602"

p = GafParser(config=assocparser.AssocParserConfig(
ontology=OntologyFactory().create(ONT), db_type_name_regex_id_syntax=database_id_syntax_lookups))
assoc_result = p.parse_line("PomBase\tSPBC1289.03c\tspi1\t\tGO:0005515\tWB_REF:WBPaper00006408|PMID:18422602\tIPI\tMGI:1298204\tF\tRan GTPase Spi1\t\tprotein\ttaxon:4896\t20080718\tPomBase\t")
assert len(assoc_result.associations) == 1
assert assoc_result.skipped == False
messages = p.report.to_report_json()["messages"]
assert len(messages["gorule-0000027"]) == 1
assert messages["gorule-0000027"][0]["obj"] == "MGI:1298204"


def test_gaf_gpi_bridge():
gaf = ["MGI", "MGI:1923503", "0610006L08Rik", "enables", "GO:0003674", "MGI:MGI:2156816|GO_REF:0000015", "ND", "",
Expand Down
52 changes: 51 additions & 1 deletion tests/test_gpad_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,10 @@ def test_id_syntax():
pombase_types['entity'] = re.compile('S\\w+(\\.)?\\w+(\\.)?')
database_id_syntax_lookups['PomBase'] = pombase_types

mgi_types = {}
mgi_types['entity'] = re.compile('MGI:[0-9]{5,}')
database_id_syntax_lookups['MGI'] = mgi_types

eco_types = {}
eco_types['entity'] = re.compile(pattern)
database_id_syntax_lookups['ECO'] = eco_types
Expand Down Expand Up @@ -396,7 +400,31 @@ def test_id_syntax():
assert len(result.associations) == 1
assert result.skipped == False
messages = p.report.to_report_json()["messages"]
assert "gorule-0000027" not in messages
assert "gorule-0000027" not in messages

vals = ["PomBase",
"SPAC25A8.01c",
"acts_upstream_of_or_within",
"GO:0007155",
"MGI:MGI:1298204",
"ECO:0000305",
"GO:0005913",
"",
"20041026",
"ZFIN",
"",
"PomBase"
]

config = assocparser.AssocParserConfig(
ontology=OntologyFactory().create(ALT_ID_ONT), db_type_name_regex_id_syntax=database_id_syntax_lookups)
p = GpadParser(config=config)
result = p.parse_line("\t".join(vals))
assert len(result.associations) == 1
assert result.skipped == False
messages = p.report.to_report_json()["messages"]
assert "gorule-0000027" not in messages


vals = ["PomBase",
"SPAC25A8.01c",
Expand Down Expand Up @@ -487,6 +515,28 @@ def test_id_syntax():
assert len(messages["gorule-0000027"]) == 1
assert messages["gorule-0000027"][0]["obj"] == "BLA:15494018"

vals = ["PomBase",
"SPAC25A8.01c",
"acts_upstream_of_or_within",
"GO:0007155",
"MGI:15494018",
"ECO:0000305",
"GO:0005913",
"",
"20041026",
"ZFIN",
"",
"PomBase"
]
p = GpadParser(config=config)
result = p.parse_line("\t".join(vals))
assert len(result.associations) == 1
assert result.skipped == False
messages = p.report.to_report_json()["messages"]
assert len(messages["gorule-0000027"]) == 1
assert messages["gorule-0000027"][0]["obj"] == "MGI:15494018"


def test_gpi_check():
report = assocparser.Report(group="unknown", dataset="unknown")
vals = [
Expand Down

0 comments on commit 5a3da4f

Please sign in to comment.