From 30dd6c6e9da44dcb6c7da15f2c2b9cc995879e37 Mon Sep 17 00:00:00 2001 From: John McCrae Date: Sat, 23 Nov 2024 15:55:07 +0000 Subject: [PATCH] Add QID uniqueness check --- scripts/validate.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/validate.py b/scripts/validate.py index 49a001c1..bc470e24 100644 --- a/scripts/validate.py +++ b/scripts/validate.py @@ -7,6 +7,7 @@ from sense_keys import unmap_sense_key from wordnet import xml_id_char from collections import Counter +from from_yaml import load def check_symmetry(wn, fix): errors = [] @@ -199,7 +200,8 @@ def is_valid_sense_id(xml_id, synset): def main(): - wn = parse_wordnet("wn.xml") + #wn = parse_wordnet("wn.xml") + wn = load() if len(sys.argv) > 1 and sys.argv[1] == "--fix": fix = True @@ -278,6 +280,7 @@ def main(): instances = set() ilis = set() + wikidatas = set() for synset in wn.synsets: if synset.id[-1:] != synset.part_of_speech.value: @@ -381,6 +384,12 @@ def main(): else: ilis.add(synset.ili) + if synset.wikidata and synset.wikidata in wikidatas: + print(f"ERROR: QID {synset.wikidata} is duplicated") + errors += 1 + else: + wikidatas.add(synset.wikidata) + for synset in wn.synsets: for sr in synset.synset_relations: if sr.rel_type == SynsetRelType.HYPERNYM: