Skip to content

Commit

Permalink
Add QID uniqueness check
Browse files: browse the repository at this point in the history
  • Loading branch information
jmccrae committed Nov 23, 2024
1 parent 218cf57 commit 30dd6c6
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion scripts/validate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,6 +7,7 @@
from sense_keys import unmap_sense_key
from wordnet import xml_id_char
from collections import Counter
from from_yaml import load

def check_symmetry(wn, fix):
errors = []
Expand Down Expand Up @@ -199,7 +200,8 @@ def is_valid_sense_id(xml_id, synset):


def main():
wn = parse_wordnet("wn.xml")
#wn = parse_wordnet("wn.xml")
wn = load()

if len(sys.argv) > 1 and sys.argv[1] == "--fix":
fix = True
Expand Down Expand Up @@ -278,6 +280,7 @@ def main():

instances = set()
ilis = set()
wikidatas = set()

for synset in wn.synsets:
if synset.id[-1:] != synset.part_of_speech.value:
Expand Down Expand Up @@ -381,6 +384,12 @@ def main():
else:
ilis.add(synset.ili)

if synset.wikidata and synset.wikidata in wikidatas:
print(f"ERROR: QID {synset.wikidata} is duplicated")
errors += 1
else:
wikidatas.add(synset.wikidata)

for synset in wn.synsets:
for sr in synset.synset_relations:
if sr.rel_type == SynsetRelType.HYPERNYM:
Expand Down

0 comments on commit 30dd6c6

Please sign in to comment.