Add support for lexicon requirements
Part of #7 and #89
goodmami committed Feb 16, 2021
1 parent 73417ff commit 9230077
Showing 8 changed files with 149 additions and 27 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -27,6 +27,7 @@
* Support for adding ILI resources to the database ([#23])
* `wn.Lexicon.modified()` ([#17])
* `wn.WnWarning` (related to [#92])
* `wn.Lexicon.requires()`

### Fixed

@@ -54,6 +55,8 @@
`wn.project.is_collection_directory()` now detect
packages/collections with ILI resource files ([#23])
* `wn.project.iterpackages()` now includes ILI packages
* `wn.Wordnet` now sets the default `expand` value to a lexicon's
dependencies if they are specified (related to [#92])

### Schema

@@ -70,6 +73,7 @@
* Added rowid to tables with metadata
* Added source-sense to definitions table ([#65])
* Preemptively added a `modified` column to `lexicons` table ([#17])
* Added a table for lexicon dependencies ([#7], [#89])


## [v0.5.1]
@@ -261,6 +265,7 @@ abandoned, but this is an entirely new codebase.
[v0.1.0]: ../../releases/tag/v0.1.0
[unreleased]: ../../tree/main

[#7]: https://github.com/goodmami/wn/issues/7
[#15]: https://github.com/goodmami/wn/issues/15
[#17]: https://github.com/goodmami/wn/issues/17
[#23]: https://github.com/goodmami/wn/issues/23
@@ -287,6 +292,7 @@ abandoned, but this is an entirely new codebase.
[#83]: https://github.com/goodmami/wn/issues/83
[#86]: https://github.com/goodmami/wn/issues/86
[#87]: https://github.com/goodmami/wn/issues/87
[#89]: https://github.com/goodmami/wn/issues/89
[#90]: https://github.com/goodmami/wn/issues/90
[#91]: https://github.com/goodmami/wn/issues/91
[#92]: https://github.com/goodmami/wn/issues/92
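The changelog entries above add a small public surface for dependencies. A minimal sketch of how it might be exercised (lexicon names follow this commit's test data; the `test-ja` specifier and the printed shapes are illustrative assumptions):

```python
import wn

# Each installed lexicon reports its declared dependencies.
# Lexicon.requires() maps an 'id:version' specifier to the installed
# Lexicon object, or to None when the dependency is absent.
for lex in wn.lexicons():
    print(lex.specifier(), '->', lex.requires())
# e.g. test-en:1 -> {}
#      test-ja:1 -> {'test-en:1': <Lexicon ...>}
```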
23 changes: 22 additions & 1 deletion tests/conftest.py
@@ -30,7 +30,7 @@ def empty_db_dir():

@pytest.fixture(scope='session')
def mini_db_dir(mini_lmf_1_0):
-    with tempfile.TemporaryDirectory('wn_data_empty') as dir:
+    with tempfile.TemporaryDirectory('wn_data_mini') as dir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = dir
        wn.add(mini_lmf_1_0)
@@ -41,6 +41,20 @@ def mini_db_dir(mini_lmf_1_0):
        conn.close()


@pytest.fixture(scope='session')
def mini_db_1_1_dir(mini_lmf_1_0, mini_lmf_1_1):
    with tempfile.TemporaryDirectory('wn_data_mini_1_1') as dir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = dir
        wn.add(mini_lmf_1_0)
        wn.add(mini_lmf_1_1)
        wn.config.data_directory = old_data_dir
        yield Path(dir)
        # close any open DB connections before teardown
        for conn in wn._db.pool.values():
            conn.close()


@pytest.fixture
def empty_db(monkeypatch, empty_db_dir):
    with monkeypatch.context() as m:
@@ -53,3 +67,10 @@ def mini_db(monkeypatch, mini_db_dir):
    with monkeypatch.context() as m:
        m.setattr(wn.config, 'data_directory', mini_db_dir)
        yield


@pytest.fixture
def mini_db_1_1(monkeypatch, mini_db_1_1_dir):
    with monkeypatch.context() as m:
        m.setattr(wn.config, 'data_directory', mini_db_1_1_dir)
        yield
29 changes: 29 additions & 0 deletions tests/primary_query_test.py
@@ -21,13 +21,21 @@ def test_lexicons_mini():

    results = wn.lexicons(lexicon='*')
    assert len(results) == 2
    results = wn.lexicons(lexicon='*:1')
    assert len(results) == 2
    results = wn.lexicons(lexicon='test-en')
    assert len(results) == 1 and results[0].language == 'en'
    results = wn.lexicons(lexicon='test-en:1')
    assert len(results) == 1 and results[0].language == 'en'
    results = wn.lexicons(lexicon='test-en:*')
    assert len(results) == 1 and results[0].language == 'en'

    assert wn.lexicons(lexicon='test-en')[0].specifier() == 'test-en:1'
    assert wn.lexicons(lexicon='test-es')[0].specifier() == 'test-es:1'

    assert wn.lexicons(lexicon='test-en')[0].requires() == {}
    assert wn.lexicons(lexicon='test-es')[0].requires() == {}


@pytest.mark.usefixtures('mini_db')
def test_lexicons_unknown():
@@ -224,3 +232,24 @@ def test_synset_mini():
        assert wn.synset('test-es-0001-n', lang='unk')
    with pytest.raises(wn.Error):
        assert wn.synset('test-es-0001-n', lexicon='test-unk')


@pytest.mark.usefixtures('mini_db_1_1')
def test_mini_1_1():
    assert len(wn.lexicons()) == 3
    assert len(wn.lexicons(lang='en')) == 1
    assert len(wn.lexicons(lang='ja')) == 1

    w = wn.Wordnet(lang='en')
    assert len(w.lexicons()) == 1
    assert len(w.expanded_lexicons()) == 0

    w = wn.Wordnet(lang='ja')
    assert len(w.lexicons()) == 1
    assert len(w.expanded_lexicons()) == 1
    assert len(w.synsets('例え')[0].hypernyms()) == 1

    w = wn.Wordnet(lang='ja', expand='')
    assert len(w.lexicons()) == 1
    assert len(w.expanded_lexicons()) == 0
    assert len(w.synsets('例え')[0].hypernyms()) == 0
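
The new test exercises the default-expansion behavior: the Japanese mini lexicon declares the English one as a dependency, so hypernyms that cross into the English lexicon are reachable by default. A sketch of the three expansion modes, assuming the same mini lexicons are installed (the explicit specifier is illustrative):

```python
import wn

# Default: expand falls back to the lexicon's declared dependencies,
# so Japanese synsets reach hypernyms through the English lexicon.
w = wn.Wordnet(lang='ja')

# Explicit: name the expansion set yourself with lexicon specifiers.
w = wn.Wordnet(lang='ja', expand='test-en:1')

# Off: an empty string disables expansion, so cross-lexicon relations
# (like the hypernym in the test above) are no longer reachable.
w = wn.Wordnet(lang='ja', expand='')
```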
75 changes: 51 additions & 24 deletions wn/_add.py
@@ -104,40 +104,19 @@ def _add_lmf(
    # all clear, try to add them
    progress.flash(f'Reading {source!s}')
    for lexicon, info in zip(lmf.load(source), all_infos):

        if info.get('skip', False):
            progress.flash(
                f'Skipping {info["id"]}:{info["version"]} ({info["label"]})\n',
            )
            continue

-        cur.execute(
-            'INSERT INTO lexicons VALUES (null,?,?,?,?,?,?,?,?,?,?)',
-            (lexicon.id,
-             lexicon.label,
-             lexicon.language,
-             lexicon.email,
-             lexicon.license,
-             lexicon.version,
-             lexicon.url,
-             lexicon.citation,
-             lexicon.meta,
-             False))
-        lexid = cur.lastrowid
-
-        counts = info['counts']
-        count = sum(counts.get(name, 0) for name in
-                    ('LexicalEntry', 'Lemma', 'Form', 'Tag',
-                     'Sense', 'SenseRelation', 'Example', 'Count',
-                     'SyntacticBehaviour',
-                     'Synset', 'Definition',  # 'ILIDefinition',
-                     'SynsetRelation'))
-        progress.set(count=0, total=count)
-
+        progress.set(count=0, total=_sum_counts(info))
        synsets = lexicon.synsets
        entries = lexicon.lexical_entries
        synbhrs = lexicon.syntactic_behaviours

+        lexid = _insert_lexicon(lexicon, info, cur, progress)
+
        _insert_synsets(synsets, lexid, cur, progress)
        _insert_entries(entries, lexid, cur, progress)
        _insert_forms(entries, lexid, cur, progress)
@@ -154,6 +133,7 @@ def _add_lmf(
        _insert_examples([sense for entry in entries for sense in entry.senses],
                         lexid, 'sense_examples', cur, progress)
        _insert_examples(synsets, lexid, 'synset_examples', cur, progress)

        progress.set(status='')  # clear type string
        progress.flash(f'Added {lexicon.id}:{lexicon.version} ({lexicon.label})\n')

@@ -170,6 +150,53 @@ def _precheck(source, cur):
        yield info


def _sum_counts(info) -> int:
    counts = info['counts']
    return sum(counts.get(name, 0) for name in
               ('LexicalEntry', 'Lemma', 'Form', 'Tag',
                'Sense', 'SenseRelation', 'Example', 'Count',
                'SyntacticBehaviour',
                'Synset', 'Definition',  # 'ILIDefinition',
                'SynsetRelation'))


def _insert_lexicon(lexicon, info, cur, progress) -> int:
    progress.set(status='Lexicon Info')
    cur.execute(
        'INSERT INTO lexicons VALUES (null,?,?,?,?,?,?,?,?,?,?)',
        (lexicon.id,
         lexicon.label,
         lexicon.language,
         lexicon.email,
         lexicon.license,
         lexicon.version,
         lexicon.url,
         lexicon.citation,
         lexicon.meta,
         False))
    lexid = cur.lastrowid

    query = '''
        UPDATE lexicon_dependencies
        SET provider_rowid = ?
        WHERE provider_id = ? AND provider_version = ?
    '''
    cur.execute(query, (lexid, lexicon.id, lexicon.version))

    query = '''
        INSERT INTO lexicon_dependencies
        VALUES (?,?,?,?,(SELECT rowid FROM lexicons WHERE id=? AND version=?))
    '''
    params = []
    for dep in lexicon.requires:
        _id, ver, url = dep['id'], dep['version'], dep.get('url')
        params.append((lexid, _id, ver, url, _id, ver))
    if params:
        cur.executemany(query, params)

    return lexid


def _split(sequence):
it = iter(sequence)
batch = list(islice(it, 0, BATCH_SIZE))
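The two statements in `_insert_lexicon` make installation order-independent: the `UPDATE` backfills `provider_rowid` on dependency rows of lexicons installed earlier that named the new lexicon as a provider, and the `INSERT`'s subquery resolves the new lexicon's own dependencies against whatever is already present, leaving `NULL` otherwise. A self-contained sketch of the same pattern with a pared-down schema (the `provider_url` column is omitted here):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.executescript('''
    CREATE TABLE lexicons (id TEXT, version TEXT);
    CREATE TABLE lexicon_dependencies (
        dependent_rowid INTEGER,
        provider_id TEXT,
        provider_version TEXT,
        provider_rowid INTEGER
    );
''')

def add_lexicon(conn, id, version, requires=()):
    cur = conn.execute('INSERT INTO lexicons VALUES (?,?)', (id, version))
    lexid = cur.lastrowid
    # backfill: earlier installs that declared us as a provider get linked
    conn.execute(
        'UPDATE lexicon_dependencies SET provider_rowid = ? '
        'WHERE provider_id = ? AND provider_version = ?',
        (lexid, id, version))
    # record our own dependencies, resolving already-installed providers
    conn.executemany(
        'INSERT INTO lexicon_dependencies VALUES (?,?,?,'
        '(SELECT rowid FROM lexicons WHERE id=? AND version=?))',
        [(lexid, i, v, i, v) for i, v in requires])

add_lexicon(conn, 'ja', '1', requires=[('en', '1')])  # 'en' not yet installed
add_lexicon(conn, 'en', '1')                          # backfills the link
print(conn.execute('SELECT * FROM lexicon_dependencies').fetchall())
# -> [(1, 'en', '1', 2)]
```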
24 changes: 23 additions & 1 deletion wn/_core.py
@@ -1,5 +1,6 @@

from typing import TypeVar, Optional, List, Tuple, Dict, Set, Iterator
import warnings

import wn
from wn._types import Metadata
@@ -14,6 +15,7 @@
    find_synsets,
    get_lexicon,
    get_modified,
    get_lexicon_dependencies,
    get_form_tags,
    get_entry_senses,
    get_sense_relations,
@@ -153,6 +155,14 @@ def modified(self) -> bool:
"""Return True if the lexicon has local modifications."""
return get_modified(self._id)

def requires(self) -> Dict[str, Optional['Lexicon']]:
"""Return the lexicon dependencies."""
return dict(
(f'{id}:{version}',
None if _id is None else _to_lexicon(get_lexicon(_id)))
for id, version, _, _id in get_lexicon_dependencies(self._id)
)


class _LexiconElement(_DatabaseEntity):
    __slots__ = '_lexid', '_wordnet'
@@ -963,7 +973,19 @@ def __init__(self, lexicon: str = None, *, lang: str = None, expand: str = None)
        if expand is None:
            if self._default_mode:
                expand = '*'
-            # TODO: use project-specific settings
+            else:
+                deps = [(id, ver, _id)
+                        for lex in self._lexicons
+                        for id, ver, _, _id in get_lexicon_dependencies(lex._id)]
+                for id, ver, _id in deps:
+                    if _id is None:
+                        warnings.warn(
+                            f'dependent lexicon not available: {id}:{ver}',
+                            wn.WnWarning
+                        )
+                expand = ' '.join(
+                    f'{id}:{ver}' for id, ver, _id in deps if _id is not None
+                )
        if expand:
            self._expanded = tuple(map(_to_lexicon, find_lexicons(lexicon=expand)))
            self._expanded_ids: Tuple[int, ...] = tuple(lx._id for lx in self._expanded)
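`Lexicon.requires()` and the new fallback in `Wordnet.__init__` can be observed together. A sketch, assuming a lexicon with declared dependencies is installed (the `lang` value is illustrative):

```python
import warnings
import wn

lex = wn.lexicons(lang='ja')[0]
# keys are 'id:version' specifiers; a value is None when the declared
# dependency is not installed
print(lex.requires())

# a missing dependency does not fail construction; the fallback above
# emits wn.WnWarning and simply skips it in the expand set
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    wn.Wordnet(lang='ja')
print([str(w.message) for w in caught])  # empty if all dependencies resolve
```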
2 changes: 1 addition & 1 deletion wn/_db.py
@@ -37,7 +37,7 @@
# >>> wn._db.schema_hash(conn)
#
COMPATIBLE_SCHEMA_HASHES = {
-    'c400433b9d05fcb235f361b5bafdf831a12ea994',
+    '7cbe31e5148b6cc42bfafa3c1ab6994fa95c555b',
}


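The pinned hash must be regenerated whenever schema.sql changes, as it does below for the new `lexicon_dependencies` table. A sketch of that step, using the private helpers this file already references (`schema_hash` from the comment above; `connect` as imported by `wn/_queries.py`; exact signatures assumed):

```python
from wn._db import COMPATIBLE_SCHEMA_HASHES, connect, schema_hash

conn = connect()       # opens the configured database
h = schema_hash(conn)  # hash over the current schema
print(h)               # paste the value into COMPATIBLE_SCHEMA_HASHES
assert h in COMPATIBLE_SCHEMA_HASHES
```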
9 changes: 9 additions & 0 deletions wn/_queries.py
@@ -158,6 +158,15 @@ def get_modified(rowid: int) -> bool:
    return connect().execute(query, (rowid,)).fetchone()[0]


def get_lexicon_dependencies(rowid: int) -> List[Tuple[str, str, Optional[str], Optional[int]]]:
    query = '''
        SELECT provider_id, provider_version, provider_url, provider_rowid
        FROM lexicon_dependencies
        WHERE dependent_rowid = ?
    '''
    return connect().execute(query, (rowid,)).fetchall()


def find_ilis(
        id: str = None,
        status: str = None,
8 changes: 8 additions & 0 deletions wn/schema.sql
@@ -38,6 +38,14 @@ CREATE TABLE lexicons (
    UNIQUE (id, version)
);

CREATE TABLE lexicon_dependencies (
    dependent_rowid INTEGER NOT NULL REFERENCES lexicons (rowid) ON DELETE CASCADE,
    provider_id TEXT NOT NULL,
    provider_version TEXT NOT NULL,
    provider_url TEXT,
    provider_rowid INTEGER REFERENCES lexicons (rowid) ON DELETE SET NULL
);


-- Lexical Entries

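The two referential actions encode different lifetimes: deleting a dependent lexicon removes its dependency rows outright, while deleting a provider only clears `provider_rowid`, keeping the declared `id`/`version` requirement on record. A sketch demonstrating both behaviors, with the `lexicons` table reduced to the columns involved (note that SQLite only enforces foreign keys when the pragma is enabled):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('PRAGMA foreign_keys = ON')  # off by default in SQLite
conn.executescript('''
    CREATE TABLE lexicons (rowid INTEGER PRIMARY KEY, id TEXT, version TEXT);
    CREATE TABLE lexicon_dependencies (
        dependent_rowid INTEGER NOT NULL
            REFERENCES lexicons (rowid) ON DELETE CASCADE,
        provider_id TEXT NOT NULL,
        provider_version TEXT NOT NULL,
        provider_url TEXT,
        provider_rowid INTEGER REFERENCES lexicons (rowid) ON DELETE SET NULL
    );
    INSERT INTO lexicons VALUES (null, 'en', '1'), (null, 'ja', '1');
    INSERT INTO lexicon_dependencies VALUES (2, 'en', '1', NULL, 1);
''')

conn.execute("DELETE FROM lexicons WHERE id = 'en'")   # provider removed
print(conn.execute('SELECT * FROM lexicon_dependencies').fetchall())
# -> [(2, 'en', '1', None, None)]  row kept; provider_rowid nulled

conn.execute("DELETE FROM lexicons WHERE id = 'ja'")   # dependent removed
print(conn.execute('SELECT * FROM lexicon_dependencies').fetchall())
# -> []  the dependency row was cascade-deleted
```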
