Skip to content

Commit

Permalink
improve performance and memory usage of editops
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Dec 11, 2021
1 parent f6e19dd commit 460d291
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 8 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.8.3
1.9.0
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'Max Bachmann'

# The full version, including alpha/beta/rc tags
release = '1.8.3'
release = '1.9.0'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion extern/rapidfuzz-cpp
2 changes: 1 addition & 1 deletion src/rapidfuzz/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
"""
__author__ = "Max Bachmann"
__license__ = "MIT"
__version__ = "1.8.3"
__version__ = "1.9.0"

from rapidfuzz import process, fuzz, utils, levenshtein, string_metric
33 changes: 30 additions & 3 deletions tests/test_hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,32 @@ def extract_scorer(s1, s2, scorer, processor=None, **kwargs):
def extract_iter_scorer(s1, s2, scorer, processor=None, **kwargs):
    """
    run process.extract_iter for s1 against the single choice s2 and return
    the element at index 1 of the first result (presumably the score).
    """
    results = list(
        process.extract_iter(s1, [s2], scorer=scorer, processor=processor, **kwargs)
    )
    first_result = results[0]
    return first_result[1]

def apply_editops(s1, s2, ops):
    """
    apply edit operations to s1 and return the resulting string.

    Each element of ops is indexable as (tag, s1 position, s2 position).
    The operations are processed in the order given:

    - 'delete'  skips the character of s1 at the s1 position
    - 'insert'  emits the character of s2 at the s2 position
    - 'replace' emits the character of s2 at the s2 position and skips
      the character of s1 at the s1 position

    Characters of s1 that lie between operations are copied unchanged.
    Operations with any other tag only trigger that copy.

    Raises IndexError when an operation refers to an s1 position that lies
    before the already consumed part of s1 or beyond its end, or when an
    s2 position is out of range.
    """
    pieces = []
    s1_pos = 0
    s1_len = len(s1)

    for op in ops:
        tag = op[0]
        src_pos = op[1]

        # operations have to move forward through s1 and stay inside it
        if not s1_pos <= src_pos <= s1_len:
            raise IndexError(
                "edit operation position %r is out of order or out of range" % (src_pos,)
            )

        # copy the unchanged run of s1 in front of this operation
        pieces.append(s1[s1_pos:src_pos])
        s1_pos = src_pos

        if tag == 'delete':
            s1_pos += 1
        elif tag == 'insert':
            pieces.append(s2[op[2]])
        elif tag == 'replace':
            pieces.append(s2[op[2]])
            s1_pos += 1

    # a delete/replace placed at len(s1) consumed a character that does not exist
    if s1_pos > s1_len:
        raise IndexError("edit operation consumed characters beyond the end of s1")

    # copy whatever is left of s1 behind the last operation
    pieces.append(s1[s1_pos:])
    return ''.join(pieces)


HYPOTHESIS_ALPHABET = ascii_letters + digits + punctuation

Expand Down Expand Up @@ -137,12 +163,13 @@ def extract_iter_scorer(s1, s2, scorer, processor=None, **kwargs):
]

@given(s1=st.text(), s2=st.text())
@settings(max_examples=50, deadline=1000)
@settings(max_examples=100, deadline=None)
def test_levenshtein_editops(s1, s2):
"""
test levenshtein_editops: applying the returned edit operations to s1 has to reproduce s2.
"""
string_metric.levenshtein_editops(s1, s2)
ops = string_metric.levenshtein_editops(s1, s2)
assert apply_editops(s1, s2, ops) == s2

@given(s1=st.text(max_size=64), s2=st.text())
@settings(max_examples=50, deadline=1000)
Expand Down Expand Up @@ -322,4 +349,4 @@ def test_cdist(queries, choices):

reference_matrix = cdist_distance(queries, queries, scorer=string_metric.levenshtein)
matrix = process.cdist(queries, queries, scorer=string_metric.levenshtein)
assert (matrix == reference_matrix).all()
assert (matrix == reference_matrix).all()
2 changes: 1 addition & 1 deletion tests/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def testWithScorer(self):
self.assertEqual(best[0], choices_mapping[1])
best = process.extract(query, choices_mapping, scorer=fuzz.QRatio)[0]
self.assertEqual(best[0], choices_mapping[1])

def testWithCutoff(self):
choices = [
"new york mets vs chicago cubs",
Expand Down
4 changes: 4 additions & 0 deletions tests/test_string_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ def test_levenshtein_editops():
"""
assert string_metric.levenshtein_editops("0", "") == [("delete", 0, 0)]
assert string_metric.levenshtein_editops("", "0") == [("insert", 0, 0)]

assert string_metric.levenshtein_editops("00", "0") == [("delete", 1, 1)]
assert string_metric.levenshtein_editops("0", "00") == [("insert", 1, 1)]

assert string_metric.levenshtein_editops("qabxcd", "abycdf") == [
("delete", 0, 0), ("replace", 3, 2), ("insert", 6, 5)
]
Expand Down

0 comments on commit 460d291

Please sign in to comment.