-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnovar.py
235 lines (199 loc) · 6.74 KB
/
novar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# Letters whose sound is kept when a word is reduced to its sounds; also the
# default set of characters that strip_consonants removes.
hardsounds = list('bcdfghjklmnpqstvxzr')

# Letters treated as 'soft' sounds: they are dropped when words are compared
# by pronunciation.
softsounds = list('aeiouyw')

# Letters deleted from a word unless they are its first or last character.
ignored = list('whr')

# (replacement, spelling) pairs: every occurrence of the spelling is rewritten
# to the replacement, pair by pair in this order, before sounds are compared.
nuances = (
    ('sha', 'tia'),
    ('f', 'ph'),
    ('c', 'k'),
    ('c', 'q'),
    ('u', 'oo'),
    ('e', 'i'),
    ('a', 'e'),
    ('s', 'z'),
    ('c', 'x'),
)

# Groups of characters that may stand in for one another when typed.
corresponding = [
    ('1', 'i', 'l', '!'),
    ('2', 'r'),
    ('3', 'e'),
    ('4', 'a', '@'),
    ('5', 's', '$'),
    ('6', 'b'),
    ('7', 't', '+'),
    ('0', 'o'),
    ('(', 'c'),
]

# Letter -> every character accepted in its place: each alphabetic member of a
# group above is keyed to its own copy of that whole group.
corresponding2 = {
    letter: list(group)
    for group in corresponding
    for letter in group
    if letter.isalpha()
}

__version__ = 1.0
__author__ = "Signetar, 2022"
__meow__ = "meowwww owo"
def compare(typed_char, target_char, corresponding=corresponding2) -> bool:
    """Tell whether ``typed_char`` is an acceptable way of typing ``target_char``.

    When ``target_char`` is a key of ``corresponding``, any character listed
    under that key is accepted; otherwise only an exact match counts.
    """
    if target_char not in corresponding:
        # No stand-ins registered for this character: exact match only.
        return target_char == typed_char
    return typed_char in corresponding[target_char]
def average_stuck_keyboard_enjoyer(typed, target, corresponding=corresponding2) -> bool:
    """
    Decide whether ``typed`` spells ``target`` once repeated characters and
    stand-in characters are disregarded.

    Both strings are lower-cased first. ``typed`` is walked left to right
    against ``target``: every typed character has to be acceptable (per
    ``compare``) for the current target character, and the target position
    only advances when the next typed character fits the next target
    character. Whatever is typed after the last target character is reached
    must keep matching that last character.
    """
    typed = typed.lower()
    target = target.lower()
    typed_len = len(typed)
    target_len = len(target)

    # Nothing can spell an empty target except empty input, and every target
    # character needs at least one typed character of its own.
    if target_len == 0 and typed_len != 0:
        return False
    if typed_len < target_len:
        return False

    target_pos = 0
    typed_pos = 0
    while typed_pos < typed_len:
        if not compare(typed[typed_pos], target[target_pos], corresponding):
            return False
        if target_pos == target_len - 1:
            # Final target character reached; the tail is checked below.
            break
        if typed_pos == typed_len - 1:
            # Input ran out before the target did.
            return False
        upcoming = typed[typed_pos + 1]
        if compare(upcoming, target[target_pos + 1], corresponding):
            typed_pos += 1
            target_pos += 1
        elif compare(upcoming, target[target_pos], corresponding):
            # A repeat of the current target character: consume it and stay put.
            typed_pos += 1
        else:
            return False

    # Everything still unread may only repeat the last target character.
    return all(
        compare(leftover, target[-1], corresponding)
        for leftover in typed[typed_pos:]
    )
def __ingroup(inputSet, groups) -> bool:
    """
    Returns True when inputSet holds exactly one distinct element, or when
    all of its elements fit inside a single one of the given groups.
    """
    members = set(inputSet)
    if len(members) == 1:
        return True
    return any(members.issubset(set(group)) for group in groups)
def text_variation(typed, target, groups=corresponding) -> float:
    """
    Score how closely ``typed`` reproduces ``target`` when look-alike
    characters (as listed in ``groups``) are treated as interchangeable.

    Both strings are lower-cased and cut into runs: ``typed`` is cut wherever
    two neighbouring characters are neither equal nor members of a common
    group, ``target`` wherever two neighbouring characters differ. Runs are
    then paired position by position, and a pair matches when all of its
    characters together fit inside a single group (or are one and the same
    character).

    Args:
        typed: the text that was actually entered.
        target: the word it is measured against.
        groups: iterable of character groups whose members may replace
            one another.

    Returns:
        The number of matching pairs divided by the larger of the two run
        counts, so runs left without a partner on either side lower the
        score. Two empty strings score 1.0; an empty string against a
        non-empty one scores 0.0.
    """
    typed, target = typed.lower(), target.lower()

    def split_runs(text, same_run):
        """Cut text into maximal runs whose neighbours satisfy same_run."""
        runs = []
        current = ''
        for char in text:
            if current and not same_run(char, current[-1]):
                runs.append(current)
                current = char
            else:
                current += char
        # Empty text yields no runs at all, so it can never pair up with
        # (and spuriously match) a run from the other string.
        if current:
            runs.append(current)
        return runs

    typed_runs = split_runs(typed, lambda a, b: __ingroup({a, b}, groups))
    target_runs = split_runs(target, lambda a, b: a == b)

    # Normalise by the longer side: dividing by the typed run count alone
    # would let a mere prefix of the target earn a perfect score.
    total = max(len(typed_runs), len(target_runs))
    if total == 0:
        return 1.0

    matches = sum(
        1
        for typed_run, target_run in zip(typed_runs, target_runs)
        if __ingroup(typed_run + target_run, groups)
    )
    return matches / total
def __delete_softsounds(word, softsounds=ignored) -> str:
    """
    Lower-case ``word`` and delete every interior character that appears in
    ``softsounds``; the first and last characters are always kept.

    Args:
        word: the word to filter.
        softsounds: characters to delete from the interior of the word.

    Returns:
        The lower-cased word without the listed interior characters. Words
        shorter than three characters have no interior and are returned
        lower-cased but otherwise unchanged.
    """
    word = word.lower()
    # Without this guard an empty word raises IndexError and a one-character
    # word comes back doubled (it is both the "first" and the "last" letter).
    if len(word) < 3:
        return word
    interior = ''.join(char for char in word[1:-1] if char not in softsounds)
    return word[0] + interior + word[-1]
#wreck
def strip_consonants(word, consonants=hardsounds, replace_nuances=True) -> list:
    """
    Lower-case ``word`` and return its characters with every member of
    ``consonants`` removed.

    Args:
        word: the word to process.
        consonants: characters to drop from the result.
        replace_nuances: when true, each spelling listed in ``nuances`` is
            first rewritten to its replacement, pair by pair in table order.

    Returns:
        A list of the remaining single-character strings, in their original
        order (a list, not a joined string).
    """
    word = word.lower()
    if replace_nuances:
        for replacement, spelling in nuances:
            # str.replace is a no-op when the spelling is absent, so no
            # membership test is needed first.
            word = word.replace(spelling, replacement)
    return [char for char in word if char not in consonants]
def __latent(word, delete_ignored=True, softsounds=softsounds, ignored=ignored) -> list:
    """
    Reduce ``word`` to the ordered list of sounds used for comparing
    pronunciation.

    Steps: lower-case the word, rewrite every spelling in ``nuances`` to its
    replacement, optionally delete interior ``ignored`` letters, collapse
    each run of a repeated letter into one entry, and finally drop the
    entries listed in ``softsounds``.

    Args:
        word: the word to reduce.
        delete_ignored: whether interior ``ignored`` letters are removed.
        softsounds: letters dropped from the final list.
        ignored: letters removed from the interior when ``delete_ignored``
            is set.

    Returns:
        A list of single-character strings; empty when nothing remains.
    """
    word = word.lower()
    for replacement, spelling in nuances:
        word = word.replace(spelling, replacement)

    # An empty word has no sounds; returning here also keeps
    # __delete_softsounds from indexing into an empty string.
    if not word:
        return []

    if delete_ignored:
        word = __delete_softsounds(word, ignored)

    sounds = []
    for char in word:
        # Only record a letter when it differs from the previous one, which
        # collapses runs such as "ll" into a single entry.
        if not sounds or sounds[-1] != char:
            sounds.append(char)

    return [sound for sound in sounds if sound not in softsounds]
def __zip(one, two):
    """
    Pair the items of ``one`` and ``two`` position by position, filling in ''
    wherever the shorter sequence has run out.
    """
    def item_or_blank(sequence, index):
        return sequence[index] if index < len(sequence) else ''

    longest = max(len(one), len(two))
    return [
        (item_or_blank(one, index), item_or_blank(two, index))
        for index in range(longest)
    ]
def pronunciation_similarity(word1, word2, delete_ignored=True, softsounds=softsounds, ignored=ignored) -> dict:
    """
    Compare how two purely alphabetic words sound.

    Each word is reduced to its list of sounds by ``__latent``; the two lists
    are then aligned position by position.

    Args:
        word1, word2: the words to compare.
        delete_ignored, softsounds, ignored: passed through to ``__latent``.

    Returns:
        A dict with
        - "Similarity": matching positions divided by the longer list's length;
        - "Confidence": the shorter list's length divided by the longer one's.
        When neither word leaves any sound to compare, the result is
        Similarity 1.0 (nothing distinguishes them) with Confidence 0.0
        (nothing supports the verdict either).
        If either word contains a non-alphabetic character the dict is
        {"Similarity": 0, "Error": <message>} instead.
    """
    # check if there are characters that aren't alphabets in word1 and word2
    if not (all(x.isalpha() for x in word1) and all(x.isalpha() for x in word2)):
        return {"Similarity" : 0, "Error" : "One or more of the words contains non-alphabet characters."}

    # An empty word has no sounds to extract, so it is not sent through __latent.
    word1v = __latent(word1, delete_ignored, softsounds, ignored) if word1 else []
    word2v = __latent(word2, delete_ignored, softsounds, ignored) if word2 else []

    maximum = max(len(word1v), len(word2v))
    minimum = min(len(word1v), len(word2v))
    if maximum == 0:
        # Both words consist solely of dropped sounds (e.g. "a" vs "e");
        # dividing by the list length would raise ZeroDivisionError.
        return {"Similarity": 1.0, "Confidence": 0.0}

    matches = sum(1 for first, second in __zip(word1v, word2v) if first == second)
    return {"Similarity": matches / maximum, "Confidence": minimum / maximum}
def novar(typed, target, groups=corresponding, softsounds=softsounds, ignored=ignored, delete_ignored=True,) -> dict:
    """
    Run novar's comparisons of ``typed`` against ``target`` and gather the
    results in one dict, keyed by the name of the comparison.
    """
    return {
        "text_variation": {
            "Similarity": text_variation(typed, target, groups),
        },
        "pronunciation_similarity": pronunciation_similarity(
            typed,
            target,
            delete_ignored=delete_ignored,
            softsounds=softsounds,
            ignored=ignored,
        ),
    }