-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspell.go
124 lines (107 loc) · 3.02 KB
/
spell.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package toyspellingcorrector
import (
"io/ioutil"
"regexp"
"strings"
)
// ToySpellcheck holds the words used for finding corrections
type ToySpellcheck struct {
words map[string]int
}
// Train takes a text file and splits it into words and places
// the words into the ToySpellcheck struct words dictionary
func (s *ToySpellcheck) Train(path string) {
s.words = make(map[string]int)
pattern := regexp.MustCompile("[a-z]+")
content, err := ioutil.ReadFile(path)
if err == nil {
lc := strings.ToLower(string(content))
for _, w := range pattern.FindAllString(lc, -1) {
s.words[w]++
}
}
}
// edits1 runs a series of mutations against a given word
// including deletions, transpositions, alterations and
// insertions totalling 54n+25 where n is len(word)
func (s *ToySpellcheck) edits1(word string) (out []string) {
// break up the alphabet into a slice of characters
alphabet := strings.Split("abcdefghijklmnopqrstuvwxyz", "")
// Delete single letters from word
for i := 0; i < len(word); i++ {
out = append(out, word[:i]+word[i+1:])
}
// Transpose letters meaning swap adjacent letters
for i := 0; i < len(word)-1; i++ {
out = append(out, word[:i]+word[i+1:i+2]+word[i:i+1]+word[i+2:])
}
// Alter letters meaning replace letter with another
for i := 0; i < len(word); i++ {
for _, char := range alphabet {
out = append(out, word[:i]+char+word[i+1:])
}
}
// Insert letters
for i := 0; i <= len(word); i++ {
for _, char := range alphabet {
out = append(out, word[:i]+char+word[i:])
}
}
return out
}
// Run a second round of edits against output of first round
// and limit the output to only known words from the dictionary
func (s *ToySpellcheck) knownEdits2(word string) (out []string) {
firstRound := s.edits1(word)
for _, variation := range firstRound {
secondRound := s.edits1(variation)
out = append(out, s.known(secondRound)...)
}
return out
}
// Find any known words in a slice and return them
func (s *ToySpellcheck) known(words []string) []string {
out := []string{}
for _, word := range words {
_, found := s.words[word]
if found {
out = append(out, word)
}
}
return out
}
// Correct tries to find the best correct spelling of
// a given word, falls back to giving the same word if
// none can be found
func (s *ToySpellcheck) Correct(word string) string {
if _, found := s.words[word]; found {
return word
}
firstRound := s.known(s.edits1(word))
if result := s.bestCandidate(firstRound); result != "" {
return result
}
secondRound := s.knownEdits2(word)
if result := s.bestCandidate(secondRound); result != "" {
return result
}
return word
}
// bestCandidate returns the word with the highest count of use in
// the word list or simply the first word if there is only one
func (s *ToySpellcheck) bestCandidate(words []string) (result string) {
if len(words) > 0 {
if len(words) == 1 {
return words[0]
}
highCount := 0
for _, word := range words {
if s.words[word] > highCount {
highCount = s.words[word]
result = word
}
}
return result
}
return ""
}