-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordsquares.py
122 lines (102 loc) · 3.79 KB
/
wordsquares.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Usage: python wordsquares.py vocab.txt squares.txt
Format of `vocab.txt`: one word per line, lower case.
Hunts for every possible word square buildable from that vocab, and saves them
to `squares.txt`, one square per line (separated by a ' ').
"""
import collections
import sys
class WordList(object):
def __init__(self, words):
self._words = sorted(words)
self._num_words = len(words)
def words(self):
return self._words
def find_first_prefix_position(self, prefix):
if self._words[0].startswith(prefix):
return 0
prefix_len = len(prefix)
lower = 1
upper = self._num_words - 1
while lower <= upper:
mid = (upper + lower) / 2
mid_prefix = self._words[mid][:prefix_len]
lesser_prefix = self._words[mid - 1][:prefix_len]
if mid_prefix == prefix and lesser_prefix < prefix:
return mid
if mid_prefix < prefix:
lower = mid + 1
else:
upper = mid - 1
return None
def find_last_prefix_position(self, prefix):
if self._words[-1].startswith(prefix):
return self._num_words - 1
prefix_len = len(prefix)
lower = 0
upper = self._num_words - 2
while lower <= upper:
mid = (upper + lower) / 2
mid_prefix = self._words[mid][:prefix_len]
greater_prefix = self._words[mid + 1][:prefix_len]
if mid_prefix == prefix and greater_prefix > prefix:
return mid
if mid_prefix <= prefix:
lower = mid + 1
else:
upper = mid - 1
return None
def find_prefixes(self, prefix):
first = self.find_first_prefix_position(prefix)
last = self.find_last_prefix_position(prefix)
if first is not None and last is not None:
return self._words[first : last + 1]
else:
return []
def PrefixFromGrid(grid):
if not grid:
raise ValueError("grid can't be empty")
n = len(grid[0])
num_words = len(grid)
if num_words >= n:
raise "Too many entries in grid"
if not all([len(w) == n for w in grid]):
raise ValueError("all words in grid must be same length")
return "".join([word[num_words] for word in grid])
def BuildGrid(starting_word, wordlist):
"""Depth first search through possible grid extensions."""
actual_grids = []
grid_stack = [[starting_word,]]
num_char = len(starting_word)
while grid_stack:
grid = grid_stack[-1]
grid_stack = grid_stack[:-1]
if len(grid) == num_char:
actual_grids.append(grid)
continue
prefix = PrefixFromGrid(grid)
for candidate in wordlist.find_prefixes(prefix):
grid_stack.append(grid + [candidate,])
return actual_grids
def WordListsFromFile(filename):
len_to_words = collections.defaultdict(list)
with open(filename, 'r') as infile:
for word in infile.xreadlines():
if word.islower():
len_to_words[len(word)].append(word.strip())
return [WordList(words) for words in len_to_words.values()]
if __name__ == "__main__":
wordlists = WordListsFromFile(sys.argv[1])
print "done building wordlists"
all_grids = []
for wordlist in wordlists:
print "\nWordList starting with", wordlist.words()[0]
if len(wordlist.words()[0]) >= 8:
break
print '\t', "Current length:", len(all_grids)
for word in wordlist.words():
all_grids.extend(BuildGrid(word, wordlist))
PrintSquare(all_grids[-1])
print len(all_grids), "grids found"
with open(sys.argv[2], 'w') as outfile:
for grid in all_grids:
outfile.write(" ".join(grid) + "\n")