1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#!/usr/bin/env python
import sys
import cPickle
import heapq
import string
chars = string.lowercase
def flub(word, wordlist):
for i in range(len(word)):
for c in chars:
new_word = word[:i] + c + word[i+1:]
if new_word != word and new_word in wordlist:
yield new_word
def hamming_distance(s1, s2):
assert len(s1) == len(s2)
return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))
class Graph(object):
def __init__(self):
self.nodes = {}
def add_word(self, line):
if line in self.nodes:
return
n = Node(line)
for word in flub(line, self.nodes):
n.create_edge(self.nodes[word])
self.nodes[line] = n
def find_shortest_route(self, start, end):
if len(start) != len(end):
return
if start == end:
return [start]
if not self.nodes[start]:
raise Exception("Do not know word %s" % start)
if not self.nodes[end]:
raise Exception("Do not know word %s" % end)
seen = set([self.nodes[start]])
heap = [(hamming_distance(start, end), self.nodes[start])]
path_hop = {}
while heap:
node = heapq.heappop(heap)[1]
for n2 in node.edges:
if n2 in seen:
continue
seen.add(n2)
dist = hamming_distance(n2.word, end)
if dist == 0:
path = [n2.word, node.word]
while path[-1] != start:
path.append(path_hop[path[-1]])
path.reverse()
return path
heapq.heappush(heap, (dist, n2))
path_hop[n2.word] = node.word
return
class Node(object):
def __init__(self, word):
self.word = word
self.edges = set()
def create_edge(self, node):
self.edges.add(node)
node.edges.add(self)
def create_cache():
f = file('/usr/share/dict/words', 'rb')
chars = set(string.lowercase)
graph = Graph()
for line in f:
line = line.strip().lower()
if set(line).difference(chars): # not a word as far as we are concerned
continue
graph.add_word(line)
import sys
sys.setrecursionlimit(100000)
cPickle.Pickler(file('graph.pickle', 'wb'), protocol=2).dump(graph)
return graph
def main():
try:
print "Loading pickle"
graph = cPickle.load(file('graph.pickle', 'rb'))
print "Finished loading pickle"
except IOError:
print "Creating cache"
graph = create_cache()
print "Finished creating cache"
print graph.find_shortest_route(sys.argv[1], sys.argv[2])
if __name__ == '__main__':
main()
|