summaryrefslogtreecommitdiff
path: root/finder.py
blob: 1e5d930c7834fedc43c7981109fed53a4084ff29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
import sys
import cPickle
import heapq
import string
chars = string.lowercase

def flub(word, wordlist):
    for i in range(len(word)):
        for c in chars:
            new_word = word[:i] + c + word[i+1:]
            if new_word != word and new_word in wordlist:
                yield new_word

def hamming_distance(s1, s2):
    assert len(s1) == len(s2)
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))

class Graph(object):
    def __init__(self):
        self.nodes = {}
    def add_word(self, line):
        if line in self.nodes:
            return
        n = Node(line)
        for word in flub(line, self.nodes):
            n.create_edge(self.nodes[word])
        self.nodes[line] = n
    def find_shortest_route(self, start, end):
        if len(start) != len(end):
            return
        if start == end:
            return [start]
        if not self.nodes[start]:
            raise Exception("Do not know word %s" % start)
        if not self.nodes[end]:
            raise Exception("Do not know word %s" % end)
        seen = set([self.nodes[start]])
        heap = [(hamming_distance(start, end), self.nodes[start])]
        path_hop = {}
        while heap:
            node = heapq.heappop(heap)[1]
            for n2 in node.edges:
                if n2 in seen:
                    continue
                seen.add(n2)
                dist = hamming_distance(n2.word, end)
                if dist == 0:
                    path = [n2.word, node.word]
                    while path[-1] != start:
                        path.append(path_hop[path[-1]])
                    path.reverse()
                    return path
                heapq.heappush(heap, (dist, n2))
                path_hop[n2.word] = node.word
        return

class Node(object):
    def __init__(self, word):
        self.word = word
        self.edges = set()

    def create_edge(self, node):
        self.edges.add(node)
        node.edges.add(self)

def create_cache():
    f = file('/usr/share/dict/words', 'rb')
    chars = set(string.lowercase)
    graph = Graph()
    for line in f:
        line = line.strip().lower()
        if set(line).difference(chars): # not a word as far as we are concerned
            continue
        graph.add_word(line)
    import sys
    sys.setrecursionlimit(100000)
    cPickle.Pickler(file('graph.pickle', 'wb'), protocol=2).dump(graph)
    return graph

def main():
    try:
        print "Loading pickle"
        graph = cPickle.load(file('graph.pickle', 'rb'))
        print "Finished loading pickle"
    except IOError:
        print "Creating cache"
        graph = create_cache()
        print "Finished creating cache"
    print graph.find_shortest_route(sys.argv[1], sys.argv[2])

if __name__ == '__main__':
    main()