diff options
author | Eric Anderson <ejona86@gmail.com> | 2010-10-02 00:58:36 -0500 |
---|---|---|
committer | Eric Anderson <ejona86@gmail.com> | 2010-10-02 00:58:36 -0500 |
commit | 085b59d339e89b851766382c3761597586f4f3f6 (patch) | |
tree | 50c547cf7336c464fc8e709174cdbce2f1fb10b3 /finder.py | |
parent | 47fb89d9dacb3d63a194bd7547768ba56d72e617 (diff) | |
download | wordtree-master.tar.gz wordtree-master.zip |
Diffstat (limited to 'finder.py')
-rwxr-xr-x[-rw-r--r--] | finder.py | 105 |
1 file changed, 89 insertions, 16 deletions
#!/usr/bin/env python3
"""Find a chain of single-letter edits transforming one word into another.

Builds a graph whose nodes are equal-length lowercase dictionary words, with
an edge between every pair of words at Hamming distance 1, then breadth-first
searches for the shortest transformation chain.  The graph is pickled to
``graph.pickle`` so repeat runs skip the expensive build step.
"""
import collections
import pickle
import string
import sys

# Alphabet used when mutating words; anything outside a-z is rejected.
chars = string.ascii_lowercase


def flub(word, wordlist):
    """Yield every member of *wordlist* exactly one letter away from *word*."""
    for i in range(len(word)):
        for c in chars:
            new_word = word[:i] + c + word[i + 1:]
            if new_word != word and new_word in wordlist:
                yield new_word


def hamming_distance(s1, s2):
    """Return the number of positions at which *s1* and *s2* differ.

    Raises ValueError (not a strippable ``assert``) on length mismatch.
    """
    if len(s1) != len(s2):
        raise ValueError("words must be the same length")
    return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))


class Graph(object):
    """Undirected graph of words, linked when they differ by one letter."""

    def __init__(self):
        self.nodes = {}  # word -> Node

    def add_word(self, line):
        """Insert *line*, wiring edges to every known word one letter away."""
        if line in self.nodes:
            return
        n = Node(line)
        for word in flub(line, self.nodes):
            n.create_edge(self.nodes[word])
        self.nodes[line] = n

    def find_shortest_route(self, start, end):
        """Return the shortest word chain from *start* to *end*, or None.

        Uses breadth-first search, which guarantees a minimal-length chain on
        this unweighted graph (the original greedy best-first walk over the
        Hamming heuristic could return longer-than-optimal routes, and broke
        on priority ties by comparing Node objects).

        Returns None when no route exists or the lengths differ; raises
        KeyError when either word is unknown.
        """
        if len(start) != len(end):
            return None
        # BUG FIX: the original did ``if not self.nodes[start]``, which raised
        # a bare KeyError for unknown words before its own error message.
        if start not in self.nodes:
            raise KeyError("Do not know word %s" % start)
        if end not in self.nodes:
            raise KeyError("Do not know word %s" % end)
        if start == end:
            return [start]
        seen = {start}
        parent = {}  # word -> preceding word on the BFS tree
        queue = collections.deque([self.nodes[start]])
        while queue:
            node = queue.popleft()
            for neighbor in node.edges:
                if neighbor.word in seen:
                    continue
                seen.add(neighbor.word)
                parent[neighbor.word] = node.word
                if neighbor.word == end:
                    # Walk the parent chain back to start, then reverse.
                    path = [end]
                    while path[-1] != start:
                        path.append(parent[path[-1]])
                    path.reverse()
                    return path
                queue.append(neighbor)
        return None


class Node(object):
    """A word plus the set of neighboring Nodes one letter away."""

    def __init__(self, word):
        self.word = word
        self.edges = set()

    def __repr__(self):
        return "Node(%r)" % self.word

    def create_edge(self, node):
        """Link this node and *node* symmetrically."""
        self.edges.add(node)
        node.edges.add(self)


def create_cache(path='/usr/share/dict/words'):
    """Build the word graph from the dictionary at *path* and pickle it.

    *path* is parameterized (default preserves the original behavior) so the
    cache can be built from any word list.  Returns the Graph.
    """
    allowed = set(string.ascii_lowercase)
    graph = Graph()
    with open(path) as f:
        for line in f:
            line = line.strip().lower()
            # Not a word as far as we are concerned (digits, punctuation...).
            if set(line) - allowed:
                continue
            graph.add_word(line)
    # Pickling the deeply self-referential node graph can blow the default
    # recursion limit, so raise it before dumping.
    sys.setrecursionlimit(100000)
    with open('graph.pickle', 'wb') as out:
        pickle.dump(graph, out, protocol=2)
    return graph


def main():
    """CLI entry point: ``finder.py <start-word> <end-word>``."""
    if len(sys.argv) != 3:
        print("usage: %s <start-word> <end-word>" % sys.argv[0])
        sys.exit(2)
    try:
        print("Loading pickle")
        # NOTE: pickle.load is only acceptable because graph.pickle is a
        # local cache this script wrote itself; never load untrusted pickles.
        with open('graph.pickle', 'rb') as f:
            graph = pickle.load(f)
        print("Finished loading pickle")
    except (FileNotFoundError, IOError):
        print("Creating cache")
        graph = create_cache()
        print("Finished creating cache")
    print(graph.find_shortest_route(sys.argv[1], sys.argv[2]))


if __name__ == '__main__':
    main()