From 085b59d339e89b851766382c3761597586f4f3f6 Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Sat, 2 Oct 2010 00:58:36 -0500 Subject: Completely change implementation --- finder.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 16 deletions(-) mode change 100644 => 100755 finder.py (limited to 'finder.py') diff --git a/finder.py b/finder.py old mode 100644 new mode 100755 index 0a5fff3..1e5d930 --- a/finder.py +++ b/finder.py @@ -1,21 +1,94 @@ +#!/usr/bin/env python import sys +import cPickle +import heapq +import string +chars = string.lowercase -def find_transformations(input_word, output_word, graph): - seen = set([input_word]) - queue = [(input_word, ())] - while queue: - (i, parents) = queue.pop(0) - can_make = [j for j in graph[i] if j not in seen] - if output_word in can_make: - print ' '.join(parents), output_word - return True - new_parents = parents + (i,) - queue.extend([(w, new_parents) for w in can_make]) - for w in can_make: seen.add(w) - return False +def flub(word, wordlist): + for i in range(len(word)): + for c in chars: + new_word = word[:i] + c + word[i+1:] + if new_word != word and new_word in wordlist: + yield new_word + +def hamming_distance(s1, s2): + assert len(s1) == len(s2) + return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2)) + +class Graph(object): + def __init__(self): + self.nodes = {} + def add_word(self, line): + if line in self.nodes: + return + n = Node(line) + for word in flub(line, self.nodes): + n.create_edge(self.nodes[word]) + self.nodes[line] = n + def find_shortest_route(self, start, end): + if len(start) != len(end): + return + if start == end: + return [start] + if not self.nodes[start]: + raise Exception("Do not know word %s" % start) + if not self.nodes[end]: + raise Exception("Do not know word %s" % end) + seen = set([self.nodes[start]]) + heap = [(hamming_distance(start, end), self.nodes[start])] + path_hop = {} + while heap: + node = heapq.heappop(heap)[1] + for n2 in node.edges: + if n2 in seen: + continue + seen.add(n2) + dist = hamming_distance(n2.word, end) + if dist == 0: + path = [n2.word, node.word] + while path[-1] != start: + path.append(path_hop[path[-1]]) + path.reverse() + return path + heapq.heappush(heap, (dist, n2)) + path_hop[n2.word] = node.word + return + +class Node(object): + def __init__(self, word): + self.word = word + self.edges = set() + + def create_edge(self, node): + self.edges.add(node) + node.edges.add(self) + +def create_cache(): + f = file('/usr/share/dict/words', 'rb') + chars = set(string.lowercase) + graph = Graph() + for line in f: + line = line.strip().lower() + if set(line).difference(chars): # not a word as far as we are concerned + continue + graph.add_word(line) + import sys + sys.setrecursionlimit(100000) + cPickle.Pickler(file('graph.pickle', 'wb'), protocol=2).dump(graph) + return graph + +def main(): + try: + print "Loading pickle" + graph = cPickle.load(file('graph.pickle', 'rb')) + print "Finished loading pickle" + except IOError: + print "Creating cache" + graph = create_cache() + print "Finished creating cache" + print graph.find_shortest_route(sys.argv[1], sys.argv[2]) if __name__ == '__main__': - assert len(sys.argv[1]) == len(sys.argv[2]) - graph = __import__('graph_%d' % len(sys.argv[1])).graph - find_transformations(sys.argv[1], sys.argv[2], graph) + main() -- cgit v1.2.3-70-g09d2