#!/usr/bin/env python import sys import cPickle import heapq import string chars = string.lowercase def flub(word, wordlist): for i in range(len(word)): for c in chars: new_word = word[:i] + c + word[i+1:] if new_word != word and new_word in wordlist: yield new_word def hamming_distance(s1, s2): assert len(s1) == len(s2) return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2)) class Graph(object): def __init__(self): self.nodes = {} def add_word(self, line): if line in self.nodes: return n = Node(line) for word in flub(line, self.nodes): n.create_edge(self.nodes[word]) self.nodes[line] = n def find_shortest_route(self, start, end): if len(start) != len(end): return if start == end: return [start] if not self.nodes[start]: raise Exception("Do not know word %s" % start) if not self.nodes[end]: raise Exception("Do not know word %s" % end) seen = set([self.nodes[start]]) heap = [(hamming_distance(start, end), self.nodes[start])] path_hop = {} while heap: node = heapq.heappop(heap)[1] for n2 in node.edges: if n2 in seen: continue seen.add(n2) dist = hamming_distance(n2.word, end) if dist == 0: path = [n2.word, node.word] while path[-1] != start: path.append(path_hop[path[-1]]) path.reverse() return path heapq.heappush(heap, (dist, n2)) path_hop[n2.word] = node.word return class Node(object): def __init__(self, word): self.word = word self.edges = set() def create_edge(self, node): self.edges.add(node) node.edges.add(self) def create_cache(): f = file('/usr/share/dict/words', 'rb') chars = set(string.lowercase) graph = Graph() for line in f: line = line.strip().lower() if set(line).difference(chars): # not a word as far as we are concerned continue graph.add_word(line) import sys sys.setrecursionlimit(100000) cPickle.Pickler(file('graph.pickle', 'wb'), protocol=2).dump(graph) return graph def main(): try: print "Loading pickle" graph = cPickle.load(file('graph.pickle', 'rb')) print "Finished loading pickle" except IOError: print "Creating cache" graph = create_cache() print "Finished creating cache" print graph.find_shortest_route(sys.argv[1], sys.argv[2]) if __name__ == '__main__': main()