path: root/finder.py
author    Eric Anderson <ejona86@gmail.com>  2010-10-02 00:58:36 -0500
committer Eric Anderson <ejona86@gmail.com>  2010-10-02 00:58:36 -0500
commit    085b59d339e89b851766382c3761597586f4f3f6 (patch)
tree      50c547cf7336c464fc8e709174cdbce2f1fb10b3 /finder.py
parent    47fb89d9dacb3d63a194bd7547768ba56d72e617 (diff)
download  wordtree-085b59d339e89b851766382c3761597586f4f3f6.tar.gz
          wordtree-085b59d339e89b851766382c3761597586f4f3f6.zip
Completely change implementation (HEAD, master)
Diffstat (limited to 'finder.py')
-rwxr-xr-x (was -rw-r--r--)  finder.py  105
1 file changed, 89 insertions(+), 16 deletions(-)
diff --git a/finder.py b/finder.py
index 0a5fff3..1e5d930 100644..100755
--- a/finder.py
+++ b/finder.py
@@ -1,21 +1,94 @@
+#!/usr/bin/env python
import sys
+import cPickle
+import heapq
+import string
+chars = string.lowercase
-def find_transformations(input_word, output_word, graph):
- seen = set([input_word])
- queue = [(input_word, ())]
- while queue:
- (i, parents) = queue.pop(0)
- can_make = [j for j in graph[i] if j not in seen]
- if output_word in can_make:
- print ' '.join(parents), output_word
- return True
- new_parents = parents + (i,)
- queue.extend([(w, new_parents) for w in can_make])
- for w in can_make: seen.add(w)
- return False
+def flub(word, wordlist):  # yield each word in wordlist one substitution away from word
+ for i in range(len(word)):
+ for c in chars:
+ new_word = word[:i] + c + word[i+1:]
+ if new_word != word and new_word in wordlist:
+ yield new_word
+
+def hamming_distance(s1, s2):
+ assert len(s1) == len(s2)
+ return sum(ch1 != ch2 for ch1, ch2 in zip(s1, s2))
+
+class Graph(object):
+ def __init__(self):
+ self.nodes = {}
+ def add_word(self, line):
+ if line in self.nodes:
+ return
+ n = Node(line)
+ for word in flub(line, self.nodes):
+ n.create_edge(self.nodes[word])
+ self.nodes[line] = n
+ def find_shortest_route(self, start, end):
+ if len(start) != len(end):
+ return
+ if start == end:
+ return [start]
+        if start not in self.nodes:
+            raise Exception("Do not know word %s" % start)
+        if end not in self.nodes:
+            raise Exception("Do not know word %s" % end)
+ seen = set([self.nodes[start]])
+        heap = [(hamming_distance(start, end), self.nodes[start])]  # frontier ordered by distance to the goal
+ path_hop = {}
+ while heap:
+ node = heapq.heappop(heap)[1]
+ for n2 in node.edges:
+ if n2 in seen:
+ continue
+ seen.add(n2)
+ dist = hamming_distance(n2.word, end)
+ if dist == 0:
+ path = [n2.word, node.word]
+ while path[-1] != start:
+ path.append(path_hop[path[-1]])
+ path.reverse()
+ return path
+ heapq.heappush(heap, (dist, n2))
+ path_hop[n2.word] = node.word
+ return
+
+class Node(object):
+ def __init__(self, word):
+ self.word = word
+ self.edges = set()
+
+ def create_edge(self, node):
+ self.edges.add(node)
+ node.edges.add(self)
+
+def create_cache():
+ f = file('/usr/share/dict/words', 'rb')
+    chars = set(string.lowercase)  # as a set, for the membership test below
+ graph = Graph()
+ for line in f:
+ line = line.strip().lower()
+ if set(line).difference(chars): # not a word as far as we are concerned
+ continue
+ graph.add_word(line)
+    # the pickled graph recurses through linked Node objects
+    sys.setrecursionlimit(100000)
+ cPickle.Pickler(file('graph.pickle', 'wb'), protocol=2).dump(graph)
+ return graph
+
+def main():
+ try:
+ print "Loading pickle"
+ graph = cPickle.load(file('graph.pickle', 'rb'))
+ print "Finished loading pickle"
+ except IOError:
+ print "Creating cache"
+ graph = create_cache()
+ print "Finished creating cache"
+ print graph.find_shortest_route(sys.argv[1], sys.argv[2])
if __name__ == '__main__':
- assert len(sys.argv[1]) == len(sys.argv[2])
- graph = __import__('graph_%d' % len(sys.argv[1])).graph
- find_transformations(sys.argv[1], sys.argv[2], graph)
+ main()
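
A quick way to exercise the new Graph API by hand is a minimal sketch like the one below. It assumes finder.py is importable from the working directory; the four words are illustrative stand-ins for real dictionary entries and do not come from the patch.

    # Python 2, matching the patch's cPickle/print-statement style.
    from finder import Graph

    g = Graph()
    for w in ('cat', 'cot', 'cog', 'dog'):
        g.add_word(w)  # add_word() wires each word to known one-letter neighbors via flub()

    print g.find_shortest_route('cat', 'dog')
    # prints: ['cat', 'cot', 'cog', 'dog']

Note that find_shortest_route() orders its frontier purely by Hamming distance to the goal (greedy best-first), so on denser graphs the returned route is a heuristic choice rather than a guaranteed minimum-hop path.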