diff options
author | Tim Hatch <tim@timhatch.com> | 2010-10-01 18:10:57 -0700 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2010-10-01 18:10:57 -0700 |
commit | 01de3f2a65c2ed4084446b5574a087b8a07275e3 (patch) | |
tree | 941dfef25d1ddce98eff264150386f6e05655754 | |
download | wordtree-01de3f2a65c2ed4084446b5574a087b8a07275e3.tar.gz wordtree-01de3f2a65c2ed4084446b5574a087b8a07275e3.zip |
basic (slow) scripts
-rw-r--r-- | finder.py | 21 | ||||
-rw-r--r-- | islands.py | 19 | ||||
-rw-r--r-- | preprocess.py | 34 |
3 files changed, 74 insertions, 0 deletions
diff --git a/finder.py b/finder.py new file mode 100644 index 0000000..0a5fff3 --- /dev/null +++ b/finder.py @@ -0,0 +1,21 @@ +import sys + +def find_transformations(input_word, output_word, graph): + seen = set([input_word]) + queue = [(input_word, ())] + while queue: + (i, parents) = queue.pop(0) + can_make = [j for j in graph[i] if j not in seen] + if output_word in can_make: + print ' '.join(parents), output_word + return True + new_parents = parents + (i,) + queue.extend([(w, new_parents) for w in can_make]) + for w in can_make: seen.add(w) + return False + +if __name__ == '__main__': + assert len(sys.argv[1]) == len(sys.argv[2]) + graph = __import__('graph_%d' % len(sys.argv[1])).graph + find_transformations(sys.argv[1], sys.argv[2], graph) + diff --git a/islands.py b/islands.py new file mode 100644 index 0000000..4b0c7c5 --- /dev/null +++ b/islands.py @@ -0,0 +1,19 @@ +import sys +from finder import find_transformations + +def find_islands(graph): + island_size = {} + for g in graph: + for key in island_size.keys(): + if find_transformations(g, key, graph): + island_size[key] += 1 + break + else: + island_size[g] = 1 + + print island_size + +if __name__ == '__main__': + graph = __import__('graph_%d' % int(sys.argv[1])).graph + find_islands(graph) + diff --git a/preprocess.py b/preprocess.py new file mode 100644 index 0000000..8474dc7 --- /dev/null +++ b/preprocess.py @@ -0,0 +1,34 @@ +import string +chars = string.lowercase + +def flub(word, wordlist): + for i in range(len(word)): + for c in chars: + new_word = word[:i] + c + word[i+1:] + if new_word != word and new_word in wordlist: + yield new_word + +def makegraph(wordlist): + g = {} + # bit of a hack to make an undirected graph + for w in wordlist: + for v in flub(w, wordlist): + g.setdefault(w, set()).add(v) + g.setdefault(v, set()).add(w) + return g + +def main(): + f = file('/usr/share/dict/words', 'rb') + bylength = {} + for line in f: + line = line.strip().lower() + bylength.setdefault(len(line), set()).add(line) + for length, wordlist in bylength.iteritems(): + print "processing", length + f = file('graph_%d.py' % length, 'wb') + f.write('# -*- encoding: latin-1 -*-\n') + f.write('graph = ' + repr(makegraph(wordlist)) + '\n') + f.close() + +if __name__ == '__main__': + main() |