diff options
Diffstat (limited to 'preprocess.py')
-rw-r--r-- | preprocess.py | 34 |
1 files changed, 34 insertions, 0 deletions
"""Precompute one-letter-substitution graphs for a word list.

Words are grouped by length; for every length a module ``graph_<length>.py``
is written that defines ``graph``, a dict mapping each word to the set of
same-length words that differ from it in exactly one position.
"""
import os
import string

# Candidate replacement letters.  ``string.ascii_lowercase`` is fixed to
# a-z, unlike the locale-dependent ``string.lowercase`` of Python 2.
chars = string.ascii_lowercase

# Lower-cases ASCII letters only, so bytes >= 0x80 (decoded as latin-1 in
# main()) pass through untouched.
_ASCII_LOWER = str.maketrans(string.ascii_uppercase, string.ascii_lowercase)


def flub(word, wordlist):
    """Yield each member of *wordlist* one letter substitution away from *word*.

    Every position of *word* is replaced in turn by each letter of ``chars``;
    a candidate is yielded when it differs from *word* and is contained in
    *wordlist*.  Candidates come out ordered by position, then by letter.

    *wordlist* only needs to support ``in``; pass a set for fast lookups.
    """
    for i in range(len(word)):
        prefix, suffix = word[:i], word[i + 1:]
        for c in chars:
            new_word = prefix + c + suffix
            if new_word != word and new_word in wordlist:
                yield new_word


def makegraph(wordlist):
    """Return ``{word: set(neighbours)}`` for the words in *wordlist*.

    Two words are neighbours when ``flub`` connects them.  Words without any
    neighbour do not appear as keys.  *wordlist* may be any iterable of
    strings; anything that is not already a set is copied into one so that
    membership tests stay constant-time.
    """
    if isinstance(wordlist, (set, frozenset)):
        lookup = wordlist
    else:
        wordlist = list(wordlist)  # also makes one-shot iterators reusable
        lookup = set(wordlist)

    g = {}
    for w in wordlist:
        for v in flub(w, lookup):
            # Record the edge in both directions to keep the graph undirected.
            g.setdefault(w, set()).add(v)
            g.setdefault(v, set()).add(w)
    return g


def main(words_path='/usr/share/dict/words', output_dir='.'):
    """Read *words_path* and write ``graph_<length>.py`` files to *output_dir*.

    The word list is decoded as latin-1, which maps every byte to exactly one
    character, so no input can fail to decode and the output (declared as
    latin-1 in its header) round-trips the same bytes.  Only ASCII whitespace
    is stripped and only ASCII letters are lower-cased, so non-ASCII bytes
    are left as they are.  Blank lines are ignored.
    """
    bylength = {}
    with open(words_path, encoding='latin-1') as words_file:
        for line in words_file:
            word = line.strip(string.whitespace).translate(_ASCII_LOWER)
            if not word:
                continue  # a blank line is not a word
            bylength.setdefault(len(word), set()).add(word)

    for length, wordlist in bylength.items():
        print("processing", length)
        out_path = os.path.join(output_dir, 'graph_%d.py' % length)
        # newline='\n' keeps the written bytes identical on every platform.
        with open(out_path, 'w', encoding='latin-1', newline='\n') as out:
            out.write('# -*- encoding: latin-1 -*-\n')
            out.write('graph = ' + repr(makegraph(wordlist)) + '\n')


if __name__ == '__main__':
    main()