diff options
author | Eric Anderson <ejona86@gmail.com> | 2010-10-02 00:58:36 -0500 |
---|---|---|
committer | Eric Anderson <ejona86@gmail.com> | 2010-10-02 00:58:36 -0500 |
commit | 085b59d339e89b851766382c3761597586f4f3f6 (patch) | |
tree | 50c547cf7336c464fc8e709174cdbce2f1fb10b3 /preprocess.py | |
parent | 47fb89d9dacb3d63a194bd7547768ba56d72e617 (diff) | |
download | wordtree-085b59d339e89b851766382c3761597586f4f3f6.tar.gz wordtree-085b59d339e89b851766382c3761597586f4f3f6.zip |
Diffstat (limited to 'preprocess.py')
-rw-r--r-- | preprocess.py | 34 |
1 files changed, 0 insertions, 34 deletions
```diff
diff --git a/preprocess.py b/preprocess.py
deleted file mode 100644
index 8474dc7..0000000
--- a/preprocess.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import string
-chars = string.lowercase
-
-def flub(word, wordlist):
-    for i in range(len(word)):
-        for c in chars:
-            new_word = word[:i] + c + word[i+1:]
-            if new_word != word and new_word in wordlist:
-                yield new_word
-
-def makegraph(wordlist):
-    g = {}
-    # bit of a hack to make an undirected graph
-    for w in wordlist:
-        for v in flub(w, wordlist):
-            g.setdefault(w, set()).add(v)
-            g.setdefault(v, set()).add(w)
-    return g
-
-def main():
-    f = file('/usr/share/dict/words', 'rb')
-    bylength = {}
-    for line in f:
-        line = line.strip().lower()
-        bylength.setdefault(len(line), set()).add(line)
-    for length, wordlist in bylength.iteritems():
-        print "processing", length
-        f = file('graph_%d.py' % length, 'wb')
-        f.write('# -*- encoding: latin-1 -*-\n')
-        f.write('graph = ' + repr(makegraph(wordlist)) + '\n')
-        f.close()
-
-if __name__ == '__main__':
-    main()
```