summaryrefslogtreecommitdiff
path: root/preprocess.py
diff options
context:
space:
mode:
Diffstat (limited to 'preprocess.py')
-rw-r--r--preprocess.py34
1 files changed, 34 insertions, 0 deletions
diff --git a/preprocess.py b/preprocess.py
new file mode 100644
index 0000000..8474dc7
--- /dev/null
+++ b/preprocess.py
@@ -0,0 +1,34 @@
+import string
# Alphabet tried at every position when generating one-letter variants.
# ascii_lowercase is a fixed constant, whereas string.lowercase is documented
# as locale-dependent (and is not available on Python 3).
chars = string.ascii_lowercase
+
def flub(word, wordlist):
    """Yield the members of ``wordlist`` that differ from ``word`` in one letter.

    Each position of ``word`` is visited in turn and replaced by every
    character of the module-level ``chars``; a candidate is yielded when it
    is not ``word`` itself and is contained in ``wordlist``.  Results come
    out ordered by position first, then by the order of ``chars``.

    ``wordlist`` only needs to support ``in``.
    """
    for position, current in enumerate(word):
        prefix = word[:position]
        suffix = word[position + 1:]
        for letter in chars:
            # Substituting the letter already in place would rebuild `word`.
            if letter == current:
                continue
            candidate = prefix + letter + suffix
            if candidate in wordlist:
                yield candidate
+
def makegraph(wordlist):
    """Return an adjacency mapping linking words one substitution apart.

    The result is a dict mapping a word to the set of words in ``wordlist``
    reachable from it via ``flub``.  Every edge is stored in both directions
    so the mapping can be read as an undirected graph.  Words for which
    ``flub`` yields nothing do not appear as keys.
    """
    graph = {}
    for word in wordlist:
        for neighbour in flub(word, wordlist):
            # Record the edge from both endpoints.
            for source, target in ((word, neighbour), (neighbour, word)):
                if source not in graph:
                    graph[source] = set()
                graph[source].add(target)
    return graph
+
def main():
    """Generate one ``graph_<length>.py`` module per word length.

    Reads ``/usr/share/dict/words``, strips and lower-cases each line, and
    groups the resulting words into sets keyed by their length.  For every
    length it prints a progress line and writes ``graph_<length>.py`` in the
    current directory; the file contains an encoding header followed by
    ``graph = <repr of makegraph(words of that length)>``.
    """
    bylength = {}
    # Use a context manager so the dictionary file is closed once it has been
    # read, instead of leaking the handle when the name is reused below.
    with open('/usr/share/dict/words', 'rb') as words:
        for line in words:
            line = line.strip().lower()
            bylength.setdefault(len(line), set()).add(line)

    for length, wordlist in bylength.items():
        print("processing %d" % length)
        # Build the graph before opening the output so a failure here does
        # not leave behind a truncated module.
        graph = makegraph(wordlist)
        with open('graph_%d.py' % length, 'wb') as out:
            out.write('# -*- encoding: latin-1 -*-\n')
            out.write('graph = ' + repr(graph) + '\n')


if __name__ == '__main__':
    main()