summaryrefslogtreecommitdiff
path: root/preprocess.py
blob: 8474dc74d29edbef300d4efc839e88c1dffeb7cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import string
# Alphabet tried at each position when generating one-letter variations.
# ascii_lowercase is used instead of string.lowercase because it is not
# locale-dependent and is available on both Python 2 and Python 3.
chars = string.ascii_lowercase

def flub(word, wordlist):
    """Yield members of ``wordlist`` that differ from ``word`` at one position.

    Every position of ``word`` is replaced in turn by every character of the
    module-level ``chars``.  A candidate is yielded when it is not ``word``
    itself and is contained in ``wordlist``.
    """
    for pos in range(len(word)):
        # The text on either side of the replaced position is the same for
        # every substituted letter.
        head, tail = word[:pos], word[pos + 1:]
        for letter in chars:
            candidate = head + letter + tail
            if candidate == word:
                # Substituting the letter already there is not a variation.
                continue
            if candidate in wordlist:
                yield candidate

def makegraph(wordlist):
    """Return a dict mapping each word to the set of its ``flub`` neighbours.

    Every pair produced by ``flub`` is stored in both directions, so the
    result describes an undirected graph.  Words for which ``flub`` yields
    nothing never appear as keys.
    """
    graph = {}
    for word in wordlist:
        for neighbour in flub(word, wordlist):
            # Record the edge from both endpoints to keep the graph undirected.
            for source, target in ((word, neighbour), (neighbour, word)):
                graph.setdefault(source, set()).add(target)
    return graph

def main():
    """Build one word graph per word length and write each to its own file.

    Reads ``/usr/share/dict/words``, strips and lower-cases every line, and
    groups the resulting words by length.  For each length, the graph
    returned by ``makegraph`` is written as a ``graph = ...`` assignment to
    ``graph_<length>.py`` (a relative path, so it lands in the current
    working directory), preceded by a latin-1 source-encoding line.

    Both files are opened in binary mode and the graph is serialised with
    ``repr``, exactly as the original Python 2 script did.
    """
    bylength = {}
    # The context manager closes the word list as soon as it has been read;
    # previously the handle was never closed explicitly.
    with open('/usr/share/dict/words', 'rb') as words:
        for line in words:
            word = line.strip().lower()
            bylength.setdefault(len(word), set()).add(word)
    for length, wordlist in bylength.items():
        # Single-argument form prints the same text on Python 2 and 3.
        print("processing %d" % length)
        # Close the output file even if building or writing the graph fails.
        with open('graph_%d.py' % length, 'wb') as out:
            out.write('# -*- encoding: latin-1 -*-\n')
            out.write('graph = ' + repr(makegraph(wordlist)) + '\n')

# Call main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()