1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
"""Find the wordlist entries that can be spelled from a word's letters, in order.

Command line: WORD [WORDLIST [CUTOFF]]

    WORD      word whose letters are used, keeping their original order
    WORDLIST  file with one word per line (default: wordlist.txt)
    CUTOFF    only wordlist entries strictly longer than this many
              characters are considered (default: 1)

The matches are written, sorted and one per line, to "output-<WORD>.txt".
"""
import sys

# wordlist entries that passed the length cutoff, stored as dictionary keys
words = {}
# candidate strings produced by recurse() (duplicates are expected)
gen = []
# matching words, in first-seen order until main() sorts them
final = []


def recurse(current, string):
    """Append to ``gen`` every string made of ``current`` plus letters of ``string``.

    Letters are taken from ``string`` left to right, each one either kept or
    skipped, so the number of candidates doubles with every letter.  The same
    candidate is appended more than once; checkOverlap() removes duplicates.
    Nothing is appended when ``string`` is empty.

    Example: "spray" => recurse("s", "pray") and recurse("", "pray").

    current -- prefix built so far
    string  -- letters not yet considered
    """
    if string == "":
        return
    extended = current + string[:1]
    gen.append(current)
    gen.append(extended)
    recurse(current, string[1:])
    recurse(extended, string[1:])


def checkOverlap(gen, words):
    """Append to ``final`` each item of ``gen`` that is in ``words``, once.

    gen   -- iterable of candidate strings
    words -- container of known words (supports ``in``)
    """
    # A set makes the duplicate test constant-time; scanning the ``final``
    # list for every candidate was quadratic.
    seen = set(final)
    for item in gen:
        if item in words and item not in seen:
            seen.add(item)
            final.append(item)


def main(argv):
    """Run the search described in the module docstring.

    argv -- full argument vector, program name first (as in sys.argv)

    Returns 0 on success, 1 when no input word was given.
    """
    if len(argv) > 1:
        word = argv[1]
    else:
        # The first argument is the word to parse, not a file.
        print("Please specify an input word")
        return 1

    if len(argv) > 2:
        wordlist_name = argv[2]
    else:
        print("Using default wordlist.txt")
        wordlist_name = "wordlist.txt"

    if len(argv) > 3:
        cutoff = int(argv[3])
    else:
        print("Using default cutoff of 1 letter")
        cutoff = 1

    # Read the wordlist before creating the output file, so that a missing
    # wordlist does not leave an empty output file behind.
    with open(wordlist_name, 'r') as wordlist_file:
        for line in wordlist_file:
            line = line.strip()
            # strict comparison: entries of exactly ``cutoff`` characters
            # are dropped
            if len(line) > cutoff:
                words[line] = 1

    # start recursion on the input word, then keep the candidates that are words
    recurse("", word)
    checkOverlap(gen, words)
    final.sort()

    # the output file is named after the word being parsed
    outFile = "output-" + word + ".txt"
    with open(outFile, 'w') as out:
        for item in final:
            out.write(item + '\n')

    print("Output written to " + outFile)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Refactorings
No refactoring yet !
nosklo
January 20, 2009, January 20, 2009 14:28, permalink
Using optparse to parse options.
Using a set to store words.
Using generators and list comprehensions.
Usage example:
$ python williams.py doubt
do
dot
doubt
dub
out
$ python williams.py -h
Usage: williams.py [options] words...
Options:
-h, --help show this help message and exit
-w FILE, --wordlist=FILE
wordlist filename, default wordlist.txt
-m LENGTH, --minlenght=LENGTH
minimum length of generated word, default 1
-o FILE, --output=FILE
Output to FILE. By default output to stdout.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
"""Print the wordlist entries that can be spelled from each given word.

Letters are taken from the input word in their original order; any of them
may be skipped.  Run with -h for the command-line options.
"""
import sys
import optparse


def _build_parser():
    """Return the command-line option parser."""
    parser = optparse.OptionParser('%prog [options] words...')
    parser.add_option("-w", "--wordlist", dest="words", default='wordlist.txt',
                      help="wordlist filename, default wordlist.txt",
                      metavar="FILE")
    # "--minlenght" is the original (misspelled) option name; it is kept as an
    # alias so existing command lines keep working.
    parser.add_option("-m", "--minlength", "--minlenght", dest="cutoff",
                      default=1, type='int',
                      help="minimum length of generated word, default 1",
                      metavar="LENGTH")
    parser.add_option("-o", "--output", dest="outputfile",
                      help="Output to FILE. By default output to stdout.",
                      metavar="FILE")
    return parser


def _read_wordlist(filename, cutoff):
    """Return the set of stripped lines of *filename* longer than *cutoff*."""
    with open(filename) as handle:
        stripped = (line.strip() for line in handle)
        # The set is built inside the ``with`` block so the file is fully
        # read before it is closed.
        return set(entry for entry in stripped if len(entry) > cutoff)


def recurse(current, base):
    """Return the set of strings formed by ``current`` plus letters of ``base``.

    Letters of ``base`` are used left to right and each may be kept or
    skipped, so the result size doubles with every letter.  An empty ``base``
    yields an empty set.

    The set is built iteratively, one letter at a time, instead of by
    recursion; the result is the same but no intermediate sets are recomputed
    and the interpreter's recursion limit does not apply.

    example: recurse("", "ab") => set(["", "a", "b", "ab"])
    """
    if not base:
        return set()
    result = set([current])
    for letter in base:
        result.update([partial + letter for partial in result])
    return result


def main(args=None):
    """Parse *args* (default: sys.argv[1:]) and write the matches.

    For each positional word, the matching wordlist entries are written
    sorted, one per line, to the -o file or to standard output.
    """
    options, words = _build_parser().parse_args(args)
    wordlist = _read_wordlist(options.words, options.cutoff)

    if options.outputfile:
        out = open(options.outputfile, 'w')
    else:
        out = sys.stdout
    try:
        for word in words:
            out.writelines(sorted('%s\n' % item
                                  for item in recurse('', word)
                                  if item in wordlist))
    finally:
        # Close only a file opened here; standard output is left alone.
        if out is not sys.stdout:
            out.close()


if __name__ == '__main__':
    main()
cmurphycode.blogspot.com
February 20, 2009, February 20, 2009 03:21, permalink
For some reason this failed to notify me. Nice stuff! I hadn't seen optparse before, I like it a lot. Using a set is a good idea and generators are always grand. Thanks!
For my blog: http://www.cmurphycode.blogspot.com
A little project, via Kottke (http://www.kottke.org/08/11/williams-poems), that I thought would be easy and fun. No need to actually refactor unless you're truly bored; I'm just experimenting with using this as a place to share code.