#!/usr/bin/env python2
"""Search for the GCC option set that makes ``runner.c`` run fastest.

Every combination of the option groups in ``args`` is compiled into
``runner_temp`` and timed with a short benchmark.  Option sets whose time is
within 5% of the best are then re-timed with a much longer benchmark, and the
fastest of those is written to the file ``compile-opts``.

Outputs (all in the current directory):
    compile-opts-fast.csv   time,options for every successful quick run
    compile-opts.csv        time,options for every successful long run
    compile-opts            the winning options, space separated

The script avoids ``print`` so that it runs unchanged on Python 2 and 3.
"""

import itertools
import os
import re
import sys
import time

GCC = os.getenv('GCC') or 'gcc'
BENCH_VAL = os.getenv('BENCH_VAL') or '1000000'

# Values passed to the benchmark through -DBENCH_VAL for the two phases.
QUICK_BENCH_VAL = 100000
LONG_BENCH_VAL = 16000000

# Quick results within this factor of the best time go on to the long run.
WINNER_MARGIN = 1.05

try:
    NUMBER_TYPES = (int, long, float)
except NameError:  # Python 3 has no separate ``long`` type
    NUMBER_TYPES = (int, float)


def combinator(args):
    """Yield every way of picking at most one option from each group.

    *args* is a sequence of option groups (sequences of strings).  Each
    yielded value is a tuple holding the chosen options in group order;
    groups from which nothing was picked contribute no element.  The first
    group varies slowest, and "nothing" is tried before each group's options.
    """
    choices = [(None,) + tuple(group) for group in args]
    for picked in itertools.product(*choices):
        # filter(None, ...) drops the "nothing picked" placeholders.
        yield tuple(filter(None, picked))


# Mutually exclusive option groups; at most one entry of each is used per run.
args = [
    ('-O2', '-O3', '-O4'),
    ('-fomit-frame-pointer',),
    ('-m64',),
    ('-march=core2', '-march=pentium4', '-march=opteron'),
    ('-ftracer',),
    ('-ffast-math',),
    ('-funroll-loops', '-funroll-all-loops'),
    ('-finline-limit=999', '-finline-limit=999999'),
    ('-fwhole-program',),
]

R_BENCH = re.compile(r'processed in ([0-9]+) us')
# The opening quote may be ", ` or ' -- presumably this varies with the GCC
# version and locale; the original pattern did not accept an opening '.
R_BADCMD = re.compile(
    r'(?:unrecognized option|unrecognized command line option) '
    r'["`\']([^"\']+)["\']')
R_BADOPT = re.compile(r'error: bad value \((core2)\) for -m\w+= switch')
NOT64 = 'CPU you selected does not support x86-64 instruction set'


def _emit(text):
    """Write *text* to stdout and flush so progress shows up immediately."""
    sys.stdout.write(text)
    sys.stdout.flush()


def getstatusoutput(cmd):
    """Run *cmd* through the shell and return ``(status, output)``.

    Backported from Py3k.  *cmd* may be a string or a list/tuple of words;
    words are joined with spaces and no quoting is applied.  stderr is merged
    into the output, one trailing newline is stripped, and ``status`` is
    whatever ``os.popen(...).close()`` reports (0 on success).
    """
    if isinstance(cmd, (list, tuple)):
        # os.popen only accepts a single command string.
        cmd = ' '.join(cmd)
    pipe = os.popen('{ ' + cmd + '; } 2>&1', 'r')
    try:
        text = pipe.read()
    finally:
        status = pipe.close()
    if status is None:
        status = 0
    if text.endswith('\n'):
        text = text[:-1]
    return status, text


def get_total_runs():
    """Return how many option sets ``combinator(args)`` will produce."""
    total = 1
    for group in args:
        total *= len(group) + 1  # +1 for "use nothing from this group"
    return total


def tval(x):
    """Format *x* seconds as ``MM:SS``."""
    return '%02d:%02d' % (x / 60, x % 60)


def which_cpu(opts):
    """Return the first ``-mcpu``/``-march`` option in *opts*.

    Raises IndexError when *opts* contains no such option.
    """
    return [opt for opt in opts if opt.startswith(('-mcpu', '-march'))][0]


def diagnose_compile_failure(output, opts):
    """Work out which options to stop trying after a failed compile.

    *output* is the compiler's combined stdout/stderr and *opts* the option
    tuple that was used.  Returns a set whose members are either a single
    option string (drop every set containing it) or a 2-tuple of options
    (drop only sets containing both).  The set is empty when the failure is
    not understood.
    """
    cull = set()
    for bad in R_BADCMD.finditer(output):
        cull.add(bad.group(1))
    # Bad -mcpu sets -march and/or -mtune, so be more lenient on that error.
    for bad in R_BADOPT.finditer(output):
        cull.update(opt for opt in opts if bad.group(1) in opt)
    if NOT64 in output:
        try:
            cpu = which_cpu(opts)
        except IndexError:
            cpu = None
        if cpu is not None:
            if '-m64' in opts:
                # The CPU choice is only known to be bad together with -m64.
                cull.add((cpu, '-m64'))
            else:
                # 64-bit was not requested by us, so the failure cannot be
                # pinned on -m64; drop the CPU option on its own.
                cull.add(cpu)
    if ('64-bit mode not compiled in' in output or
            'stubs-64.h: No such file' in output):
        cull.add('-m64')
    return cull


def do_timerun(n, total, opts, benchval):
    """Compile ``runner.c`` with *opts*, run it once and report the timing.

    *n* and *total* are the 1-based run number and the expected number of
    runs; they are used for the progress line and the time-remaining
    estimate.  *benchval* is passed to the program as ``-DBENCH_VAL``.

    Returns the reported time in microseconds as a float on success.  On
    failure returns a set of options to cull (see
    ``diagnose_compile_failure``); the set is empty when the compile failure
    was not understood or when the compiled program itself failed.
    """
    _emit("Testing %5d/%d... " % (n, total))
    cmd = (GCC, '-DSINGLERUN', '-DBENCH_VAL=' + str(benchval), '-DTESTING',
           '-o', 'runner_temp', 'runner.c') + tuple(opts)
    rc, output = getstatusoutput(cmd)
    if rc != 0:
        # Try to figure out why, whether we need to cull args
        cull = diagnose_compile_failure(output, opts)
        if cull:
            _emit("compile fail: %r %r\n" % (rc, cull))
        else:
            _emit("compile fail: %d in %r\n" % (rc, output))
        return cull

    _emit("run... ")
    rc, output = getstatusoutput('./runner_temp')
    if rc != 0:
        _emit("run fail: %d\n" % (rc,))
        return set()
    found = R_BENCH.search(output)
    if found is None:
        _emit("run fail: no timing in %r\n" % (output,))
        return set()

    speed = float(found.group(1))
    # Assume every run takes as long as the average so far:
    #
    # elapsed     remaining
    # -------  =  ---------
    #    n        (total-n)
    elapsed = time.time() - START_TIME
    remaining = elapsed * (total - n) / n
    _emit("%s %s remain\n" % (speed, tval(remaining)))
    return speed


START_TIME = time.time()


def main():
    """Run the quick sweep, then the long run on the leaders, and save the
    fastest option set to ``compile-opts``.

    Exits with an error message if no option set could be timed.

    TODOs
     - since there's such a large variation in run times, try the initial
       compiles with a short run. Then take the top quarter and run a longer
       time trial. Long run should be incremental.
     - make it parallel so it can take up available cores
     - split out the quick run to a separate command, so the slow version can
       be run only after major changes.
     - parse 'cc1: error: unrecognized command line option "-fwhole-program"'
       and cull.
     - same for -m64, not that it takes very long to spin through them...
     - add "trials left" and "estimated time" ('cull' step needs to just
       estimate) some options make the compile take longer.
    """
    global START_TIME

    to_try = list(combinator(args))
    _emit("About to try %d sets of options\n" % len(to_try))

    def remove_from_set(which):
        """Drop every pending option set that contains *which*.

        A tuple means "only when all of these are present together".
        """
        _emit("Removing %r from the options to try\n" % (which,))
        needed = which if isinstance(which, tuple) else (which,)
        to_try[:] = [candidate for candidate in to_try
                     if not all(opt in candidate for opt in needed)]

    results = []
    attempt = 1
    with open('compile-opts-fast.csv', 'w') as spreadsheet:
        while to_try:
            opts = to_try.pop()
            r = do_timerun(attempt, attempt + len(to_try), opts,
                           QUICK_BENCH_VAL)
            if isinstance(r, NUMBER_TYPES):
                results.append((r, opts))
                spreadsheet.write('%s,%s\n' % (r, ' '.join(opts)))
            else:
                for bad in r:
                    remove_from_set(bad)
            attempt += 1

    if not results:
        sys.exit("No option set compiled and ran successfully.")
    results.sort()

    # Now re-run with the ones that came within 5% of the best time.
    time_to_beat = results[0][0] * WINNER_MARGIN
    winners = [entry for entry in results if entry[0] <= time_to_beat]

    START_TIME = time.time()  # restart the clock for the new estimate

    _emit("Running long form\n")
    winner_results = []
    with open('compile-opts.csv', 'w') as spreadsheet:
        try:
            for attempt, (_, opts) in enumerate(winners, 1):
                r = do_timerun(attempt, len(winners), opts, LONG_BENCH_VAL)
                if isinstance(r, NUMBER_TYPES):
                    winner_results.append((r, opts))
                    spreadsheet.write('%s,%s\n' % (r, ' '.join(opts)))
                else:
                    _emit("weird failure %s\n" % (r,))
        except KeyboardInterrupt:
            # Keep whatever finished; the best so far is still saved below.
            _emit("Interrupted\n")

    if not winner_results:
        sys.exit("No long-form run completed; compile-opts not written.")

    # save the fastest options as the ones to use in the build
    best_opts = min(winner_results)[1]
    with open('compile-opts', 'w') as out:
        out.write(" ".join(best_opts))


if __name__ == '__main__':
    main()