#!/usr/bin/env python3
#
# This script finds the code size at the function level, with/without
# static functions, and has some conveniences for comparing different
# versions. It's basically one big wrapper around nm, and may or may
# not have been written out of jealousy of Linux's Bloat-O-Meter.
#
# Here's a useful bash script to use while developing:
# ./scripts/code_size.py -qo old.csv
# while true ; do ./scripts/code_size.py -d old.csv ; inotifywait -rqe modify * ; done
#
# Or even better, to automatically update results on commit:
# ./scripts/code_size.py -qo commit.csv
# while true ; do ./scripts/code_size.py -d commit.csv -o current.csv ; git diff --exit-code --quiet && cp current.csv commit.csv ; inotifywait -rqe modify * ; done
#
# Or my personal favorite:
# ./scripts/code_size.py -qo master.csv && cp master.csv commit.csv
# while true ; do ( ./scripts/code_size.py -i commit.csv -d master.csv -s ; ./scripts/code_size.py -i current.csv -d master.csv -s ; ./scripts/code_size.py -d master.csv -o current.csv -s ) | awk 'BEGIN {printf "%-16s %7s %7s %7s\n","","old","new","diff"} (NR==2 && $1="commit") || (NR==4 && $1="prev") || (NR==6 && $1="current") {printf "%-16s %7s %7s %7s %s\n",$1,$2,$3,$5,$6}' ; git diff --exit-code --quiet && cp current.csv commit.csv ; inotifywait -rqe modify * ; done
#
import collections as co
import csv
import itertools as it
import os
import re
import shlex
import subprocess as sp
import sys
  27. SIZEDIR = 'sizes'
  28. RULES = """
  29. define FLATTEN
  30. %(sizedir)s/%(build)s.$(subst /,.,$(target)): $(target)
  31. ( echo "#line 1 \\"$$<\\"" ; %(cat)s $$< ) > $$@
  32. %(sizedir)s/%(build)s.$(subst /,.,$(target:.c=.size)): \\
  33. %(sizedir)s/%(build)s.$(subst /,.,$(target:.c=.o))
  34. $(NM) --size-sort $$^ | sed 's/^/$(subst /,\\/,$(target:.c=.o)):/' > $$@
  35. endef
  36. $(foreach target,$(SRC),$(eval $(FLATTEN)))
  37. -include %(sizedir)s/*.d
  38. .SECONDARY:
  39. %%.size: $(foreach t,$(subst /,.,$(OBJ:.o=.size)),%%.$t)
  40. cat $^ > $@
  41. """
  42. CATS = {
  43. 'code': 'cat',
  44. 'code_inlined': 'sed \'s/^static\( inline\)\?//\'',
  45. }
  46. def build(**args):
  47. # mkdir -p sizedir
  48. os.makedirs(args['sizedir'], exist_ok=True)
  49. if args.get('inlined', False):
  50. builds = ['code', 'code_inlined']
  51. else:
  52. builds = ['code']
  53. # write makefiles for the different types of builds
  54. makefiles = []
  55. targets = []
  56. for build in builds:
  57. path = args['sizedir'] + '/' + build
  58. with open(path + '.mk', 'w') as mk:
  59. mk.write(RULES.replace(4*' ', '\t') % dict(
  60. sizedir=args['sizedir'],
  61. build=build,
  62. cat=CATS[build]))
  63. mk.write('\n')
  64. # pass on defines
  65. for d in args['D']:
  66. mk.write('%s: override CFLAGS += -D%s\n' % (
  67. path+'.size', d))
  68. makefiles.append(path + '.mk')
  69. targets.append(path + '.size')
  70. # build in parallel
  71. cmd = (['make', '-f', 'Makefile'] +
  72. list(it.chain.from_iterable(['-f', m] for m in makefiles)) +
  73. [target for target in targets])
  74. if args.get('verbose', False):
  75. print(' '.join(shlex.quote(c) for c in cmd))
  76. proc = sp.Popen(cmd,
  77. stdout=sp.DEVNULL if not args.get('verbose', False) else None)
  78. proc.wait()
  79. if proc.returncode != 0:
  80. sys.exit(-1)
  81. # find results
  82. build_results = co.defaultdict(lambda: 0)
  83. # notes
  84. # - filters type
  85. # - discards internal/debug functions (leading __)
  86. pattern = re.compile(
  87. '^(?P<file>[^:]+)' +
  88. ':(?P<size>[0-9a-fA-F]+)' +
  89. ' (?P<type>[%s])' % re.escape(args['type']) +
  90. ' (?!__)(?P<name>.+?)$')
  91. for build in builds:
  92. path = args['sizedir'] + '/' + build
  93. with open(path + '.size') as size:
  94. for line in size:
  95. match = pattern.match(line)
  96. if match:
  97. file = match.group('file')
  98. # discard .8449 suffixes created by optimizer
  99. name = re.sub('\.[0-9]+', '', match.group('name'))
  100. size = int(match.group('size'), 16)
  101. build_results[(build, file, name)] += size
  102. results = []
  103. for (build, file, name), size in build_results.items():
  104. if build == 'code':
  105. results.append((file, name, size, False))
  106. elif (build == 'code_inlined' and
  107. ('inlined', file, name) not in results):
  108. results.append((file, name, size, True))
  109. return results
  110. def main(**args):
  111. # find results
  112. if not args.get('input', None):
  113. results = build(**args)
  114. else:
  115. with open(args['input']) as f:
  116. r = csv.DictReader(f)
  117. results = [
  118. ( result['file'],
  119. result['name'],
  120. int(result['size']),
  121. bool(int(result.get('inlined', 0))))
  122. for result in r
  123. if (not bool(int(result.get('inlined', 0))) or
  124. args.get('inlined', False))]
  125. total = 0
  126. for _, _, size, inlined in results:
  127. if not inlined:
  128. total += size
  129. # find previous results?
  130. if args.get('diff', None):
  131. with open(args['diff']) as f:
  132. r = csv.DictReader(f)
  133. prev_results = [
  134. ( result['file'],
  135. result['name'],
  136. int(result['size']),
  137. bool(int(result.get('inlined', 0))))
  138. for result in r
  139. if (not bool(int(result.get('inlined', 0))) or
  140. args.get('inlined', False))]
  141. prev_total = 0
  142. for _, _, size, inlined in prev_results:
  143. if not inlined:
  144. prev_total += size
  145. # write results to CSV
  146. if args.get('output', None):
  147. results.sort(key=lambda x: (-x[2], x))
  148. with open(args['output'], 'w') as f:
  149. w = csv.writer(f)
  150. if args.get('inlined', False):
  151. w.writerow(['file', 'name', 'size', 'inlined'])
  152. for file, name, size, inlined in results:
  153. w.writerow((file, name, size, int(inlined)))
  154. else:
  155. w.writerow(['file', 'name', 'size'])
  156. for file, name, size, inlined in results:
  157. w.writerow((file, name, size))
  158. # print results
  159. def dedup_functions(results):
  160. functions = co.defaultdict(lambda: (0, True))
  161. for _, name, size, inlined in results:
  162. if not inlined:
  163. functions[name] = (functions[name][0] + size, False)
  164. for _, name, size, inlined in results:
  165. if inlined and functions[name][1]:
  166. functions[name] = (functions[name][0] + size, True)
  167. return functions
  168. def dedup_files(results):
  169. files = co.defaultdict(lambda: 0)
  170. for file, _, size, inlined in results:
  171. if not inlined:
  172. files[file] += size
  173. return files
  174. def diff_sizes(olds, news):
  175. diff = co.defaultdict(lambda: (None, None, None))
  176. for name, new in news.items():
  177. diff[name] = (None, new, new)
  178. for name, old in olds.items():
  179. new = diff[name][1] or 0
  180. diff[name] = (old, new, new-old)
  181. return diff
  182. def print_header(name=''):
  183. if not args.get('diff', False):
  184. print('%-40s %7s' % (name, 'size'))
  185. else:
  186. print('%-40s %7s %7s %7s' % (name, 'old', 'new', 'diff'))
  187. def print_functions():
  188. functions = dedup_functions(results)
  189. functions = {
  190. name+' (inlined)' if inlined else name: size
  191. for name, (size, inlined) in functions.items()}
  192. if not args.get('diff', None):
  193. print_header('function')
  194. for name, size in sorted(functions.items(),
  195. key=lambda x: (-x[1], x)):
  196. print("%-40s %7d" % (name, size))
  197. else:
  198. prev_functions = dedup_functions(prev_results)
  199. prev_functions = {
  200. name+' (inlined)' if inlined else name: size
  201. for name, (size, inlined) in prev_functions.items()}
  202. diff = diff_sizes(functions, prev_functions)
  203. print_header('function (%d added, %d removed)' % (
  204. sum(1 for old, _, _ in diff.values() if not old),
  205. sum(1 for _, new, _ in diff.values() if not new)))
  206. for name, (old, new, diff) in sorted(diff.items(),
  207. key=lambda x: (-(x[1][2] or 0), x)):
  208. if diff or args.get('all', False):
  209. print("%-40s %7s %7s %+7d%s" % (
  210. name, old or "-", new or "-", diff,
  211. ' (%+.2f%%)' % (100*((new-old)/old))
  212. if old and new else
  213. ''))
  214. def print_files():
  215. files = dedup_files(results)
  216. if not args.get('diff', None):
  217. print_header('file')
  218. for file, size in sorted(files.items(),
  219. key=lambda x: (-x[1], x)):
  220. print("%-40s %7d" % (file, size))
  221. else:
  222. prev_files = dedup_files(prev_results)
  223. diff = diff_sizes(files, prev_files)
  224. print_header('file (%d added, %d removed)' % (
  225. sum(1 for old, _, _ in diff.values() if not old),
  226. sum(1 for _, new, _ in diff.values() if not new)))
  227. for name, (old, new, diff) in sorted(diff.items(),
  228. key=lambda x: (-(x[1][2] or 0), x)):
  229. if diff or args.get('all', False):
  230. print("%-40s %7s %7s %+7d%s" % (
  231. name, old or "-", new or "-", diff,
  232. ' (%+.2f%%)' % (100*((new-old)/old))
  233. if old and new else
  234. ''))
  235. def print_totals():
  236. if not args.get('diff', None):
  237. print("%-40s %7d" % ('TOTALS', total))
  238. else:
  239. print("%-40s %7s %7s %+7d%s" % (
  240. 'TOTALS', prev_total, total, total-prev_total,
  241. ' (%+.2f%%)' % (100*((total-prev_total)/total))
  242. if prev_total and total else
  243. ''))
  244. def print_status():
  245. if not args.get('diff', None):
  246. print(total)
  247. else:
  248. print("%d (%+.2f%%)" % (total, 100*((total-prev_total)/total)))
  249. if args.get('quiet', False):
  250. pass
  251. elif args.get('status', False):
  252. print_status()
  253. elif args.get('summary', False):
  254. print_header()
  255. print_totals()
  256. elif args.get('files', False):
  257. print_files()
  258. print_totals()
  259. else:
  260. print_functions()
  261. print_totals()
  262. if __name__ == "__main__":
  263. import argparse
  264. import sys
  265. parser = argparse.ArgumentParser(
  266. description="Find code size at the function level.")
  267. parser.add_argument('sizedir', nargs='?', default=SIZEDIR,
  268. help="Directory to store intermediary results. Defaults "
  269. "to \"%s\"." % SIZEDIR)
  270. parser.add_argument('-D', action='append', default=[],
  271. help="Specify compile-time define.")
  272. parser.add_argument('-v', '--verbose', action='store_true',
  273. help="Output commands that run behind the scenes.")
  274. parser.add_argument('-i', '--input',
  275. help="Don't compile and find code sizes, instead use this CSV file.")
  276. parser.add_argument('-o', '--output',
  277. help="Specify CSV file to store results.")
  278. parser.add_argument('-d', '--diff',
  279. help="Specify CSV file to diff code size against.")
  280. parser.add_argument('-a', '--all', action='store_true',
  281. help="Show all functions, not just the ones that changed.")
  282. parser.add_argument('--inlined', action='store_true',
  283. help="Run a second compilation to find the sizes of functions normally "
  284. "removed by optimizations. These will be shown as \"*.inlined\" "
  285. "functions, and will not be included in the total.")
  286. parser.add_argument('--files', action='store_true',
  287. help="Show file-level code sizes. Note this does not include padding! "
  288. "So sizes may differ from other tools.")
  289. parser.add_argument('-s', '--summary', action='store_true',
  290. help="Only show the total code size.")
  291. parser.add_argument('-S', '--status', action='store_true',
  292. help="Show minimum info useful for a single-line status.")
  293. parser.add_argument('-q', '--quiet', action='store_true',
  294. help="Don't show anything, useful with -o.")
  295. parser.add_argument('--type', default='tTrRdDbB',
  296. help="Type of symbols to report, this uses the same single-character "
  297. "type-names emitted by nm. Defaults to %(default)r.")
  298. sys.exit(main(**vars(parser.parse_args())))