code.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find code size at the function level. Basically just a big wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
  7. import os
  8. import glob
  9. import itertools as it
  10. import subprocess as sp
  11. import shlex
  12. import re
  13. import csv
  14. import collections as co
# Default object-file glob patterns, used as the default for the obj_paths
# command-line argument.
OBJ_PATHS = ['*.o']
  16. def collect(paths, **args):
  17. results = co.defaultdict(lambda: 0)
  18. pattern = re.compile(
  19. '^(?P<size>[0-9a-fA-F]+)' +
  20. ' (?P<type>[%s])' % re.escape(args['type']) +
  21. ' (?P<func>.+?)$')
  22. for path in paths:
  23. # note nm-tool may contain extra args
  24. cmd = args['nm_tool'] + ['--size-sort', path]
  25. if args.get('verbose'):
  26. print(' '.join(shlex.quote(c) for c in cmd))
  27. proc = sp.Popen(cmd,
  28. stdout=sp.PIPE,
  29. stderr=sp.PIPE if not args.get('verbose') else None,
  30. universal_newlines=True)
  31. for line in proc.stdout:
  32. m = pattern.match(line)
  33. if m:
  34. results[(path, m.group('func'))] += int(m.group('size'), 16)
  35. proc.wait()
  36. if proc.returncode != 0:
  37. if not args.get('verbose'):
  38. for line in proc.stderr:
  39. sys.stdout.write(line)
  40. sys.exit(-1)
  41. flat_results = []
  42. for (file, func), size in results.items():
  43. # map to source files
  44. if args.get('build_dir'):
  45. file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
  46. # discard internal functions
  47. if not args.get('everything'):
  48. if func.startswith('__'):
  49. continue
  50. # discard .8449 suffixes created by optimizer
  51. func = re.sub('\.[0-9]+', '', func)
  52. flat_results.append((file, func, size))
  53. return flat_results
  54. def main(**args):
  55. # find sizes
  56. if not args.get('use', None):
  57. # find .o files
  58. paths = []
  59. for path in args['obj_paths']:
  60. if os.path.isdir(path):
  61. path = path + '/*.o'
  62. for path in glob.glob(path):
  63. paths.append(path)
  64. if not paths:
  65. print('no .obj files found in %r?' % args['obj_paths'])
  66. sys.exit(-1)
  67. results = collect(paths, **args)
  68. else:
  69. with open(args['use']) as f:
  70. r = csv.DictReader(f)
  71. results = [
  72. ( result['file'],
  73. result['function'],
  74. int(result['size']))
  75. for result in r]
  76. total = 0
  77. for _, _, size in results:
  78. total += size
  79. # find previous results?
  80. if args.get('diff'):
  81. with open(args['diff']) as f:
  82. r = csv.DictReader(f)
  83. prev_results = [
  84. ( result['file'],
  85. result['function'],
  86. int(result['size']))
  87. for result in r]
  88. prev_total = 0
  89. for _, _, size in prev_results:
  90. prev_total += size
  91. # write results to CSV
  92. if args.get('output'):
  93. with open(args['output'], 'w') as f:
  94. w = csv.writer(f)
  95. w.writerow(['file', 'function', 'size'])
  96. for file, func, size in sorted(results):
  97. w.writerow((file, func, size))
  98. # print results
  99. def dedup_entries(results, by='function'):
  100. entries = co.defaultdict(lambda: 0)
  101. for file, func, size in results:
  102. entry = (file if by == 'file' else func)
  103. entries[entry] += size
  104. return entries
  105. def diff_entries(olds, news):
  106. diff = co.defaultdict(lambda: (0, 0, 0, 0))
  107. for name, new in news.items():
  108. diff[name] = (0, new, new, 1.0)
  109. for name, old in olds.items():
  110. _, new, _, _ = diff[name]
  111. diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
  112. return diff
  113. def sorted_entries(entries):
  114. if args.get('size_sort'):
  115. return sorted(entries, key=lambda x: (-x[1], x))
  116. elif args.get('reverse_size_sort'):
  117. return sorted(entries, key=lambda x: (+x[1], x))
  118. else:
  119. return sorted(entries)
  120. def sorted_diff_entries(entries):
  121. if args.get('size_sort'):
  122. return sorted(entries, key=lambda x: (-x[1][1], x))
  123. elif args.get('reverse_size_sort'):
  124. return sorted(entries, key=lambda x: (+x[1][1], x))
  125. else:
  126. return sorted(entries, key=lambda x: (-x[1][3], x))
  127. def print_header(by=''):
  128. if not args.get('diff'):
  129. print('%-36s %7s' % (by, 'size'))
  130. else:
  131. print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))
  132. def print_entries(by='function'):
  133. entries = dedup_entries(results, by=by)
  134. if not args.get('diff'):
  135. print_header(by=by)
  136. for name, size in sorted_entries(entries.items()):
  137. print("%-36s %7d" % (name, size))
  138. else:
  139. prev_entries = dedup_entries(prev_results, by=by)
  140. diff = diff_entries(prev_entries, entries)
  141. print_header(by='%s (%d added, %d removed)' % (by,
  142. sum(1 for old, _, _, _ in diff.values() if not old),
  143. sum(1 for _, new, _, _ in diff.values() if not new)))
  144. for name, (old, new, diff, ratio) in sorted_diff_entries(diff.items()):
  145. if ratio or args.get('all'):
  146. print("%-36s %7s %7s %+7d%s" % (name,
  147. old or "-",
  148. new or "-",
  149. diff,
  150. ' (%+.1f%%)' % (100*ratio) if ratio else ''))
  151. def print_totals():
  152. if not args.get('diff'):
  153. print("%-36s %7d" % ('TOTAL', total))
  154. else:
  155. ratio = (total-prev_total)/prev_total if prev_total else 1.0
  156. print("%-36s %7s %7s %+7d%s" % (
  157. 'TOTAL',
  158. prev_total if prev_total else '-',
  159. total if total else '-',
  160. total-prev_total,
  161. ' (%+.1f%%)' % (100*ratio) if ratio else ''))
  162. if args.get('quiet'):
  163. pass
  164. elif args.get('summary'):
  165. print_header()
  166. print_totals()
  167. elif args.get('files'):
  168. print_entries(by='file')
  169. print_totals()
  170. else:
  171. print_entries(by='function')
  172. print_totals()
  173. if __name__ == "__main__":
  174. import argparse
  175. import sys
  176. parser = argparse.ArgumentParser(
  177. description="Find code size at the function level.")
  178. parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
  179. help="Description of where to find *.o files. May be a directory \
  180. or a list of paths. Defaults to %r." % OBJ_PATHS)
  181. parser.add_argument('-v', '--verbose', action='store_true',
  182. help="Output commands that run behind the scenes.")
  183. parser.add_argument('-o', '--output',
  184. help="Specify CSV file to store results.")
  185. parser.add_argument('-u', '--use',
  186. help="Don't compile and find code sizes, instead use this CSV file.")
  187. parser.add_argument('-d', '--diff',
  188. help="Specify CSV file to diff code size against.")
  189. parser.add_argument('-a', '--all', action='store_true',
  190. help="Show all functions, not just the ones that changed.")
  191. parser.add_argument('-A', '--everything', action='store_true',
  192. help="Include builtin and libc specific symbols.")
  193. parser.add_argument('-s', '--size-sort', action='store_true',
  194. help="Sort by size.")
  195. parser.add_argument('-S', '--reverse-size-sort', action='store_true',
  196. help="Sort by size, but backwards.")
  197. parser.add_argument('--files', action='store_true',
  198. help="Show file-level code sizes. Note this does not include padding! "
  199. "So sizes may differ from other tools.")
  200. parser.add_argument('--summary', action='store_true',
  201. help="Only show the total code size.")
  202. parser.add_argument('-q', '--quiet', action='store_true',
  203. help="Don't show anything, useful with -o.")
  204. parser.add_argument('--type', default='tTrRdD',
  205. help="Type of symbols to report, this uses the same single-character "
  206. "type-names emitted by nm. Defaults to %(default)r.")
  207. parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
  208. help="Path to the nm tool to use.")
  209. parser.add_argument('--build-dir',
  210. help="Specify the relative build directory. Used to map object files \
  211. to the correct source files.")
  212. sys.exit(main(**vars(parser.parse_args())))