code.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find code size at the function level. Basically just a big wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
  7. import collections as co
  8. import csv
  9. import glob
  10. import itertools as it
  11. import math as m
  12. import os
  13. import re
  14. import shlex
  15. import subprocess as sp
# default glob patterns searched for object files when no paths are given
OBJ_PATHS = ['*.o']
# default nm command, kept as a list so extra arguments can be appended to it
NM_TOOL = ['nm']
# default set of nm symbol-type letters accepted when parsing nm output
TYPE = 'tTrRdD'
  19. # integer fields
  20. class IntField(co.namedtuple('IntField', 'x')):
  21. __slots__ = ()
  22. def __new__(cls, x):
  23. if isinstance(x, IntField):
  24. return x
  25. if isinstance(x, str):
  26. try:
  27. x = int(x, 0)
  28. except ValueError:
  29. # also accept +-∞ and +-inf
  30. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  31. x = float('inf')
  32. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  33. x = float('-inf')
  34. else:
  35. raise
  36. return super().__new__(cls, x)
  37. def __int__(self):
  38. assert not m.isinf(self.x)
  39. return self.x
  40. def __float__(self):
  41. return float(self.x)
  42. def __str__(self):
  43. if self.x == float('inf'):
  44. return '∞'
  45. elif self.x == float('-inf'):
  46. return '-∞'
  47. else:
  48. return str(self.x)
  49. none = '%7s' % '-'
  50. def table(self):
  51. return '%7s' % (self,)
  52. diff_none = '%7s' % '-'
  53. diff_table = table
  54. def diff_diff(self, other):
  55. new = self.x if self else 0
  56. old = other.x if other else 0
  57. diff = new - old
  58. if diff == float('+inf'):
  59. return '%7s' % '+∞'
  60. elif diff == float('-inf'):
  61. return '%7s' % '-∞'
  62. else:
  63. return '%+7d' % diff
  64. def ratio(self, other):
  65. new = self.x if self else 0
  66. old = other.x if other else 0
  67. if m.isinf(new) and m.isinf(old):
  68. return 0.0
  69. elif m.isinf(new):
  70. return float('+inf')
  71. elif m.isinf(old):
  72. return float('-inf')
  73. elif not old and not new:
  74. return 0.0
  75. elif not old:
  76. return 1.0
  77. else:
  78. return (new-old) / old
  79. def __add__(self, other):
  80. return IntField(self.x + other.x)
  81. def __mul__(self, other):
  82. return IntField(self.x * other.x)
  83. def __lt__(self, other):
  84. return self.x < other.x
  85. def __gt__(self, other):
  86. return self.__class__.__lt__(other, self)
  87. def __le__(self, other):
  88. return not self.__gt__(other)
  89. def __ge__(self, other):
  90. return not self.__lt__(other)
  91. def __truediv__(self, n):
  92. if m.isinf(self.x):
  93. return self
  94. else:
  95. return IntField(round(self.x / n))
  96. # code size results
  97. class CodeResult(co.namedtuple('CodeResult', 'file,function,code_size')):
  98. __slots__ = ()
  99. def __new__(cls, file, function, code_size):
  100. return super().__new__(cls, file, function, IntField(code_size))
  101. def __add__(self, other):
  102. return CodeResult(self.file, self.function,
  103. self.code_size + other.code_size)
  104. def openio(path, mode='r'):
  105. if path == '-':
  106. if 'r' in mode:
  107. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  108. else:
  109. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  110. else:
  111. return open(path, mode)
  112. def collect(paths, *,
  113. nm_tool=NM_TOOL,
  114. type=TYPE,
  115. build_dir=None,
  116. everything=False,
  117. **args):
  118. results = []
  119. pattern = re.compile(
  120. '^(?P<size>[0-9a-fA-F]+)' +
  121. ' (?P<type>[%s])' % re.escape(type) +
  122. ' (?P<func>.+?)$')
  123. for path in paths:
  124. # map to source file
  125. src_path = re.sub('\.o$', '.c', path)
  126. if build_dir:
  127. src_path = re.sub('%s/*' % re.escape(build_dir), '',
  128. src_path)
  129. # note nm-tool may contain extra args
  130. cmd = nm_tool + ['--size-sort', path]
  131. if args.get('verbose'):
  132. print(' '.join(shlex.quote(c) for c in cmd))
  133. proc = sp.Popen(cmd,
  134. stdout=sp.PIPE,
  135. stderr=sp.PIPE if not args.get('verbose') else None,
  136. universal_newlines=True,
  137. errors='replace')
  138. for line in proc.stdout:
  139. m = pattern.match(line)
  140. if m:
  141. func = m.group('func')
  142. # discard internal functions
  143. if not everything and func.startswith('__'):
  144. continue
  145. # discard .8449 suffixes created by optimizer
  146. func = re.sub('\.[0-9]+', '', func)
  147. results.append(CodeResult(
  148. src_path, func,
  149. int(m.group('size'), 16)))
  150. proc.wait()
  151. if proc.returncode != 0:
  152. if not args.get('verbose'):
  153. for line in proc.stderr:
  154. sys.stdout.write(line)
  155. sys.exit(-1)
  156. return results
  157. def fold(results, *,
  158. by=['file', 'function'],
  159. **_):
  160. folding = co.OrderedDict()
  161. for r in results:
  162. name = tuple(getattr(r, k) for k in by)
  163. if name not in folding:
  164. folding[name] = []
  165. folding[name].append(r)
  166. folded = []
  167. for rs in folding.values():
  168. folded.append(sum(rs[1:], start=rs[0]))
  169. return folded
  170. def table(results, diff_results=None, *,
  171. by_file=False,
  172. size_sort=False,
  173. reverse_size_sort=False,
  174. summary=False,
  175. all=False,
  176. percent=False,
  177. **_):
  178. all_, all = all, __builtins__.all
  179. # fold
  180. results = fold(results, by=['file' if by_file else 'function'])
  181. if diff_results is not None:
  182. diff_results = fold(diff_results,
  183. by=['file' if by_file else 'function'])
  184. table = {
  185. r.file if by_file else r.function: r
  186. for r in results}
  187. diff_table = {
  188. r.file if by_file else r.function: r
  189. for r in diff_results or []}
  190. # sort, note that python's sort is stable
  191. names = list(table.keys() | diff_table.keys())
  192. names.sort()
  193. if diff_results is not None:
  194. names.sort(key=lambda n: -IntField.ratio(
  195. table[n].code_size if n in table else None,
  196. diff_table[n].code_size if n in diff_table else None))
  197. if size_sort:
  198. names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
  199. reverse=True)
  200. elif reverse_size_sort:
  201. names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
  202. reverse=False)
  203. # print header
  204. print('%-36s' % ('%s%s' % (
  205. 'file' if by_file else 'function',
  206. ' (%d added, %d removed)' % (
  207. sum(1 for n in table if n not in diff_table),
  208. sum(1 for n in diff_table if n not in table))
  209. if diff_results is not None and not percent else '')
  210. if not summary else ''),
  211. end='')
  212. if diff_results is None:
  213. print(' %s' % ('size'.rjust(len(IntField.none))))
  214. elif percent:
  215. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  216. else:
  217. print(' %s %s %s' % (
  218. 'old'.rjust(len(IntField.diff_none)),
  219. 'new'.rjust(len(IntField.diff_none)),
  220. 'diff'.rjust(len(IntField.diff_none))))
  221. # print entries
  222. if not summary:
  223. for name in names:
  224. r = table.get(name)
  225. if diff_results is not None:
  226. diff_r = diff_table.get(name)
  227. ratio = IntField.ratio(
  228. r.code_size if r else None,
  229. diff_r.code_size if diff_r else None)
  230. if not ratio and not all_:
  231. continue
  232. print('%-36s' % name, end='')
  233. if diff_results is None:
  234. print(' %s' % (
  235. r.code_size.table()
  236. if r else IntField.none))
  237. elif percent:
  238. print(' %s%s' % (
  239. r.code_size.diff_table()
  240. if r else IntField.diff_none,
  241. ' (%s)' % (
  242. '+∞%' if ratio == float('+inf')
  243. else '-∞%' if ratio == float('-inf')
  244. else '%+.1f%%' % (100*ratio))))
  245. else:
  246. print(' %s %s %s%s' % (
  247. diff_r.code_size.diff_table()
  248. if diff_r else IntField.diff_none,
  249. r.code_size.diff_table()
  250. if r else IntField.diff_none,
  251. IntField.diff_diff(
  252. r.code_size if r else None,
  253. diff_r.code_size if diff_r else None)
  254. if r or diff_r else IntField.diff_none,
  255. ' (%s)' % (
  256. '+∞%' if ratio == float('+inf')
  257. else '-∞%' if ratio == float('-inf')
  258. else '%+.1f%%' % (100*ratio))
  259. if ratio else ''))
  260. # print total
  261. total = fold(results, by=[])
  262. r = total[0] if total else None
  263. if diff_results is not None:
  264. diff_total = fold(diff_results, by=[])
  265. diff_r = diff_total[0] if diff_total else None
  266. ratio = IntField.ratio(
  267. r.code_size if r else None,
  268. diff_r.code_size if diff_r else None)
  269. print('%-36s' % 'TOTAL', end='')
  270. if diff_results is None:
  271. print(' %s' % (
  272. r.code_size.table()
  273. if r else IntField.none))
  274. elif percent:
  275. print(' %s%s' % (
  276. r.code_size.diff_table()
  277. if r else IntField.diff_none,
  278. ' (%s)' % (
  279. '+∞%' if ratio == float('+inf')
  280. else '-∞%' if ratio == float('-inf')
  281. else '%+.1f%%' % (100*ratio))))
  282. else:
  283. print(' %s %s %s%s' % (
  284. diff_r.code_size.diff_table()
  285. if diff_r else IntField.diff_none,
  286. r.code_size.diff_table()
  287. if r else IntField.diff_none,
  288. IntField.diff_diff(
  289. r.code_size if r else None,
  290. diff_r.code_size if diff_r else None)
  291. if r or diff_r else IntField.diff_none,
  292. ' (%s)' % (
  293. '+∞%' if ratio == float('+inf')
  294. else '-∞%' if ratio == float('-inf')
  295. else '%+.1f%%' % (100*ratio))
  296. if ratio else ''))
  297. def main(obj_paths, **args):
  298. # find sizes
  299. if not args.get('use', None):
  300. # find .o files
  301. paths = []
  302. for path in obj_paths:
  303. if os.path.isdir(path):
  304. path = path + '/*.o'
  305. for path in glob.glob(path):
  306. paths.append(path)
  307. if not paths:
  308. print('no .obj files found in %r?' % obj_paths)
  309. sys.exit(-1)
  310. results = collect(paths, **args)
  311. else:
  312. results = []
  313. with openio(args['use']) as f:
  314. reader = csv.DictReader(f)
  315. for r in reader:
  316. try:
  317. results.append(CodeResult(**{
  318. k: v for k, v in r.items()
  319. if k in CodeResult._fields}))
  320. except TypeError:
  321. pass
  322. # fold to remove duplicates
  323. results = fold(results)
  324. # sort because why not
  325. results.sort()
  326. # write results to CSV
  327. if args.get('output'):
  328. with openio(args['output'], 'w') as f:
  329. writer = csv.DictWriter(f, CodeResult._fields)
  330. writer.writeheader()
  331. for r in results:
  332. writer.writerow(r._asdict())
  333. # find previous results?
  334. if args.get('diff'):
  335. diff_results = []
  336. try:
  337. with openio(args['diff']) as f:
  338. reader = csv.DictReader(f)
  339. for r in reader:
  340. try:
  341. diff_results.append(CodeResult(**{
  342. k: v for k, v in r.items()
  343. if k in CodeResult._fields}))
  344. except TypeError:
  345. pass
  346. except FileNotFoundError:
  347. pass
  348. # fold to remove duplicates
  349. diff_results = fold(diff_results)
  350. # print table
  351. if not args.get('quiet'):
  352. table(
  353. results,
  354. diff_results if args.get('diff') else None,
  355. **args)
  356. if __name__ == "__main__":
  357. import argparse
  358. import sys
  359. parser = argparse.ArgumentParser(
  360. description="Find code size at the function level.")
  361. parser.add_argument(
  362. 'obj_paths',
  363. nargs='*',
  364. default=OBJ_PATHS,
  365. help="Description of where to find *.o files. May be a directory "
  366. "or a list of paths. Defaults to %(default)r.")
  367. parser.add_argument(
  368. '-v', '--verbose',
  369. action='store_true',
  370. help="Output commands that run behind the scenes.")
  371. parser.add_argument(
  372. '-q', '--quiet',
  373. action='store_true',
  374. help="Don't show anything, useful with -o.")
  375. parser.add_argument(
  376. '-o', '--output',
  377. help="Specify CSV file to store results.")
  378. parser.add_argument(
  379. '-u', '--use',
  380. help="Don't parse anything, use this CSV file.")
  381. parser.add_argument(
  382. '-d', '--diff',
  383. help="Specify CSV file to diff against.")
  384. parser.add_argument(
  385. '-a', '--all',
  386. action='store_true',
  387. help="Show all, not just the ones that changed.")
  388. parser.add_argument(
  389. '-p', '--percent',
  390. action='store_true',
  391. help="Only show percentage change, not a full diff.")
  392. parser.add_argument(
  393. '-b', '--by-file',
  394. action='store_true',
  395. help="Group by file. Note this does not include padding "
  396. "so sizes may differ from other tools.")
  397. parser.add_argument(
  398. '-s', '--size-sort',
  399. action='store_true',
  400. help="Sort by size.")
  401. parser.add_argument(
  402. '-S', '--reverse-size-sort',
  403. action='store_true',
  404. help="Sort by size, but backwards.")
  405. parser.add_argument(
  406. '-Y', '--summary',
  407. action='store_true',
  408. help="Only show the total size.")
  409. parser.add_argument(
  410. '-A', '--everything',
  411. action='store_true',
  412. help="Include builtin and libc specific symbols.")
  413. parser.add_argument(
  414. '--type',
  415. default=TYPE,
  416. help="Type of symbols to report, this uses the same single-character "
  417. "type-names emitted by nm. Defaults to %(default)r.")
  418. parser.add_argument(
  419. '--nm-tool',
  420. type=lambda x: x.split(),
  421. default=NM_TOOL,
  422. help="Path to the nm tool to use. Defaults to %(default)r")
  423. parser.add_argument(
  424. '--build-dir',
  425. help="Specify the relative build directory. Used to map object files "
  426. "to the correct source files.")
  427. sys.exit(main(**{k: v
  428. for k, v in vars(parser.parse_args()).items()
  429. if v is not None}))