code.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find code size at the function level. Basically just a big wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
  7. # Example:
  8. # ./scripts/code.py lfs.o lfs_util.o -S
  9. #
  10. # Copyright (c) 2022, The littlefs authors.
  11. # Copyright (c) 2020, Arm Limited. All rights reserved.
  12. # SPDX-License-Identifier: BSD-3-Clause
  13. #
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
# default object-file patterns, used when no obj_paths are given on the
# command line
OBJ_PATHS = ['*.o']
# default nm command as an argv list, overridable with --nm-tool
NM_TOOL = ['nm']
# default set of nm symbol-type letters to report, overridable with --type
TYPE = 'tTrRdD'
  26. # integer fields
  27. class IntField(co.namedtuple('IntField', 'x')):
  28. __slots__ = ()
  29. def __new__(cls, x=0):
  30. if isinstance(x, IntField):
  31. return x
  32. if isinstance(x, str):
  33. try:
  34. x = int(x, 0)
  35. except ValueError:
  36. # also accept +-∞ and +-inf
  37. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  38. x = m.inf
  39. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  40. x = -m.inf
  41. else:
  42. raise
  43. assert isinstance(x, int) or m.isinf(x), x
  44. return super().__new__(cls, x)
  45. def __str__(self):
  46. if self.x == m.inf:
  47. return '∞'
  48. elif self.x == -m.inf:
  49. return '-∞'
  50. else:
  51. return str(self.x)
  52. def __int__(self):
  53. assert not m.isinf(self.x)
  54. return self.x
  55. def __float__(self):
  56. return float(self.x)
  57. none = '%7s' % '-'
  58. def table(self):
  59. return '%7s' % (self,)
  60. diff_none = '%7s' % '-'
  61. diff_table = table
  62. def diff_diff(self, other):
  63. new = self.x if self else 0
  64. old = other.x if other else 0
  65. diff = new - old
  66. if diff == +m.inf:
  67. return '%7s' % '+∞'
  68. elif diff == -m.inf:
  69. return '%7s' % '-∞'
  70. else:
  71. return '%+7d' % diff
  72. def ratio(self, other):
  73. new = self.x if self else 0
  74. old = other.x if other else 0
  75. if m.isinf(new) and m.isinf(old):
  76. return 0.0
  77. elif m.isinf(new):
  78. return +m.inf
  79. elif m.isinf(old):
  80. return -m.inf
  81. elif not old and not new:
  82. return 0.0
  83. elif not old:
  84. return 1.0
  85. else:
  86. return (new-old) / old
  87. def __add__(self, other):
  88. return IntField(self.x + other.x)
  89. def __sub__(self, other):
  90. return IntField(self.x - other.x)
  91. def __mul__(self, other):
  92. return IntField(self.x * other.x)
  93. def __lt__(self, other):
  94. return self.x < other.x
  95. def __gt__(self, other):
  96. return self.__class__.__lt__(other, self)
  97. def __le__(self, other):
  98. return not self.__gt__(other)
  99. def __ge__(self, other):
  100. return not self.__lt__(other)
  101. # code size results
  102. class CodeResult(co.namedtuple('CodeResult', 'file,function,code_size')):
  103. __slots__ = ()
  104. def __new__(cls, file, function, code_size):
  105. return super().__new__(cls, file, function, IntField(code_size))
  106. def __add__(self, other):
  107. return CodeResult(self.file, self.function,
  108. self.code_size + other.code_size)
  109. def openio(path, mode='r'):
  110. if path == '-':
  111. if mode == 'r':
  112. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  113. else:
  114. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  115. else:
  116. return open(path, mode)
  117. def collect(paths, *,
  118. nm_tool=NM_TOOL,
  119. type=TYPE,
  120. build_dir=None,
  121. everything=False,
  122. **args):
  123. results = []
  124. pattern = re.compile(
  125. '^(?P<size>[0-9a-fA-F]+)' +
  126. ' (?P<type>[%s])' % re.escape(type) +
  127. ' (?P<func>.+?)$')
  128. for path in paths:
  129. # map to source file
  130. src_path = re.sub('\.o$', '.c', path)
  131. if build_dir:
  132. src_path = re.sub('%s/*' % re.escape(build_dir), '',
  133. src_path)
  134. # note nm-tool may contain extra args
  135. cmd = nm_tool + ['--size-sort', path]
  136. if args.get('verbose'):
  137. print(' '.join(shlex.quote(c) for c in cmd))
  138. proc = sp.Popen(cmd,
  139. stdout=sp.PIPE,
  140. stderr=sp.PIPE if not args.get('verbose') else None,
  141. universal_newlines=True,
  142. errors='replace')
  143. for line in proc.stdout:
  144. m = pattern.match(line)
  145. if m:
  146. func = m.group('func')
  147. # discard internal functions
  148. if not everything and func.startswith('__'):
  149. continue
  150. # discard .8449 suffixes created by optimizer
  151. func = re.sub('\.[0-9]+', '', func)
  152. results.append(CodeResult(
  153. src_path, func,
  154. int(m.group('size'), 16)))
  155. proc.wait()
  156. if proc.returncode != 0:
  157. if not args.get('verbose'):
  158. for line in proc.stderr:
  159. sys.stdout.write(line)
  160. sys.exit(-1)
  161. return results
  162. def fold(results, *,
  163. by=['file', 'function'],
  164. **_):
  165. folding = co.OrderedDict()
  166. for r in results:
  167. name = tuple(getattr(r, k) for k in by)
  168. if name not in folding:
  169. folding[name] = []
  170. folding[name].append(r)
  171. folded = []
  172. for rs in folding.values():
  173. folded.append(sum(rs[1:], start=rs[0]))
  174. return folded
  175. def table(results, diff_results=None, *,
  176. by_file=False,
  177. size_sort=False,
  178. reverse_size_sort=False,
  179. summary=False,
  180. all=False,
  181. percent=False,
  182. **_):
  183. all_, all = all, __builtins__.all
  184. # fold
  185. results = fold(results, by=['file' if by_file else 'function'])
  186. if diff_results is not None:
  187. diff_results = fold(diff_results,
  188. by=['file' if by_file else 'function'])
  189. table = {
  190. r.file if by_file else r.function: r
  191. for r in results}
  192. diff_table = {
  193. r.file if by_file else r.function: r
  194. for r in diff_results or []}
  195. # sort, note that python's sort is stable
  196. names = list(table.keys() | diff_table.keys())
  197. names.sort()
  198. if diff_results is not None:
  199. names.sort(key=lambda n: -IntField.ratio(
  200. table[n].code_size if n in table else None,
  201. diff_table[n].code_size if n in diff_table else None))
  202. if size_sort:
  203. names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
  204. reverse=True)
  205. elif reverse_size_sort:
  206. names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
  207. reverse=False)
  208. # print header
  209. print('%-36s' % ('%s%s' % (
  210. 'file' if by_file else 'function',
  211. ' (%d added, %d removed)' % (
  212. sum(1 for n in table if n not in diff_table),
  213. sum(1 for n in diff_table if n not in table))
  214. if diff_results is not None and not percent else '')
  215. if not summary else ''),
  216. end='')
  217. if diff_results is None:
  218. print(' %s' % ('size'.rjust(len(IntField.none))))
  219. elif percent:
  220. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  221. else:
  222. print(' %s %s %s' % (
  223. 'old'.rjust(len(IntField.diff_none)),
  224. 'new'.rjust(len(IntField.diff_none)),
  225. 'diff'.rjust(len(IntField.diff_none))))
  226. # print entries
  227. if not summary:
  228. for name in names:
  229. r = table.get(name)
  230. if diff_results is not None:
  231. diff_r = diff_table.get(name)
  232. ratio = IntField.ratio(
  233. r.code_size if r else None,
  234. diff_r.code_size if diff_r else None)
  235. if not ratio and not all_:
  236. continue
  237. print('%-36s' % name, end='')
  238. if diff_results is None:
  239. print(' %s' % (
  240. r.code_size.table()
  241. if r else IntField.none))
  242. elif percent:
  243. print(' %s%s' % (
  244. r.code_size.diff_table()
  245. if r else IntField.diff_none,
  246. ' (%s)' % (
  247. '+∞%' if ratio == +m.inf
  248. else '-∞%' if ratio == -m.inf
  249. else '%+.1f%%' % (100*ratio))))
  250. else:
  251. print(' %s %s %s%s' % (
  252. diff_r.code_size.diff_table()
  253. if diff_r else IntField.diff_none,
  254. r.code_size.diff_table()
  255. if r else IntField.diff_none,
  256. IntField.diff_diff(
  257. r.code_size if r else None,
  258. diff_r.code_size if diff_r else None)
  259. if r or diff_r else IntField.diff_none,
  260. ' (%s)' % (
  261. '+∞%' if ratio == +m.inf
  262. else '-∞%' if ratio == -m.inf
  263. else '%+.1f%%' % (100*ratio))
  264. if ratio else ''))
  265. # print total
  266. total = fold(results, by=[])
  267. r = total[0] if total else None
  268. if diff_results is not None:
  269. diff_total = fold(diff_results, by=[])
  270. diff_r = diff_total[0] if diff_total else None
  271. ratio = IntField.ratio(
  272. r.code_size if r else None,
  273. diff_r.code_size if diff_r else None)
  274. print('%-36s' % 'TOTAL', end='')
  275. if diff_results is None:
  276. print(' %s' % (
  277. r.code_size.table()
  278. if r else IntField.none))
  279. elif percent:
  280. print(' %s%s' % (
  281. r.code_size.diff_table()
  282. if r else IntField.diff_none,
  283. ' (%s)' % (
  284. '+∞%' if ratio == +m.inf
  285. else '-∞%' if ratio == -m.inf
  286. else '%+.1f%%' % (100*ratio))))
  287. else:
  288. print(' %s %s %s%s' % (
  289. diff_r.code_size.diff_table()
  290. if diff_r else IntField.diff_none,
  291. r.code_size.diff_table()
  292. if r else IntField.diff_none,
  293. IntField.diff_diff(
  294. r.code_size if r else None,
  295. diff_r.code_size if diff_r else None)
  296. if r or diff_r else IntField.diff_none,
  297. ' (%s)' % (
  298. '+∞%' if ratio == +m.inf
  299. else '-∞%' if ratio == -m.inf
  300. else '%+.1f%%' % (100*ratio))
  301. if ratio else ''))
  302. def main(obj_paths, **args):
  303. # find sizes
  304. if not args.get('use', None):
  305. # find .o files
  306. paths = []
  307. for path in obj_paths:
  308. if os.path.isdir(path):
  309. path = path + '/*.o'
  310. for path in glob.glob(path):
  311. paths.append(path)
  312. if not paths:
  313. print('no .obj files found in %r?' % obj_paths)
  314. sys.exit(-1)
  315. results = collect(paths, **args)
  316. else:
  317. results = []
  318. with openio(args['use']) as f:
  319. reader = csv.DictReader(f, restval='')
  320. for r in reader:
  321. try:
  322. results.append(CodeResult(**{
  323. k: v for k, v in r.items()
  324. if k in CodeResult._fields}))
  325. except TypeError:
  326. pass
  327. # fold to remove duplicates
  328. results = fold(results)
  329. # sort because why not
  330. results.sort()
  331. # write results to CSV
  332. if args.get('output'):
  333. with openio(args['output'], 'w') as f:
  334. writer = csv.DictWriter(f, CodeResult._fields)
  335. writer.writeheader()
  336. for r in results:
  337. writer.writerow(r._asdict())
  338. # find previous results?
  339. if args.get('diff'):
  340. diff_results = []
  341. try:
  342. with openio(args['diff']) as f:
  343. reader = csv.DictReader(f, restval='')
  344. for r in reader:
  345. try:
  346. diff_results.append(CodeResult(**{
  347. k: v for k, v in r.items()
  348. if k in CodeResult._fields}))
  349. except TypeError:
  350. pass
  351. except FileNotFoundError:
  352. pass
  353. # fold to remove duplicates
  354. diff_results = fold(diff_results)
  355. # print table
  356. if not args.get('quiet'):
  357. table(
  358. results,
  359. diff_results if args.get('diff') else None,
  360. **args)
  361. if __name__ == "__main__":
  362. import argparse
  363. import sys
  364. parser = argparse.ArgumentParser(
  365. description="Find code size at the function level.")
  366. parser.add_argument(
  367. 'obj_paths',
  368. nargs='*',
  369. default=OBJ_PATHS,
  370. help="Description of where to find *.o files. May be a directory "
  371. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  372. parser.add_argument(
  373. '-v', '--verbose',
  374. action='store_true',
  375. help="Output commands that run behind the scenes.")
  376. parser.add_argument(
  377. '-q', '--quiet',
  378. action='store_true',
  379. help="Don't show anything, useful with -o.")
  380. parser.add_argument(
  381. '-o', '--output',
  382. help="Specify CSV file to store results.")
  383. parser.add_argument(
  384. '-u', '--use',
  385. help="Don't parse anything, use this CSV file.")
  386. parser.add_argument(
  387. '-d', '--diff',
  388. help="Specify CSV file to diff against.")
  389. parser.add_argument(
  390. '-a', '--all',
  391. action='store_true',
  392. help="Show all, not just the ones that changed.")
  393. parser.add_argument(
  394. '-p', '--percent',
  395. action='store_true',
  396. help="Only show percentage change, not a full diff.")
  397. parser.add_argument(
  398. '-b', '--by-file',
  399. action='store_true',
  400. help="Group by file. Note this does not include padding "
  401. "so sizes may differ from other tools.")
  402. parser.add_argument(
  403. '-s', '--size-sort',
  404. action='store_true',
  405. help="Sort by size.")
  406. parser.add_argument(
  407. '-S', '--reverse-size-sort',
  408. action='store_true',
  409. help="Sort by size, but backwards.")
  410. parser.add_argument(
  411. '-Y', '--summary',
  412. action='store_true',
  413. help="Only show the total size.")
  414. parser.add_argument(
  415. '-A', '--everything',
  416. action='store_true',
  417. help="Include builtin and libc specific symbols.")
  418. parser.add_argument(
  419. '--type',
  420. default=TYPE,
  421. help="Type of symbols to report, this uses the same single-character "
  422. "type-names emitted by nm. Defaults to %r." % TYPE)
  423. parser.add_argument(
  424. '--nm-tool',
  425. type=lambda x: x.split(),
  426. default=NM_TOOL,
  427. help="Path to the nm tool to use. Defaults to %r." % NM_TOOL)
  428. parser.add_argument(
  429. '--build-dir',
  430. help="Specify the relative build directory. Used to map object files "
  431. "to the correct source files.")
  432. sys.exit(main(**{k: v
  433. for k, v in vars(parser.parse_intermixed_args()).items()
  434. if v is not None}))