code.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find code size at the function level. Basically just a big wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
  7. # Example:
  8. # ./scripts/code.py lfs.o lfs_util.o -S
  9. #
  10. # Copyright (c) 2022, The littlefs authors.
  11. # Copyright (c) 2020, Arm Limited. All rights reserved.
  12. # SPDX-License-Identifier: BSD-3-Clause
  13. #
  14. import collections as co
  15. import csv
  16. import glob
  17. import itertools as it
  18. import math as m
  19. import os
  20. import re
  21. import shlex
  22. import subprocess as sp
# default glob patterns searched for object files when no paths are given
OBJ_PATHS = ['*.o']
# default command (as an argv list) used to invoke nm
NM_TOOL = ['nm']
# default set of nm symbol-type characters included in the results
TYPE = 'tTrRdD'
  26. # integer fields
  27. class IntField(co.namedtuple('IntField', 'x')):
  28. __slots__ = ()
  29. def __new__(cls, x):
  30. if isinstance(x, IntField):
  31. return x
  32. if isinstance(x, str):
  33. try:
  34. x = int(x, 0)
  35. except ValueError:
  36. # also accept +-∞ and +-inf
  37. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  38. x = float('inf')
  39. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  40. x = float('-inf')
  41. else:
  42. raise
  43. return super().__new__(cls, x)
  44. def __int__(self):
  45. assert not m.isinf(self.x)
  46. return self.x
  47. def __float__(self):
  48. return float(self.x)
  49. def __str__(self):
  50. if self.x == float('inf'):
  51. return '∞'
  52. elif self.x == float('-inf'):
  53. return '-∞'
  54. else:
  55. return str(self.x)
  56. none = '%7s' % '-'
  57. def table(self):
  58. return '%7s' % (self,)
  59. diff_none = '%7s' % '-'
  60. diff_table = table
  61. def diff_diff(self, other):
  62. new = self.x if self else 0
  63. old = other.x if other else 0
  64. diff = new - old
  65. if diff == float('+inf'):
  66. return '%7s' % '+∞'
  67. elif diff == float('-inf'):
  68. return '%7s' % '-∞'
  69. else:
  70. return '%+7d' % diff
  71. def ratio(self, other):
  72. new = self.x if self else 0
  73. old = other.x if other else 0
  74. if m.isinf(new) and m.isinf(old):
  75. return 0.0
  76. elif m.isinf(new):
  77. return float('+inf')
  78. elif m.isinf(old):
  79. return float('-inf')
  80. elif not old and not new:
  81. return 0.0
  82. elif not old:
  83. return 1.0
  84. else:
  85. return (new-old) / old
  86. def __add__(self, other):
  87. return IntField(self.x + other.x)
  88. def __mul__(self, other):
  89. return IntField(self.x * other.x)
  90. def __lt__(self, other):
  91. return self.x < other.x
  92. def __gt__(self, other):
  93. return self.__class__.__lt__(other, self)
  94. def __le__(self, other):
  95. return not self.__gt__(other)
  96. def __ge__(self, other):
  97. return not self.__lt__(other)
  98. def __truediv__(self, n):
  99. if m.isinf(self.x):
  100. return self
  101. else:
  102. return IntField(round(self.x / n))
  103. # code size results
  104. class CodeResult(co.namedtuple('CodeResult', 'file,function,code_size')):
  105. __slots__ = ()
  106. def __new__(cls, file, function, code_size):
  107. return super().__new__(cls, file, function, IntField(code_size))
  108. def __add__(self, other):
  109. return CodeResult(self.file, self.function,
  110. self.code_size + other.code_size)
  111. def openio(path, mode='r'):
  112. if path == '-':
  113. if mode == 'r':
  114. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  115. else:
  116. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  117. else:
  118. return open(path, mode)
  119. def collect(paths, *,
  120. nm_tool=NM_TOOL,
  121. type=TYPE,
  122. build_dir=None,
  123. everything=False,
  124. **args):
  125. results = []
  126. pattern = re.compile(
  127. '^(?P<size>[0-9a-fA-F]+)' +
  128. ' (?P<type>[%s])' % re.escape(type) +
  129. ' (?P<func>.+?)$')
  130. for path in paths:
  131. # map to source file
  132. src_path = re.sub('\.o$', '.c', path)
  133. if build_dir:
  134. src_path = re.sub('%s/*' % re.escape(build_dir), '',
  135. src_path)
  136. # note nm-tool may contain extra args
  137. cmd = nm_tool + ['--size-sort', path]
  138. if args.get('verbose'):
  139. print(' '.join(shlex.quote(c) for c in cmd))
  140. proc = sp.Popen(cmd,
  141. stdout=sp.PIPE,
  142. stderr=sp.PIPE if not args.get('verbose') else None,
  143. universal_newlines=True,
  144. errors='replace')
  145. for line in proc.stdout:
  146. m = pattern.match(line)
  147. if m:
  148. func = m.group('func')
  149. # discard internal functions
  150. if not everything and func.startswith('__'):
  151. continue
  152. # discard .8449 suffixes created by optimizer
  153. func = re.sub('\.[0-9]+', '', func)
  154. results.append(CodeResult(
  155. src_path, func,
  156. int(m.group('size'), 16)))
  157. proc.wait()
  158. if proc.returncode != 0:
  159. if not args.get('verbose'):
  160. for line in proc.stderr:
  161. sys.stdout.write(line)
  162. sys.exit(-1)
  163. return results
  164. def fold(results, *,
  165. by=['file', 'function'],
  166. **_):
  167. folding = co.OrderedDict()
  168. for r in results:
  169. name = tuple(getattr(r, k) for k in by)
  170. if name not in folding:
  171. folding[name] = []
  172. folding[name].append(r)
  173. folded = []
  174. for rs in folding.values():
  175. folded.append(sum(rs[1:], start=rs[0]))
  176. return folded
  177. def table(results, diff_results=None, *,
  178. by_file=False,
  179. size_sort=False,
  180. reverse_size_sort=False,
  181. summary=False,
  182. all=False,
  183. percent=False,
  184. **_):
  185. all_, all = all, __builtins__.all
  186. # fold
  187. results = fold(results, by=['file' if by_file else 'function'])
  188. if diff_results is not None:
  189. diff_results = fold(diff_results,
  190. by=['file' if by_file else 'function'])
  191. table = {
  192. r.file if by_file else r.function: r
  193. for r in results}
  194. diff_table = {
  195. r.file if by_file else r.function: r
  196. for r in diff_results or []}
  197. # sort, note that python's sort is stable
  198. names = list(table.keys() | diff_table.keys())
  199. names.sort()
  200. if diff_results is not None:
  201. names.sort(key=lambda n: -IntField.ratio(
  202. table[n].code_size if n in table else None,
  203. diff_table[n].code_size if n in diff_table else None))
  204. if size_sort:
  205. names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
  206. reverse=True)
  207. elif reverse_size_sort:
  208. names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
  209. reverse=False)
  210. # print header
  211. print('%-36s' % ('%s%s' % (
  212. 'file' if by_file else 'function',
  213. ' (%d added, %d removed)' % (
  214. sum(1 for n in table if n not in diff_table),
  215. sum(1 for n in diff_table if n not in table))
  216. if diff_results is not None and not percent else '')
  217. if not summary else ''),
  218. end='')
  219. if diff_results is None:
  220. print(' %s' % ('size'.rjust(len(IntField.none))))
  221. elif percent:
  222. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  223. else:
  224. print(' %s %s %s' % (
  225. 'old'.rjust(len(IntField.diff_none)),
  226. 'new'.rjust(len(IntField.diff_none)),
  227. 'diff'.rjust(len(IntField.diff_none))))
  228. # print entries
  229. if not summary:
  230. for name in names:
  231. r = table.get(name)
  232. if diff_results is not None:
  233. diff_r = diff_table.get(name)
  234. ratio = IntField.ratio(
  235. r.code_size if r else None,
  236. diff_r.code_size if diff_r else None)
  237. if not ratio and not all_:
  238. continue
  239. print('%-36s' % name, end='')
  240. if diff_results is None:
  241. print(' %s' % (
  242. r.code_size.table()
  243. if r else IntField.none))
  244. elif percent:
  245. print(' %s%s' % (
  246. r.code_size.diff_table()
  247. if r else IntField.diff_none,
  248. ' (%s)' % (
  249. '+∞%' if ratio == float('+inf')
  250. else '-∞%' if ratio == float('-inf')
  251. else '%+.1f%%' % (100*ratio))))
  252. else:
  253. print(' %s %s %s%s' % (
  254. diff_r.code_size.diff_table()
  255. if diff_r else IntField.diff_none,
  256. r.code_size.diff_table()
  257. if r else IntField.diff_none,
  258. IntField.diff_diff(
  259. r.code_size if r else None,
  260. diff_r.code_size if diff_r else None)
  261. if r or diff_r else IntField.diff_none,
  262. ' (%s)' % (
  263. '+∞%' if ratio == float('+inf')
  264. else '-∞%' if ratio == float('-inf')
  265. else '%+.1f%%' % (100*ratio))
  266. if ratio else ''))
  267. # print total
  268. total = fold(results, by=[])
  269. r = total[0] if total else None
  270. if diff_results is not None:
  271. diff_total = fold(diff_results, by=[])
  272. diff_r = diff_total[0] if diff_total else None
  273. ratio = IntField.ratio(
  274. r.code_size if r else None,
  275. diff_r.code_size if diff_r else None)
  276. print('%-36s' % 'TOTAL', end='')
  277. if diff_results is None:
  278. print(' %s' % (
  279. r.code_size.table()
  280. if r else IntField.none))
  281. elif percent:
  282. print(' %s%s' % (
  283. r.code_size.diff_table()
  284. if r else IntField.diff_none,
  285. ' (%s)' % (
  286. '+∞%' if ratio == float('+inf')
  287. else '-∞%' if ratio == float('-inf')
  288. else '%+.1f%%' % (100*ratio))))
  289. else:
  290. print(' %s %s %s%s' % (
  291. diff_r.code_size.diff_table()
  292. if diff_r else IntField.diff_none,
  293. r.code_size.diff_table()
  294. if r else IntField.diff_none,
  295. IntField.diff_diff(
  296. r.code_size if r else None,
  297. diff_r.code_size if diff_r else None)
  298. if r or diff_r else IntField.diff_none,
  299. ' (%s)' % (
  300. '+∞%' if ratio == float('+inf')
  301. else '-∞%' if ratio == float('-inf')
  302. else '%+.1f%%' % (100*ratio))
  303. if ratio else ''))
  304. def main(obj_paths, **args):
  305. # find sizes
  306. if not args.get('use', None):
  307. # find .o files
  308. paths = []
  309. for path in obj_paths:
  310. if os.path.isdir(path):
  311. path = path + '/*.o'
  312. for path in glob.glob(path):
  313. paths.append(path)
  314. if not paths:
  315. print('no .obj files found in %r?' % obj_paths)
  316. sys.exit(-1)
  317. results = collect(paths, **args)
  318. else:
  319. results = []
  320. with openio(args['use']) as f:
  321. reader = csv.DictReader(f, restval='')
  322. for r in reader:
  323. try:
  324. results.append(CodeResult(**{
  325. k: v for k, v in r.items()
  326. if k in CodeResult._fields}))
  327. except TypeError:
  328. pass
  329. # fold to remove duplicates
  330. results = fold(results)
  331. # sort because why not
  332. results.sort()
  333. # write results to CSV
  334. if args.get('output'):
  335. with openio(args['output'], 'w') as f:
  336. writer = csv.DictWriter(f, CodeResult._fields)
  337. writer.writeheader()
  338. for r in results:
  339. writer.writerow(r._asdict())
  340. # find previous results?
  341. if args.get('diff'):
  342. diff_results = []
  343. try:
  344. with openio(args['diff']) as f:
  345. reader = csv.DictReader(f, restval='')
  346. for r in reader:
  347. try:
  348. diff_results.append(CodeResult(**{
  349. k: v for k, v in r.items()
  350. if k in CodeResult._fields}))
  351. except TypeError:
  352. pass
  353. except FileNotFoundError:
  354. pass
  355. # fold to remove duplicates
  356. diff_results = fold(diff_results)
  357. # print table
  358. if not args.get('quiet'):
  359. table(
  360. results,
  361. diff_results if args.get('diff') else None,
  362. **args)
  363. if __name__ == "__main__":
  364. import argparse
  365. import sys
  366. parser = argparse.ArgumentParser(
  367. description="Find code size at the function level.")
  368. parser.add_argument(
  369. 'obj_paths',
  370. nargs='*',
  371. default=OBJ_PATHS,
  372. help="Description of where to find *.o files. May be a directory "
  373. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  374. parser.add_argument(
  375. '-v', '--verbose',
  376. action='store_true',
  377. help="Output commands that run behind the scenes.")
  378. parser.add_argument(
  379. '-q', '--quiet',
  380. action='store_true',
  381. help="Don't show anything, useful with -o.")
  382. parser.add_argument(
  383. '-o', '--output',
  384. help="Specify CSV file to store results.")
  385. parser.add_argument(
  386. '-u', '--use',
  387. help="Don't parse anything, use this CSV file.")
  388. parser.add_argument(
  389. '-d', '--diff',
  390. help="Specify CSV file to diff against.")
  391. parser.add_argument(
  392. '-a', '--all',
  393. action='store_true',
  394. help="Show all, not just the ones that changed.")
  395. parser.add_argument(
  396. '-p', '--percent',
  397. action='store_true',
  398. help="Only show percentage change, not a full diff.")
  399. parser.add_argument(
  400. '-b', '--by-file',
  401. action='store_true',
  402. help="Group by file. Note this does not include padding "
  403. "so sizes may differ from other tools.")
  404. parser.add_argument(
  405. '-s', '--size-sort',
  406. action='store_true',
  407. help="Sort by size.")
  408. parser.add_argument(
  409. '-S', '--reverse-size-sort',
  410. action='store_true',
  411. help="Sort by size, but backwards.")
  412. parser.add_argument(
  413. '-Y', '--summary',
  414. action='store_true',
  415. help="Only show the total size.")
  416. parser.add_argument(
  417. '-A', '--everything',
  418. action='store_true',
  419. help="Include builtin and libc specific symbols.")
  420. parser.add_argument(
  421. '--type',
  422. default=TYPE,
  423. help="Type of symbols to report, this uses the same single-character "
  424. "type-names emitted by nm. Defaults to %r." % TYPE)
  425. parser.add_argument(
  426. '--nm-tool',
  427. type=lambda x: x.split(),
  428. default=NM_TOOL,
  429. help="Path to the nm tool to use. Defaults to %r." % NM_TOOL)
  430. parser.add_argument(
  431. '--build-dir',
  432. help="Specify the relative build directory. Used to map object files "
  433. "to the correct source files.")
  434. sys.exit(main(**{k: v
  435. for k, v in vars(parser.parse_intermixed_args()).items()
  436. if v is not None}))