code.py 18 KB


#!/usr/bin/env python3
#
# Script to find code size at the function level. Basically just a big wrapper
# around nm with some extra conveniences for comparing builds. Heavily inspired
# by Linux's Bloat-O-Meter.
#
# Example:
# ./scripts/code.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# Copyright (c) 2020, Arm Limited. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
  23. OBJ_PATHS = ['*.o']
  24. NM_TOOL = ['nm']
  25. TYPE = 'tTrRdD'
  26. # integer fields
  27. class Int(co.namedtuple('Int', 'x')):
  28. __slots__ = ()
  29. def __new__(cls, x=0):
  30. if isinstance(x, Int):
  31. return x
  32. if isinstance(x, str):
  33. try:
  34. x = int(x, 0)
  35. except ValueError:
  36. # also accept +-∞ and +-inf
  37. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  38. x = m.inf
  39. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  40. x = -m.inf
  41. else:
  42. raise
  43. assert isinstance(x, int) or m.isinf(x), x
  44. return super().__new__(cls, x)
  45. def __str__(self):
  46. if self.x == m.inf:
  47. return '∞'
  48. elif self.x == -m.inf:
  49. return '-∞'
  50. else:
  51. return str(self.x)
  52. def __int__(self):
  53. assert not m.isinf(self.x)
  54. return self.x
  55. def __float__(self):
  56. return float(self.x)
  57. none = '%7s' % '-'
  58. def table(self):
  59. return '%7s' % (self,)
  60. diff_none = '%7s' % '-'
  61. diff_table = table
  62. def diff_diff(self, other):
  63. new = self.x if self else 0
  64. old = other.x if other else 0
  65. diff = new - old
  66. if diff == +m.inf:
  67. return '%7s' % '+∞'
  68. elif diff == -m.inf:
  69. return '%7s' % '-∞'
  70. else:
  71. return '%+7d' % diff
  72. def ratio(self, other):
  73. new = self.x if self else 0
  74. old = other.x if other else 0
  75. if m.isinf(new) and m.isinf(old):
  76. return 0.0
  77. elif m.isinf(new):
  78. return +m.inf
  79. elif m.isinf(old):
  80. return -m.inf
  81. elif not old and not new:
  82. return 0.0
  83. elif not old:
  84. return 1.0
  85. else:
  86. return (new-old) / old
  87. def __add__(self, other):
  88. return self.__class__(self.x + other.x)
  89. def __sub__(self, other):
  90. return self.__class__(self.x - other.x)
  91. def __mul__(self, other):
  92. return self.__class__(self.x * other.x)
  93. # code size results
  94. class CodeResult(co.namedtuple('CodeResult', [
  95. 'file', 'function',
  96. 'size'])):
  97. _by = ['file', 'function']
  98. _fields = ['size']
  99. _types = {'size': Int}
  100. __slots__ = ()
  101. def __new__(cls, file='', function='', size=0):
  102. return super().__new__(cls, file, function,
  103. Int(size))
  104. def __add__(self, other):
  105. return CodeResult(self.file, self.function,
  106. self.size + other.size)
  107. def openio(path, mode='r'):
  108. if path == '-':
  109. if mode == 'r':
  110. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  111. else:
  112. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  113. else:
  114. return open(path, mode)
  115. def collect(paths, *,
  116. nm_tool=NM_TOOL,
  117. type=TYPE,
  118. build_dir=None,
  119. everything=False,
  120. **args):
  121. results = []
  122. pattern = re.compile(
  123. '^(?P<size>[0-9a-fA-F]+)' +
  124. ' (?P<type>[%s])' % re.escape(type) +
  125. ' (?P<func>.+?)$')
  126. for path in paths:
  127. # map to source file
  128. src_path = re.sub('\.o$', '.c', path)
  129. if build_dir:
  130. src_path = re.sub('%s/*' % re.escape(build_dir), '',
  131. src_path)
  132. # note nm-tool may contain extra args
  133. cmd = nm_tool + ['--size-sort', path]
  134. if args.get('verbose'):
  135. print(' '.join(shlex.quote(c) for c in cmd))
  136. proc = sp.Popen(cmd,
  137. stdout=sp.PIPE,
  138. stderr=sp.PIPE if not args.get('verbose') else None,
  139. universal_newlines=True,
  140. errors='replace')
  141. for line in proc.stdout:
  142. m = pattern.match(line)
  143. if m:
  144. func = m.group('func')
  145. # discard internal functions
  146. if not everything and func.startswith('__'):
  147. continue
  148. # discard .8449 suffixes created by optimizer
  149. func = re.sub('\.[0-9]+', '', func)
  150. results.append(CodeResult(
  151. src_path, func,
  152. int(m.group('size'), 16)))
  153. proc.wait()
  154. if proc.returncode != 0:
  155. if not args.get('verbose'):
  156. for line in proc.stderr:
  157. sys.stdout.write(line)
  158. sys.exit(-1)
  159. return results
  160. def fold(Result, results, *,
  161. by=None,
  162. defines=None,
  163. **_):
  164. if by is None:
  165. by = Result._by
  166. for k in it.chain(by or [], (k for k, _ in defines or [])):
  167. if k not in Result._by and k not in Result._fields:
  168. print("error: could not find field %r?" % k)
  169. sys.exit(-1)
  170. # filter by matching defines
  171. if defines is not None:
  172. results_ = []
  173. for r in results:
  174. if all(getattr(r, k) in vs for k, vs in defines):
  175. results_.append(r)
  176. results = results_
  177. # organize results into conflicts
  178. folding = co.OrderedDict()
  179. for r in results:
  180. name = tuple(getattr(r, k) for k in by)
  181. if name not in folding:
  182. folding[name] = []
  183. folding[name].append(r)
  184. # merge conflicts
  185. folded = []
  186. for name, rs in folding.items():
  187. folded.append(sum(rs[1:], start=rs[0]))
  188. return folded
  189. def table(Result, results, diff_results=None, *,
  190. by=None,
  191. fields=None,
  192. sort=None,
  193. summary=False,
  194. all=False,
  195. percent=False,
  196. **_):
  197. all_, all = all, __builtins__.all
  198. if by is None:
  199. by = Result._by
  200. if fields is None:
  201. fields = Result._fields
  202. types = Result._types
  203. # fold again
  204. results = fold(Result, results, by=by)
  205. if diff_results is not None:
  206. diff_results = fold(Result, diff_results, by=by)
  207. # organize by name
  208. table = {
  209. ','.join(str(getattr(r, k) or '') for k in by): r
  210. for r in results}
  211. diff_table = {
  212. ','.join(str(getattr(r, k) or '') for k in by): r
  213. for r in diff_results or []}
  214. names = list(table.keys() | diff_table.keys())
  215. # sort again, now with diff info, note that python's sort is stable
  216. names.sort()
  217. if diff_results is not None:
  218. names.sort(key=lambda n: tuple(
  219. types[k].ratio(
  220. getattr(table.get(n), k, None),
  221. getattr(diff_table.get(n), k, None))
  222. for k in fields),
  223. reverse=True)
  224. if sort:
  225. for k, reverse in reversed(sort):
  226. names.sort(key=lambda n: (getattr(table[n], k),)
  227. if getattr(table.get(n), k, None) is not None else (),
  228. reverse=reverse ^ (not k or k in Result._fields))
  229. # build up our lines
  230. lines = []
  231. # header
  232. line = []
  233. line.append('%s%s' % (
  234. ','.join(by),
  235. ' (%d added, %d removed)' % (
  236. sum(1 for n in table if n not in diff_table),
  237. sum(1 for n in diff_table if n not in table))
  238. if diff_results is not None and not percent else '')
  239. if not summary else '')
  240. if diff_results is None:
  241. for k in fields:
  242. line.append(k)
  243. elif percent:
  244. for k in fields:
  245. line.append(k)
  246. else:
  247. for k in fields:
  248. line.append('o'+k)
  249. for k in fields:
  250. line.append('n'+k)
  251. for k in fields:
  252. line.append('d'+k)
  253. line.append('')
  254. lines.append(line)
  255. # entries
  256. if not summary:
  257. for name in names:
  258. r = table.get(name)
  259. if diff_results is not None:
  260. diff_r = diff_table.get(name)
  261. ratios = [
  262. types[k].ratio(
  263. getattr(r, k, None),
  264. getattr(diff_r, k, None))
  265. for k in fields]
  266. if not any(ratios) and not all_:
  267. continue
  268. line = []
  269. line.append(name)
  270. if diff_results is None:
  271. for k in fields:
  272. line.append(getattr(r, k).table()
  273. if getattr(r, k, None) is not None
  274. else types[k].none)
  275. elif percent:
  276. for k in fields:
  277. line.append(getattr(r, k).diff_table()
  278. if getattr(r, k, None) is not None
  279. else types[k].diff_none)
  280. else:
  281. for k in fields:
  282. line.append(getattr(diff_r, k).diff_table()
  283. if getattr(diff_r, k, None) is not None
  284. else types[k].diff_none)
  285. for k in fields:
  286. line.append(getattr(r, k).diff_table()
  287. if getattr(r, k, None) is not None
  288. else types[k].diff_none)
  289. for k in fields:
  290. line.append(types[k].diff_diff(
  291. getattr(r, k, None),
  292. getattr(diff_r, k, None)))
  293. if diff_results is None:
  294. line.append('')
  295. elif percent:
  296. line.append(' (%s)' % ', '.join(
  297. '+∞%' if t == +m.inf
  298. else '-∞%' if t == -m.inf
  299. else '%+.1f%%' % (100*t)
  300. for t in ratios))
  301. else:
  302. line.append(' (%s)' % ', '.join(
  303. '+∞%' if t == +m.inf
  304. else '-∞%' if t == -m.inf
  305. else '%+.1f%%' % (100*t)
  306. for t in ratios
  307. if t)
  308. if any(ratios) else '')
  309. lines.append(line)
  310. # total
  311. r = next(iter(fold(Result, results, by=[])), None)
  312. if diff_results is not None:
  313. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  314. ratios = [
  315. types[k].ratio(
  316. getattr(r, k, None),
  317. getattr(diff_r, k, None))
  318. for k in fields]
  319. line = []
  320. line.append('TOTAL')
  321. if diff_results is None:
  322. for k in fields:
  323. line.append(getattr(r, k).table()
  324. if getattr(r, k, None) is not None
  325. else types[k].none)
  326. elif percent:
  327. for k in fields:
  328. line.append(getattr(r, k).diff_table()
  329. if getattr(r, k, None) is not None
  330. else types[k].diff_none)
  331. else:
  332. for k in fields:
  333. line.append(getattr(diff_r, k).diff_table()
  334. if getattr(diff_r, k, None) is not None
  335. else types[k].diff_none)
  336. for k in fields:
  337. line.append(getattr(r, k).diff_table()
  338. if getattr(r, k, None) is not None
  339. else types[k].diff_none)
  340. for k in fields:
  341. line.append(types[k].diff_diff(
  342. getattr(r, k, None),
  343. getattr(diff_r, k, None)))
  344. if diff_results is None:
  345. line.append('')
  346. elif percent:
  347. line.append(' (%s)' % ', '.join(
  348. '+∞%' if t == +m.inf
  349. else '-∞%' if t == -m.inf
  350. else '%+.1f%%' % (100*t)
  351. for t in ratios))
  352. else:
  353. line.append(' (%s)' % ', '.join(
  354. '+∞%' if t == +m.inf
  355. else '-∞%' if t == -m.inf
  356. else '%+.1f%%' % (100*t)
  357. for t in ratios
  358. if t)
  359. if any(ratios) else '')
  360. lines.append(line)
  361. # find the best widths, note that column 0 contains the names and column -1
  362. # the ratios, so those are handled a bit differently
  363. widths = [
  364. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  365. for w, i in zip(
  366. it.chain([23], it.repeat(7)),
  367. range(len(lines[0])-1))]
  368. # print our table
  369. for line in lines:
  370. print('%-*s %s%s' % (
  371. widths[0], line[0],
  372. ' '.join('%*s' % (w, x)
  373. for w, x in zip(widths[1:], line[1:-1])),
  374. line[-1]))
  375. def main(obj_paths, *,
  376. by=None,
  377. fields=None,
  378. defines=None,
  379. sort=None,
  380. **args):
  381. # find sizes
  382. if not args.get('use', None):
  383. # find .o files
  384. paths = []
  385. for path in obj_paths:
  386. if os.path.isdir(path):
  387. path = path + '/*.o'
  388. for path in glob.glob(path):
  389. paths.append(path)
  390. if not paths:
  391. print("error: no .obj files found in %r?" % obj_paths)
  392. sys.exit(-1)
  393. results = collect(paths, **args)
  394. else:
  395. results = []
  396. with openio(args['use']) as f:
  397. reader = csv.DictReader(f, restval='')
  398. for r in reader:
  399. try:
  400. results.append(CodeResult(
  401. **{k: r[k] for k in CodeResult._by
  402. if k in r and r[k].strip()},
  403. **{k: r['code_'+k] for k in CodeResult._fields
  404. if 'code_'+k in r and r['code_'+k].strip()}))
  405. except TypeError:
  406. pass
  407. # fold
  408. results = fold(CodeResult, results, by=by, defines=defines)
  409. # sort, note that python's sort is stable
  410. results.sort()
  411. if sort:
  412. for k, reverse in reversed(sort):
  413. results.sort(key=lambda r: (getattr(r, k),)
  414. if getattr(r, k) is not None else (),
  415. reverse=reverse ^ (not k or k in CodeResult._fields))
  416. # write results to CSV
  417. if args.get('output'):
  418. with openio(args['output'], 'w') as f:
  419. writer = csv.DictWriter(f, CodeResult._by
  420. + ['code_'+k for k in CodeResult._fields])
  421. writer.writeheader()
  422. for r in results:
  423. writer.writerow(
  424. {k: getattr(r, k) for k in CodeResult._by}
  425. | {'code_'+k: getattr(r, k) for k in CodeResult._fields})
  426. # find previous results?
  427. if args.get('diff'):
  428. diff_results = []
  429. try:
  430. with openio(args['diff']) as f:
  431. reader = csv.DictReader(f, restval='')
  432. for r in reader:
  433. try:
  434. diff_results.append(CodeResult(
  435. **{k: r[k] for k in CodeResult._by
  436. if k in r and r[k].strip()},
  437. **{k: r['code_'+k] for k in CodeResult._fields
  438. if 'code_'+k in r and r['code_'+k].strip()}))
  439. except TypeError:
  440. pass
  441. except FileNotFoundError:
  442. pass
  443. # fold
  444. diff_results = fold(CodeResult, diff_results, by=by, defines=defines)
  445. # print table
  446. if not args.get('quiet'):
  447. table(CodeResult, results,
  448. diff_results if args.get('diff') else None,
  449. by=by if by is not None else ['function'],
  450. fields=fields,
  451. sort=sort,
  452. **args)
  453. if __name__ == "__main__":
  454. import argparse
  455. import sys
  456. parser = argparse.ArgumentParser(
  457. description="Find code size at the function level.")
  458. parser.add_argument(
  459. 'obj_paths',
  460. nargs='*',
  461. default=OBJ_PATHS,
  462. help="Description of where to find *.o files. May be a directory "
  463. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  464. parser.add_argument(
  465. '-v', '--verbose',
  466. action='store_true',
  467. help="Output commands that run behind the scenes.")
  468. parser.add_argument(
  469. '-q', '--quiet',
  470. action='store_true',
  471. help="Don't show anything, useful with -o.")
  472. parser.add_argument(
  473. '-o', '--output',
  474. help="Specify CSV file to store results.")
  475. parser.add_argument(
  476. '-u', '--use',
  477. help="Don't parse anything, use this CSV file.")
  478. parser.add_argument(
  479. '-d', '--diff',
  480. help="Specify CSV file to diff against.")
  481. parser.add_argument(
  482. '-a', '--all',
  483. action='store_true',
  484. help="Show all, not just the ones that changed.")
  485. parser.add_argument(
  486. '-p', '--percent',
  487. action='store_true',
  488. help="Only show percentage change, not a full diff.")
  489. parser.add_argument(
  490. '-b', '--by',
  491. action='append',
  492. choices=CodeResult._by,
  493. help="Group by this field.")
  494. parser.add_argument(
  495. '-f', '--field',
  496. dest='fields',
  497. action='append',
  498. choices=CodeResult._fields,
  499. help="Show this field.")
  500. parser.add_argument(
  501. '-D', '--define',
  502. dest='defines',
  503. action='append',
  504. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  505. help="Only include results where this field is this value.")
  506. class AppendSort(argparse.Action):
  507. def __call__(self, parser, namespace, value, option):
  508. if namespace.sort is None:
  509. namespace.sort = []
  510. namespace.sort.append((value, True if option == '-S' else False))
  511. parser.add_argument(
  512. '-s', '--sort',
  513. action=AppendSort,
  514. help="Sort by this fields.")
  515. parser.add_argument(
  516. '-S', '--reverse-sort',
  517. action=AppendSort,
  518. help="Sort by this fields, but backwards.")
  519. parser.add_argument(
  520. '-Y', '--summary',
  521. action='store_true',
  522. help="Only show the total.")
  523. parser.add_argument(
  524. '-A', '--everything',
  525. action='store_true',
  526. help="Include builtin and libc specific symbols.")
  527. parser.add_argument(
  528. '--type',
  529. default=TYPE,
  530. help="Type of symbols to report, this uses the same single-character "
  531. "type-names emitted by nm. Defaults to %r." % TYPE)
  532. parser.add_argument(
  533. '--nm-tool',
  534. type=lambda x: x.split(),
  535. default=NM_TOOL,
  536. help="Path to the nm tool to use. Defaults to %r." % NM_TOOL)
  537. parser.add_argument(
  538. '--build-dir',
  539. help="Specify the relative build directory. Used to map object files "
  540. "to the correct source files.")
  541. sys.exit(main(**{k: v
  542. for k, v in vars(parser.parse_intermixed_args()).items()
  543. if v is not None}))