data.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find data size at the function level. Basically just a big wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
  7. # Example:
  8. # ./scripts/data.py lfs.o lfs_util.o -S
  9. #
  10. # Copyright (c) 2022, The littlefs authors.
  11. # Copyright (c) 2020, Arm Limited. All rights reserved.
  12. # SPDX-License-Identifier: BSD-3-Clause
  13. #
  14. import collections as co
  15. import csv
  16. import glob
  17. import itertools as it
  18. import math as m
  19. import os
  20. import re
  21. import shlex
  22. import subprocess as sp
# Default for the positional `obj_paths` argument: glob patterns (or
# directories) that main() expands to find object files.
OBJ_PATHS = ['*.o']
# Default nm command, kept as an argv list because collect() appends
# '--size-sort' and the object path to it.
NM_TOOL = ['nm']
# nm symbol-type characters that collect() keeps by default; per the nm
# documentation d/D are initialized-data symbols and b/B are BSS symbols.
TYPE = 'dDbB'
  26. # integer fields
  27. class IntField(co.namedtuple('IntField', 'x')):
  28. __slots__ = ()
  29. def __new__(cls, x=0):
  30. if isinstance(x, IntField):
  31. return x
  32. if isinstance(x, str):
  33. try:
  34. x = int(x, 0)
  35. except ValueError:
  36. # also accept +-∞ and +-inf
  37. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  38. x = m.inf
  39. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  40. x = -m.inf
  41. else:
  42. raise
  43. assert isinstance(x, int) or m.isinf(x), x
  44. return super().__new__(cls, x)
  45. def __str__(self):
  46. if self.x == m.inf:
  47. return '∞'
  48. elif self.x == -m.inf:
  49. return '-∞'
  50. else:
  51. return str(self.x)
  52. def __int__(self):
  53. assert not m.isinf(self.x)
  54. return self.x
  55. def __float__(self):
  56. return float(self.x)
  57. none = '%7s' % '-'
  58. def table(self):
  59. return '%7s' % (self,)
  60. diff_none = '%7s' % '-'
  61. diff_table = table
  62. def diff_diff(self, other):
  63. new = self.x if self else 0
  64. old = other.x if other else 0
  65. diff = new - old
  66. if diff == +m.inf:
  67. return '%7s' % '+∞'
  68. elif diff == -m.inf:
  69. return '%7s' % '-∞'
  70. else:
  71. return '%+7d' % diff
  72. def ratio(self, other):
  73. new = self.x if self else 0
  74. old = other.x if other else 0
  75. if m.isinf(new) and m.isinf(old):
  76. return 0.0
  77. elif m.isinf(new):
  78. return +m.inf
  79. elif m.isinf(old):
  80. return -m.inf
  81. elif not old and not new:
  82. return 0.0
  83. elif not old:
  84. return 1.0
  85. else:
  86. return (new-old) / old
  87. def __add__(self, other):
  88. return IntField(self.x + other.x)
  89. def __sub__(self, other):
  90. return IntField(self.x - other.x)
  91. def __mul__(self, other):
  92. return IntField(self.x * other.x)
  93. def __lt__(self, other):
  94. return self.x < other.x
  95. def __gt__(self, other):
  96. return self.__class__.__lt__(other, self)
  97. def __le__(self, other):
  98. return not self.__gt__(other)
  99. def __ge__(self, other):
  100. return not self.__lt__(other)
  101. # data size results
  102. class DataResult(co.namedtuple('DataResult', 'file,function,data_size')):
  103. __slots__ = ()
  104. def __new__(cls, file, function, data_size):
  105. return super().__new__(cls, file, function, IntField(data_size))
  106. def __add__(self, other):
  107. return DataResult(self.file, self.function,
  108. self.data_size + other.data_size)
  109. def openio(path, mode='r'):
  110. if path == '-':
  111. if mode == 'r':
  112. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  113. else:
  114. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  115. else:
  116. return open(path, mode)
  117. def collect(paths, *,
  118. nm_tool=NM_TOOL,
  119. type=TYPE,
  120. build_dir=None,
  121. everything=False,
  122. **args):
  123. results = []
  124. pattern = re.compile(
  125. '^(?P<size>[0-9a-fA-F]+)' +
  126. ' (?P<type>[%s])' % re.escape(type) +
  127. ' (?P<func>.+?)$')
  128. for path in paths:
  129. # map to source file
  130. src_path = re.sub('\.o$', '.c', path)
  131. if build_dir:
  132. src_path = re.sub('%s/*' % re.escape(build_dir), '',
  133. src_path)
  134. # note nm-tool may contain extra args
  135. cmd = nm_tool + ['--size-sort', path]
  136. if args.get('verbose'):
  137. print(' '.join(shlex.quote(c) for c in cmd))
  138. proc = sp.Popen(cmd,
  139. stdout=sp.PIPE,
  140. stderr=sp.PIPE if not args.get('verbose') else None,
  141. universal_newlines=True,
  142. errors='replace')
  143. for line in proc.stdout:
  144. m = pattern.match(line)
  145. if m:
  146. func = m.group('func')
  147. # discard internal functions
  148. if not everything and func.startswith('__'):
  149. continue
  150. # discard .8449 suffixes created by optimizer
  151. func = re.sub('\.[0-9]+', '', func)
  152. results.append(DataResult(
  153. src_path, func,
  154. int(m.group('size'), 16)))
  155. proc.wait()
  156. if proc.returncode != 0:
  157. if not args.get('verbose'):
  158. for line in proc.stderr:
  159. sys.stdout.write(line)
  160. sys.exit(-1)
  161. return results
  162. def fold(results, *,
  163. by=['file', 'function'],
  164. **_):
  165. folding = co.OrderedDict()
  166. for r in results:
  167. name = tuple(getattr(r, k) for k in by)
  168. if name not in folding:
  169. folding[name] = []
  170. folding[name].append(r)
  171. folded = []
  172. for rs in folding.values():
  173. folded.append(sum(rs[1:], start=rs[0]))
  174. return folded
  175. def table(results, diff_results=None, *,
  176. by_file=False,
  177. size_sort=False,
  178. reverse_size_sort=False,
  179. summary=False,
  180. all=False,
  181. percent=False,
  182. **_):
  183. all_, all = all, __builtins__.all
  184. # fold
  185. results = fold(results, by=['file' if by_file else 'function'])
  186. if diff_results is not None:
  187. diff_results = fold(diff_results,
  188. by=['file' if by_file else 'function'])
  189. table = {
  190. r.file if by_file else r.function: r
  191. for r in results}
  192. diff_table = {
  193. r.file if by_file else r.function: r
  194. for r in diff_results or []}
  195. # sort, note that python's sort is stable
  196. names = list(table.keys() | diff_table.keys())
  197. names.sort()
  198. if diff_results is not None:
  199. names.sort(key=lambda n: -IntField.ratio(
  200. table[n].data_size if n in table else None,
  201. diff_table[n].data_size if n in diff_table else None))
  202. if size_sort:
  203. names.sort(key=lambda n: (table[n].data_size,) if n in table else (),
  204. reverse=True)
  205. elif reverse_size_sort:
  206. names.sort(key=lambda n: (table[n].data_size,) if n in table else (),
  207. reverse=False)
  208. # print header
  209. if not summary:
  210. title = '%s%s' % (
  211. 'file' if by_file else 'function',
  212. ' (%d added, %d removed)' % (
  213. sum(1 for n in table if n not in diff_table),
  214. sum(1 for n in diff_table if n not in table))
  215. if diff_results is not None and not percent else '')
  216. name_width = max(it.chain([23, len(title)], (len(n) for n in names)))
  217. else:
  218. title = ''
  219. name_width = 23
  220. name_width = 4*((name_width+1+4-1)//4)-1
  221. print('%-*s ' % (name_width, title), end='')
  222. if diff_results is None:
  223. print(' %s' % ('size'.rjust(len(IntField.none))))
  224. elif percent:
  225. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  226. else:
  227. print(' %s %s %s' % (
  228. 'old'.rjust(len(IntField.diff_none)),
  229. 'new'.rjust(len(IntField.diff_none)),
  230. 'diff'.rjust(len(IntField.diff_none))))
  231. # print entries
  232. if not summary:
  233. for name in names:
  234. r = table.get(name)
  235. if diff_results is not None:
  236. diff_r = diff_table.get(name)
  237. ratio = IntField.ratio(
  238. r.data_size if r else None,
  239. diff_r.data_size if diff_r else None)
  240. if not ratio and not all_:
  241. continue
  242. print('%-*s ' % (name_width, name), end='')
  243. if diff_results is None:
  244. print(' %s' % (
  245. r.data_size.table()
  246. if r else IntField.none))
  247. elif percent:
  248. print(' %s%s' % (
  249. r.data_size.diff_table()
  250. if r else IntField.diff_none,
  251. ' (%s)' % (
  252. '+∞%' if ratio == +m.inf
  253. else '-∞%' if ratio == -m.inf
  254. else '%+.1f%%' % (100*ratio))))
  255. else:
  256. print(' %s %s %s%s' % (
  257. diff_r.data_size.diff_table()
  258. if diff_r else IntField.diff_none,
  259. r.data_size.diff_table()
  260. if r else IntField.diff_none,
  261. IntField.diff_diff(
  262. r.data_size if r else None,
  263. diff_r.data_size if diff_r else None)
  264. if r or diff_r else IntField.diff_none,
  265. ' (%s)' % (
  266. '+∞%' if ratio == +m.inf
  267. else '-∞%' if ratio == -m.inf
  268. else '%+.1f%%' % (100*ratio))
  269. if ratio else ''))
  270. # print total
  271. total = fold(results, by=[])
  272. r = total[0] if total else None
  273. if diff_results is not None:
  274. diff_total = fold(diff_results, by=[])
  275. diff_r = diff_total[0] if diff_total else None
  276. ratio = IntField.ratio(
  277. r.data_size if r else None,
  278. diff_r.data_size if diff_r else None)
  279. print('%-*s ' % (name_width, 'TOTAL'), end='')
  280. if diff_results is None:
  281. print(' %s' % (
  282. r.data_size.table()
  283. if r else IntField.none))
  284. elif percent:
  285. print(' %s%s' % (
  286. r.data_size.diff_table()
  287. if r else IntField.diff_none,
  288. ' (%s)' % (
  289. '+∞%' if ratio == +m.inf
  290. else '-∞%' if ratio == -m.inf
  291. else '%+.1f%%' % (100*ratio))))
  292. else:
  293. print(' %s %s %s%s' % (
  294. diff_r.data_size.diff_table()
  295. if diff_r else IntField.diff_none,
  296. r.data_size.diff_table()
  297. if r else IntField.diff_none,
  298. IntField.diff_diff(
  299. r.data_size if r else None,
  300. diff_r.data_size if diff_r else None)
  301. if r or diff_r else IntField.diff_none,
  302. ' (%s)' % (
  303. '+∞%' if ratio == +m.inf
  304. else '-∞%' if ratio == -m.inf
  305. else '%+.1f%%' % (100*ratio))
  306. if ratio else ''))
  307. def main(obj_paths, **args):
  308. # find sizes
  309. if not args.get('use', None):
  310. # find .o files
  311. paths = []
  312. for path in obj_paths:
  313. if os.path.isdir(path):
  314. path = path + '/*.o'
  315. for path in glob.glob(path):
  316. paths.append(path)
  317. if not paths:
  318. print('no .obj files found in %r?' % obj_paths)
  319. sys.exit(-1)
  320. results = collect(paths, **args)
  321. else:
  322. results = []
  323. with openio(args['use']) as f:
  324. reader = csv.DictReader(f, restval='')
  325. for r in reader:
  326. try:
  327. results.append(DataResult(**{
  328. k: v for k, v in r.items()
  329. if k in DataResult._fields}))
  330. except TypeError:
  331. pass
  332. # fold to remove duplicates
  333. results = fold(results)
  334. # sort because why not
  335. results.sort()
  336. # write results to CSV
  337. if args.get('output'):
  338. with openio(args['output'], 'w') as f:
  339. writer = csv.DictWriter(f, DataResult._fields)
  340. writer.writeheader()
  341. for r in results:
  342. writer.writerow(r._asdict())
  343. # find previous results?
  344. if args.get('diff'):
  345. diff_results = []
  346. try:
  347. with openio(args['diff']) as f:
  348. reader = csv.DictReader(f, restval='')
  349. for r in reader:
  350. try:
  351. diff_results.append(DataResult(**{
  352. k: v for k, v in r.items()
  353. if k in DataResult._fields}))
  354. except TypeError:
  355. pass
  356. except FileNotFoundError:
  357. pass
  358. # fold to remove duplicates
  359. diff_results = fold(diff_results)
  360. # print table
  361. if not args.get('quiet'):
  362. table(
  363. results,
  364. diff_results if args.get('diff') else None,
  365. **args)
  366. if __name__ == "__main__":
  367. import argparse
  368. import sys
  369. parser = argparse.ArgumentParser(
  370. description="Find data size at the function level.")
  371. parser.add_argument(
  372. 'obj_paths',
  373. nargs='*',
  374. default=OBJ_PATHS,
  375. help="Description of where to find *.o files. May be a directory "
  376. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  377. parser.add_argument(
  378. '-v', '--verbose',
  379. action='store_true',
  380. help="Output commands that run behind the scenes.")
  381. parser.add_argument(
  382. '-q', '--quiet',
  383. action='store_true',
  384. help="Don't show anything, useful with -o.")
  385. parser.add_argument(
  386. '-o', '--output',
  387. help="Specify CSV file to store results.")
  388. parser.add_argument(
  389. '-u', '--use',
  390. help="Don't parse anything, use this CSV file.")
  391. parser.add_argument(
  392. '-d', '--diff',
  393. help="Specify CSV file to diff against.")
  394. parser.add_argument(
  395. '-a', '--all',
  396. action='store_true',
  397. help="Show all, not just the ones that changed.")
  398. parser.add_argument(
  399. '-p', '--percent',
  400. action='store_true',
  401. help="Only show percentage change, not a full diff.")
  402. parser.add_argument(
  403. '-b', '--by-file',
  404. action='store_true',
  405. help="Group by file. Note this does not include padding "
  406. "so sizes may differ from other tools.")
  407. parser.add_argument(
  408. '-s', '--size-sort',
  409. action='store_true',
  410. help="Sort by size.")
  411. parser.add_argument(
  412. '-S', '--reverse-size-sort',
  413. action='store_true',
  414. help="Sort by size, but backwards.")
  415. parser.add_argument(
  416. '-Y', '--summary',
  417. action='store_true',
  418. help="Only show the total size.")
  419. parser.add_argument(
  420. '-A', '--everything',
  421. action='store_true',
  422. help="Include builtin and libc specific symbols.")
  423. parser.add_argument(
  424. '--type',
  425. default=TYPE,
  426. help="Type of symbols to report, this uses the same single-character "
  427. "type-names emitted by nm. Defaults to %r." % TYPE)
  428. parser.add_argument(
  429. '--nm-tool',
  430. type=lambda x: x.split(),
  431. default=NM_TOOL,
  432. help="Path to the nm tool to use. Defaults to %r." % NM_TOOL)
  433. parser.add_argument(
  434. '--build-dir',
  435. help="Specify the relative build directory. Used to map object files "
  436. "to the correct source files.")
  437. sys.exit(main(**{k: v
  438. for k, v in vars(parser.parse_intermixed_args()).items()
  439. if v is not None}))