#!/usr/bin/env python3
#
# Script to find struct sizes.
#
# Example:
# ./scripts/structs.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
  11. import collections as co
  12. import csv
  13. import difflib
  14. import itertools as it
  15. import math as m
  16. import os
  17. import re
  18. import shlex
  19. import subprocess as sp
  20. OBJDUMP_PATH = ['objdump']
  21. # integer fields
  22. class Int(co.namedtuple('Int', 'x')):
  23. __slots__ = ()
  24. def __new__(cls, x=0):
  25. if isinstance(x, Int):
  26. return x
  27. if isinstance(x, str):
  28. try:
  29. x = int(x, 0)
  30. except ValueError:
  31. # also accept +-∞ and +-inf
  32. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  33. x = m.inf
  34. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  35. x = -m.inf
  36. else:
  37. raise
  38. assert isinstance(x, int) or m.isinf(x), x
  39. return super().__new__(cls, x)
  40. def __str__(self):
  41. if self.x == m.inf:
  42. return '∞'
  43. elif self.x == -m.inf:
  44. return '-∞'
  45. else:
  46. return str(self.x)
  47. def __int__(self):
  48. assert not m.isinf(self.x)
  49. return self.x
  50. def __float__(self):
  51. return float(self.x)
  52. none = '%7s' % '-'
  53. def table(self):
  54. return '%7s' % (self,)
  55. diff_none = '%7s' % '-'
  56. diff_table = table
  57. def diff_diff(self, other):
  58. new = self.x if self else 0
  59. old = other.x if other else 0
  60. diff = new - old
  61. if diff == +m.inf:
  62. return '%7s' % '+∞'
  63. elif diff == -m.inf:
  64. return '%7s' % '-∞'
  65. else:
  66. return '%+7d' % diff
  67. def ratio(self, other):
  68. new = self.x if self else 0
  69. old = other.x if other else 0
  70. if m.isinf(new) and m.isinf(old):
  71. return 0.0
  72. elif m.isinf(new):
  73. return +m.inf
  74. elif m.isinf(old):
  75. return -m.inf
  76. elif not old and not new:
  77. return 0.0
  78. elif not old:
  79. return 1.0
  80. else:
  81. return (new-old) / old
  82. def __add__(self, other):
  83. return self.__class__(self.x + other.x)
  84. def __sub__(self, other):
  85. return self.__class__(self.x - other.x)
  86. def __mul__(self, other):
  87. return self.__class__(self.x * other.x)
  88. # struct size results
  89. class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
  90. _by = ['file', 'struct']
  91. _fields = ['size']
  92. _sort = ['size']
  93. _types = {'size': Int}
  94. __slots__ = ()
  95. def __new__(cls, file='', struct='', size=0):
  96. return super().__new__(cls, file, struct,
  97. Int(size))
  98. def __add__(self, other):
  99. return StructResult(self.file, self.struct,
  100. self.size + other.size)
  101. def openio(path, mode='r', buffering=-1):
  102. # allow '-' for stdin/stdout
  103. if path == '-':
  104. if mode == 'r':
  105. return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
  106. else:
  107. return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
  108. else:
  109. return open(path, mode, buffering)
  110. def collect(obj_paths, *,
  111. objdump_path=OBJDUMP_PATH,
  112. sources=None,
  113. everything=False,
  114. internal=False,
  115. **args):
  116. line_pattern = re.compile(
  117. '^\s+(?P<no>[0-9]+)'
  118. '(?:\s+(?P<dir>[0-9]+))?'
  119. '\s+.*'
  120. '\s+(?P<path>[^\s]+)$')
  121. info_pattern = re.compile(
  122. '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
  123. '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  124. '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
  125. '|.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  126. results = []
  127. for path in obj_paths:
  128. # find files, we want to filter by structs in .h files
  129. dirs = {}
  130. files = {}
  131. # note objdump-path may contain extra args
  132. cmd = objdump_path + ['--dwarf=rawline', path]
  133. if args.get('verbose'):
  134. print(' '.join(shlex.quote(c) for c in cmd))
  135. proc = sp.Popen(cmd,
  136. stdout=sp.PIPE,
  137. stderr=sp.PIPE if not args.get('verbose') else None,
  138. universal_newlines=True,
  139. errors='replace',
  140. close_fds=False)
  141. for line in proc.stdout:
  142. # note that files contain references to dirs, which we
  143. # dereference as soon as we see them as each file table follows a
  144. # dir table
  145. m = line_pattern.match(line)
  146. if m:
  147. if not m.group('dir'):
  148. # found a directory entry
  149. dirs[int(m.group('no'))] = m.group('path')
  150. else:
  151. # found a file entry
  152. dir = int(m.group('dir'))
  153. if dir in dirs:
  154. files[int(m.group('no'))] = os.path.join(
  155. dirs[dir],
  156. m.group('path'))
  157. else:
  158. files[int(m.group('no'))] = m.group('path')
  159. proc.wait()
  160. if proc.returncode != 0:
  161. if not args.get('verbose'):
  162. for line in proc.stderr:
  163. sys.stdout.write(line)
  164. sys.exit(-1)
  165. # collect structs as we parse dwarf info
  166. results_ = []
  167. is_struct = False
  168. s_name = None
  169. s_file = None
  170. s_size = None
  171. # note objdump-path may contain extra args
  172. cmd = objdump_path + ['--dwarf=info', path]
  173. if args.get('verbose'):
  174. print(' '.join(shlex.quote(c) for c in cmd))
  175. proc = sp.Popen(cmd,
  176. stdout=sp.PIPE,
  177. stderr=sp.PIPE if not args.get('verbose') else None,
  178. universal_newlines=True,
  179. errors='replace',
  180. close_fds=False)
  181. for line in proc.stdout:
  182. # state machine here to find structs
  183. m = info_pattern.match(line)
  184. if m:
  185. if m.group('tag'):
  186. if is_struct:
  187. file = files.get(s_file, '?')
  188. results_.append(StructResult(file, s_name, s_size))
  189. is_struct = (m.group('tag') == 'DW_TAG_structure_type')
  190. elif m.group('name'):
  191. s_name = m.group('name')
  192. elif m.group('file'):
  193. s_file = int(m.group('file'))
  194. elif m.group('size'):
  195. s_size = int(m.group('size'))
  196. if is_struct:
  197. file = files.get(s_file, '?')
  198. results_.append(StructResult(file, s_name, s_size))
  199. proc.wait()
  200. if proc.returncode != 0:
  201. if not args.get('verbose'):
  202. for line in proc.stderr:
  203. sys.stdout.write(line)
  204. sys.exit(-1)
  205. for r in results_:
  206. # ignore filtered sources
  207. if sources is not None:
  208. if not any(
  209. os.path.abspath(r.file) == os.path.abspath(s)
  210. for s in sources):
  211. continue
  212. else:
  213. # default to only cwd
  214. if not everything and not os.path.commonpath([
  215. os.getcwd(),
  216. os.path.abspath(r.file)]) == os.getcwd():
  217. continue
  218. # limit to .h files unless --internal
  219. if not internal and not r.file.endswith('.h'):
  220. continue
  221. # simplify path
  222. if os.path.commonpath([
  223. os.getcwd(),
  224. os.path.abspath(r.file)]) == os.getcwd():
  225. file = os.path.relpath(r.file)
  226. else:
  227. file = os.path.abspath(r.file)
  228. results.append(r._replace(file=file))
  229. return results
  230. def fold(Result, results, *,
  231. by=None,
  232. defines=None,
  233. **_):
  234. if by is None:
  235. by = Result._by
  236. for k in it.chain(by or [], (k for k, _ in defines or [])):
  237. if k not in Result._by and k not in Result._fields:
  238. print("error: could not find field %r?" % k)
  239. sys.exit(-1)
  240. # filter by matching defines
  241. if defines is not None:
  242. results_ = []
  243. for r in results:
  244. if all(getattr(r, k) in vs for k, vs in defines):
  245. results_.append(r)
  246. results = results_
  247. # organize results into conflicts
  248. folding = co.OrderedDict()
  249. for r in results:
  250. name = tuple(getattr(r, k) for k in by)
  251. if name not in folding:
  252. folding[name] = []
  253. folding[name].append(r)
  254. # merge conflicts
  255. folded = []
  256. for name, rs in folding.items():
  257. folded.append(sum(rs[1:], start=rs[0]))
  258. return folded
  259. def table(Result, results, diff_results=None, *,
  260. by=None,
  261. fields=None,
  262. sort=None,
  263. summary=False,
  264. all=False,
  265. percent=False,
  266. **_):
  267. all_, all = all, __builtins__.all
  268. if by is None:
  269. by = Result._by
  270. if fields is None:
  271. fields = Result._fields
  272. types = Result._types
  273. # fold again
  274. results = fold(Result, results, by=by)
  275. if diff_results is not None:
  276. diff_results = fold(Result, diff_results, by=by)
  277. # organize by name
  278. table = {
  279. ','.join(str(getattr(r, k) or '') for k in by): r
  280. for r in results}
  281. diff_table = {
  282. ','.join(str(getattr(r, k) or '') for k in by): r
  283. for r in diff_results or []}
  284. names = list(table.keys() | diff_table.keys())
  285. # sort again, now with diff info, note that python's sort is stable
  286. names.sort()
  287. if diff_results is not None:
  288. names.sort(key=lambda n: tuple(
  289. types[k].ratio(
  290. getattr(table.get(n), k, None),
  291. getattr(diff_table.get(n), k, None))
  292. for k in fields),
  293. reverse=True)
  294. if sort:
  295. for k, reverse in reversed(sort):
  296. names.sort(
  297. key=lambda n: tuple(
  298. (getattr(table[n], k),)
  299. if getattr(table.get(n), k, None) is not None else ()
  300. for k in ([k] if k else [
  301. k for k in Result._sort if k in fields])),
  302. reverse=reverse ^ (not k or k in Result._fields))
  303. # build up our lines
  304. lines = []
  305. # header
  306. header = []
  307. header.append('%s%s' % (
  308. ','.join(by),
  309. ' (%d added, %d removed)' % (
  310. sum(1 for n in table if n not in diff_table),
  311. sum(1 for n in diff_table if n not in table))
  312. if diff_results is not None and not percent else '')
  313. if not summary else '')
  314. if diff_results is None:
  315. for k in fields:
  316. header.append(k)
  317. elif percent:
  318. for k in fields:
  319. header.append(k)
  320. else:
  321. for k in fields:
  322. header.append('o'+k)
  323. for k in fields:
  324. header.append('n'+k)
  325. for k in fields:
  326. header.append('d'+k)
  327. header.append('')
  328. lines.append(header)
  329. def table_entry(name, r, diff_r=None, ratios=[]):
  330. entry = []
  331. entry.append(name)
  332. if diff_results is None:
  333. for k in fields:
  334. entry.append(getattr(r, k).table()
  335. if getattr(r, k, None) is not None
  336. else types[k].none)
  337. elif percent:
  338. for k in fields:
  339. entry.append(getattr(r, k).diff_table()
  340. if getattr(r, k, None) is not None
  341. else types[k].diff_none)
  342. else:
  343. for k in fields:
  344. entry.append(getattr(diff_r, k).diff_table()
  345. if getattr(diff_r, k, None) is not None
  346. else types[k].diff_none)
  347. for k in fields:
  348. entry.append(getattr(r, k).diff_table()
  349. if getattr(r, k, None) is not None
  350. else types[k].diff_none)
  351. for k in fields:
  352. entry.append(types[k].diff_diff(
  353. getattr(r, k, None),
  354. getattr(diff_r, k, None)))
  355. if diff_results is None:
  356. entry.append('')
  357. elif percent:
  358. entry.append(' (%s)' % ', '.join(
  359. '+∞%' if t == +m.inf
  360. else '-∞%' if t == -m.inf
  361. else '%+.1f%%' % (100*t)
  362. for t in ratios))
  363. else:
  364. entry.append(' (%s)' % ', '.join(
  365. '+∞%' if t == +m.inf
  366. else '-∞%' if t == -m.inf
  367. else '%+.1f%%' % (100*t)
  368. for t in ratios
  369. if t)
  370. if any(ratios) else '')
  371. return entry
  372. # entries
  373. if not summary:
  374. for name in names:
  375. r = table.get(name)
  376. if diff_results is None:
  377. diff_r = None
  378. ratios = None
  379. else:
  380. diff_r = diff_table.get(name)
  381. ratios = [
  382. types[k].ratio(
  383. getattr(r, k, None),
  384. getattr(diff_r, k, None))
  385. for k in fields]
  386. if not all_ and not any(ratios):
  387. continue
  388. lines.append(table_entry(name, r, diff_r, ratios))
  389. # total
  390. r = next(iter(fold(Result, results, by=[])), None)
  391. if diff_results is None:
  392. diff_r = None
  393. ratios = None
  394. else:
  395. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  396. ratios = [
  397. types[k].ratio(
  398. getattr(r, k, None),
  399. getattr(diff_r, k, None))
  400. for k in fields]
  401. lines.append(table_entry('TOTAL', r, diff_r, ratios))
  402. # find the best widths, note that column 0 contains the names and column -1
  403. # the ratios, so those are handled a bit differently
  404. widths = [
  405. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  406. for w, i in zip(
  407. it.chain([23], it.repeat(7)),
  408. range(len(lines[0])-1))]
  409. # print our table
  410. for line in lines:
  411. print('%-*s %s%s' % (
  412. widths[0], line[0],
  413. ' '.join('%*s' % (w, x)
  414. for w, x in zip(widths[1:], line[1:-1])),
  415. line[-1]))
  416. def main(obj_paths, *,
  417. by=None,
  418. fields=None,
  419. defines=None,
  420. sort=None,
  421. **args):
  422. # find sizes
  423. if not args.get('use', None):
  424. results = collect(obj_paths, **args)
  425. else:
  426. results = []
  427. with openio(args['use']) as f:
  428. reader = csv.DictReader(f, restval='')
  429. for r in reader:
  430. if not any('struct_'+k in r and r['struct_'+k].strip()
  431. for k in StructResult._fields):
  432. continue
  433. try:
  434. results.append(StructResult(
  435. **{k: r[k] for k in StructResult._by
  436. if k in r and r[k].strip()},
  437. **{k: r['struct_'+k]
  438. for k in StructResult._fields
  439. if 'struct_'+k in r
  440. and r['struct_'+k].strip()}))
  441. except TypeError:
  442. pass
  443. # fold
  444. results = fold(StructResult, results, by=by, defines=defines)
  445. # sort, note that python's sort is stable
  446. results.sort()
  447. if sort:
  448. for k, reverse in reversed(sort):
  449. results.sort(
  450. key=lambda r: tuple(
  451. (getattr(r, k),) if getattr(r, k) is not None else ()
  452. for k in ([k] if k else StructResult._sort)),
  453. reverse=reverse ^ (not k or k in StructResult._fields))
  454. # write results to CSV
  455. if args.get('output'):
  456. with openio(args['output'], 'w') as f:
  457. writer = csv.DictWriter(f,
  458. (by if by is not None else StructResult._by)
  459. + ['struct_'+k for k in (
  460. fields if fields is not None else StructResult._fields)])
  461. writer.writeheader()
  462. for r in results:
  463. writer.writerow(
  464. {k: getattr(r, k) for k in (
  465. by if by is not None else StructResult._by)}
  466. | {'struct_'+k: getattr(r, k) for k in (
  467. fields if fields is not None else StructResult._fields)})
  468. # find previous results?
  469. if args.get('diff'):
  470. diff_results = []
  471. try:
  472. with openio(args['diff']) as f:
  473. reader = csv.DictReader(f, restval='')
  474. for r in reader:
  475. if not any('struct_'+k in r and r['struct_'+k].strip()
  476. for k in StructResult._fields):
  477. continue
  478. try:
  479. diff_results.append(StructResult(
  480. **{k: r[k] for k in StructResult._by
  481. if k in r and r[k].strip()},
  482. **{k: r['struct_'+k]
  483. for k in StructResult._fields
  484. if 'struct_'+k in r
  485. and r['struct_'+k].strip()}))
  486. except TypeError:
  487. pass
  488. except FileNotFoundError:
  489. pass
  490. # fold
  491. diff_results = fold(StructResult, diff_results, by=by, defines=defines)
  492. # print table
  493. if not args.get('quiet'):
  494. table(StructResult, results,
  495. diff_results if args.get('diff') else None,
  496. by=by if by is not None else ['struct'],
  497. fields=fields,
  498. sort=sort,
  499. **args)
  500. if __name__ == "__main__":
  501. import argparse
  502. import sys
  503. parser = argparse.ArgumentParser(
  504. description="Find struct sizes.",
  505. allow_abbrev=False)
  506. parser.add_argument(
  507. 'obj_paths',
  508. nargs='*',
  509. help="Input *.o files.")
  510. parser.add_argument(
  511. '-v', '--verbose',
  512. action='store_true',
  513. help="Output commands that run behind the scenes.")
  514. parser.add_argument(
  515. '-q', '--quiet',
  516. action='store_true',
  517. help="Don't show anything, useful with -o.")
  518. parser.add_argument(
  519. '-o', '--output',
  520. help="Specify CSV file to store results.")
  521. parser.add_argument(
  522. '-u', '--use',
  523. help="Don't parse anything, use this CSV file.")
  524. parser.add_argument(
  525. '-d', '--diff',
  526. help="Specify CSV file to diff against.")
  527. parser.add_argument(
  528. '-a', '--all',
  529. action='store_true',
  530. help="Show all, not just the ones that changed.")
  531. parser.add_argument(
  532. '-p', '--percent',
  533. action='store_true',
  534. help="Only show percentage change, not a full diff.")
  535. parser.add_argument(
  536. '-b', '--by',
  537. action='append',
  538. choices=StructResult._by,
  539. help="Group by this field.")
  540. parser.add_argument(
  541. '-f', '--field',
  542. dest='fields',
  543. action='append',
  544. choices=StructResult._fields,
  545. help="Show this field.")
  546. parser.add_argument(
  547. '-D', '--define',
  548. dest='defines',
  549. action='append',
  550. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  551. help="Only include results where this field is this value.")
  552. class AppendSort(argparse.Action):
  553. def __call__(self, parser, namespace, value, option):
  554. if namespace.sort is None:
  555. namespace.sort = []
  556. namespace.sort.append((value, True if option == '-S' else False))
  557. parser.add_argument(
  558. '-s', '--sort',
  559. nargs='?',
  560. action=AppendSort,
  561. help="Sort by this field.")
  562. parser.add_argument(
  563. '-S', '--reverse-sort',
  564. nargs='?',
  565. action=AppendSort,
  566. help="Sort by this field, but backwards.")
  567. parser.add_argument(
  568. '-Y', '--summary',
  569. action='store_true',
  570. help="Only show the total.")
  571. parser.add_argument(
  572. '-F', '--source',
  573. dest='sources',
  574. action='append',
  575. help="Only consider definitions in this file. Defaults to anything "
  576. "in the current directory.")
  577. parser.add_argument(
  578. '--everything',
  579. action='store_true',
  580. help="Include builtin and libc specific symbols.")
  581. parser.add_argument(
  582. '--internal',
  583. action='store_true',
  584. help="Also show structs in .c files.")
  585. parser.add_argument(
  586. '--objdump-path',
  587. type=lambda x: x.split(),
  588. default=OBJDUMP_PATH,
  589. help="Path to the objdump executable, may include flags. "
  590. "Defaults to %r." % OBJDUMP_PATH)
  591. sys.exit(main(**{k: v
  592. for k, v in vars(parser.parse_intermixed_args()).items()
  593. if v is not None}))