struct_.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
#!/usr/bin/env python3
#
# Script to find struct sizes.
#
# Example:
# ./scripts/struct_.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
import collections as co
import csv
import difflib
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
# default objdump command, kept as a list because collect() appends the
# --dwarf flag and the object path to it
OBJDUMP_PATH = ['objdump']
  21. # integer fields
  22. class Int(co.namedtuple('Int', 'x')):
  23. __slots__ = ()
  24. def __new__(cls, x=0):
  25. if isinstance(x, Int):
  26. return x
  27. if isinstance(x, str):
  28. try:
  29. x = int(x, 0)
  30. except ValueError:
  31. # also accept +-∞ and +-inf
  32. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  33. x = m.inf
  34. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  35. x = -m.inf
  36. else:
  37. raise
  38. assert isinstance(x, int) or m.isinf(x), x
  39. return super().__new__(cls, x)
  40. def __str__(self):
  41. if self.x == m.inf:
  42. return '∞'
  43. elif self.x == -m.inf:
  44. return '-∞'
  45. else:
  46. return str(self.x)
  47. def __int__(self):
  48. assert not m.isinf(self.x)
  49. return self.x
  50. def __float__(self):
  51. return float(self.x)
  52. none = '%7s' % '-'
  53. def table(self):
  54. return '%7s' % (self,)
  55. diff_none = '%7s' % '-'
  56. diff_table = table
  57. def diff_diff(self, other):
  58. new = self.x if self else 0
  59. old = other.x if other else 0
  60. diff = new - old
  61. if diff == +m.inf:
  62. return '%7s' % '+∞'
  63. elif diff == -m.inf:
  64. return '%7s' % '-∞'
  65. else:
  66. return '%+7d' % diff
  67. def ratio(self, other):
  68. new = self.x if self else 0
  69. old = other.x if other else 0
  70. if m.isinf(new) and m.isinf(old):
  71. return 0.0
  72. elif m.isinf(new):
  73. return +m.inf
  74. elif m.isinf(old):
  75. return -m.inf
  76. elif not old and not new:
  77. return 0.0
  78. elif not old:
  79. return 1.0
  80. else:
  81. return (new-old) / old
  82. def __add__(self, other):
  83. return self.__class__(self.x + other.x)
  84. def __sub__(self, other):
  85. return self.__class__(self.x - other.x)
  86. def __mul__(self, other):
  87. return self.__class__(self.x * other.x)
  88. # struct size results
  89. class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
  90. _by = ['file', 'struct']
  91. _fields = ['size']
  92. _types = {'size': Int}
  93. __slots__ = ()
  94. def __new__(cls, file='', struct='', size=0):
  95. return super().__new__(cls, file, struct,
  96. Int(size))
  97. def __add__(self, other):
  98. return StructResult(self.file, self.struct,
  99. self.size + other.size)
  100. def openio(path, mode='r', buffering=-1):
  101. # allow '-' for stdin/stdout
  102. if path == '-':
  103. if mode == 'r':
  104. return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
  105. else:
  106. return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
  107. else:
  108. return open(path, mode, buffering)
  109. def collect(obj_paths, *,
  110. objdump_path=OBJDUMP_PATH,
  111. sources=None,
  112. everything=False,
  113. internal=False,
  114. **args):
  115. line_pattern = re.compile(
  116. '^\s+(?P<no>[0-9]+)'
  117. '(?:\s+(?P<dir>[0-9]+))?'
  118. '\s+.*'
  119. '\s+(?P<path>[^\s]+)$')
  120. info_pattern = re.compile(
  121. '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
  122. '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  123. '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
  124. '|.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  125. results = []
  126. for path in obj_paths:
  127. # find files, we want to filter by structs in .h files
  128. dirs = {}
  129. files = {}
  130. # note objdump-path may contain extra args
  131. cmd = objdump_path + ['--dwarf=rawline', path]
  132. if args.get('verbose'):
  133. print(' '.join(shlex.quote(c) for c in cmd))
  134. proc = sp.Popen(cmd,
  135. stdout=sp.PIPE,
  136. stderr=sp.PIPE if not args.get('verbose') else None,
  137. universal_newlines=True,
  138. errors='replace',
  139. close_fds=False)
  140. for line in proc.stdout:
  141. # note that files contain references to dirs, which we
  142. # dereference as soon as we see them as each file table follows a
  143. # dir table
  144. m = line_pattern.match(line)
  145. if m:
  146. if not m.group('dir'):
  147. # found a directory entry
  148. dirs[int(m.group('no'))] = m.group('path')
  149. else:
  150. # found a file entry
  151. dir = int(m.group('dir'))
  152. if dir in dirs:
  153. files[int(m.group('no'))] = os.path.join(
  154. dirs[dir],
  155. m.group('path'))
  156. else:
  157. files[int(m.group('no'))] = m.group('path')
  158. proc.wait()
  159. if proc.returncode != 0:
  160. if not args.get('verbose'):
  161. for line in proc.stderr:
  162. sys.stdout.write(line)
  163. sys.exit(-1)
  164. # collect structs as we parse dwarf info
  165. results_ = []
  166. is_struct = False
  167. s_name = None
  168. s_file = None
  169. s_size = None
  170. # note objdump-path may contain extra args
  171. cmd = objdump_path + ['--dwarf=info', path]
  172. if args.get('verbose'):
  173. print(' '.join(shlex.quote(c) for c in cmd))
  174. proc = sp.Popen(cmd,
  175. stdout=sp.PIPE,
  176. stderr=sp.PIPE if not args.get('verbose') else None,
  177. universal_newlines=True,
  178. errors='replace',
  179. close_fds=False)
  180. for line in proc.stdout:
  181. # state machine here to find structs
  182. m = info_pattern.match(line)
  183. if m:
  184. if m.group('tag'):
  185. if is_struct:
  186. file = files.get(s_file, '?')
  187. results_.append(StructResult(file, s_name, s_size))
  188. is_struct = (m.group('tag') == 'DW_TAG_structure_type')
  189. elif m.group('name'):
  190. s_name = m.group('name')
  191. elif m.group('file'):
  192. s_file = int(m.group('file'))
  193. elif m.group('size'):
  194. s_size = int(m.group('size'))
  195. if is_struct:
  196. file = files.get(s_file, '?')
  197. results_.append(StructResult(file, s_name, s_size))
  198. proc.wait()
  199. if proc.returncode != 0:
  200. if not args.get('verbose'):
  201. for line in proc.stderr:
  202. sys.stdout.write(line)
  203. sys.exit(-1)
  204. for r in results_:
  205. # ignore filtered sources
  206. if sources is not None:
  207. if not any(
  208. os.path.abspath(r.file) == os.path.abspath(s)
  209. for s in sources):
  210. continue
  211. else:
  212. # default to only cwd
  213. if not everything and not os.path.commonpath([
  214. os.getcwd(),
  215. os.path.abspath(r.file)]) == os.getcwd():
  216. continue
  217. # limit to .h files unless --internal
  218. if not internal and not r.file.endswith('.h'):
  219. continue
  220. # simplify path
  221. if os.path.commonpath([
  222. os.getcwd(),
  223. os.path.abspath(r.file)]) == os.getcwd():
  224. file = os.path.relpath(r.file)
  225. else:
  226. file = os.path.abspath(r.file)
  227. results.append(r._replace(file=file))
  228. return results
  229. def fold(Result, results, *,
  230. by=None,
  231. defines=None,
  232. **_):
  233. if by is None:
  234. by = Result._by
  235. for k in it.chain(by or [], (k for k, _ in defines or [])):
  236. if k not in Result._by and k not in Result._fields:
  237. print("error: could not find field %r?" % k)
  238. sys.exit(-1)
  239. # filter by matching defines
  240. if defines is not None:
  241. results_ = []
  242. for r in results:
  243. if all(getattr(r, k) in vs for k, vs in defines):
  244. results_.append(r)
  245. results = results_
  246. # organize results into conflicts
  247. folding = co.OrderedDict()
  248. for r in results:
  249. name = tuple(getattr(r, k) for k in by)
  250. if name not in folding:
  251. folding[name] = []
  252. folding[name].append(r)
  253. # merge conflicts
  254. folded = []
  255. for name, rs in folding.items():
  256. folded.append(sum(rs[1:], start=rs[0]))
  257. return folded
  258. def table(Result, results, diff_results=None, *,
  259. by=None,
  260. fields=None,
  261. sort=None,
  262. summary=False,
  263. all=False,
  264. percent=False,
  265. **_):
  266. all_, all = all, __builtins__.all
  267. if by is None:
  268. by = Result._by
  269. if fields is None:
  270. fields = Result._fields
  271. types = Result._types
  272. # fold again
  273. results = fold(Result, results, by=by)
  274. if diff_results is not None:
  275. diff_results = fold(Result, diff_results, by=by)
  276. # organize by name
  277. table = {
  278. ','.join(str(getattr(r, k) or '') for k in by): r
  279. for r in results}
  280. diff_table = {
  281. ','.join(str(getattr(r, k) or '') for k in by): r
  282. for r in diff_results or []}
  283. names = list(table.keys() | diff_table.keys())
  284. # sort again, now with diff info, note that python's sort is stable
  285. names.sort()
  286. if diff_results is not None:
  287. names.sort(key=lambda n: tuple(
  288. types[k].ratio(
  289. getattr(table.get(n), k, None),
  290. getattr(diff_table.get(n), k, None))
  291. for k in fields),
  292. reverse=True)
  293. if sort:
  294. for k, reverse in reversed(sort):
  295. names.sort(key=lambda n: (getattr(table[n], k),)
  296. if getattr(table.get(n), k, None) is not None else (),
  297. reverse=reverse ^ (not k or k in Result._fields))
  298. # build up our lines
  299. lines = []
  300. # header
  301. header = []
  302. header.append('%s%s' % (
  303. ','.join(by),
  304. ' (%d added, %d removed)' % (
  305. sum(1 for n in table if n not in diff_table),
  306. sum(1 for n in diff_table if n not in table))
  307. if diff_results is not None and not percent else '')
  308. if not summary else '')
  309. if diff_results is None:
  310. for k in fields:
  311. header.append(k)
  312. elif percent:
  313. for k in fields:
  314. header.append(k)
  315. else:
  316. for k in fields:
  317. header.append('o'+k)
  318. for k in fields:
  319. header.append('n'+k)
  320. for k in fields:
  321. header.append('d'+k)
  322. header.append('')
  323. lines.append(header)
  324. def table_entry(name, r, diff_r=None, ratios=[]):
  325. entry = []
  326. entry.append(name)
  327. if diff_results is None:
  328. for k in fields:
  329. entry.append(getattr(r, k).table()
  330. if getattr(r, k, None) is not None
  331. else types[k].none)
  332. elif percent:
  333. for k in fields:
  334. entry.append(getattr(r, k).diff_table()
  335. if getattr(r, k, None) is not None
  336. else types[k].diff_none)
  337. else:
  338. for k in fields:
  339. entry.append(getattr(diff_r, k).diff_table()
  340. if getattr(diff_r, k, None) is not None
  341. else types[k].diff_none)
  342. for k in fields:
  343. entry.append(getattr(r, k).diff_table()
  344. if getattr(r, k, None) is not None
  345. else types[k].diff_none)
  346. for k in fields:
  347. entry.append(types[k].diff_diff(
  348. getattr(r, k, None),
  349. getattr(diff_r, k, None)))
  350. if diff_results is None:
  351. entry.append('')
  352. elif percent:
  353. entry.append(' (%s)' % ', '.join(
  354. '+∞%' if t == +m.inf
  355. else '-∞%' if t == -m.inf
  356. else '%+.1f%%' % (100*t)
  357. for t in ratios))
  358. else:
  359. entry.append(' (%s)' % ', '.join(
  360. '+∞%' if t == +m.inf
  361. else '-∞%' if t == -m.inf
  362. else '%+.1f%%' % (100*t)
  363. for t in ratios
  364. if t)
  365. if any(ratios) else '')
  366. return entry
  367. # entries
  368. if not summary:
  369. for name in names:
  370. r = table.get(name)
  371. if diff_results is None:
  372. diff_r = None
  373. ratios = None
  374. else:
  375. diff_r = diff_table.get(name)
  376. ratios = [
  377. types[k].ratio(
  378. getattr(r, k, None),
  379. getattr(diff_r, k, None))
  380. for k in fields]
  381. if not all_ and not any(ratios):
  382. continue
  383. lines.append(table_entry(name, r, diff_r, ratios))
  384. # total
  385. r = next(iter(fold(Result, results, by=[])), None)
  386. if diff_results is None:
  387. diff_r = None
  388. ratios = None
  389. else:
  390. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  391. ratios = [
  392. types[k].ratio(
  393. getattr(r, k, None),
  394. getattr(diff_r, k, None))
  395. for k in fields]
  396. lines.append(table_entry('TOTAL', r, diff_r, ratios))
  397. # find the best widths, note that column 0 contains the names and column -1
  398. # the ratios, so those are handled a bit differently
  399. widths = [
  400. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  401. for w, i in zip(
  402. it.chain([23], it.repeat(7)),
  403. range(len(lines[0])-1))]
  404. # print our table
  405. for line in lines:
  406. print('%-*s %s%s' % (
  407. widths[0], line[0],
  408. ' '.join('%*s' % (w, x)
  409. for w, x in zip(widths[1:], line[1:-1])),
  410. line[-1]))
  411. def main(obj_paths, *,
  412. by=None,
  413. fields=None,
  414. defines=None,
  415. sort=None,
  416. **args):
  417. # find sizes
  418. if not args.get('use', None):
  419. results = collect(obj_paths, **args)
  420. else:
  421. results = []
  422. with openio(args['use']) as f:
  423. reader = csv.DictReader(f, restval='')
  424. for r in reader:
  425. try:
  426. results.append(StructResult(
  427. **{k: r[k] for k in StructResult._by
  428. if k in r and r[k].strip()},
  429. **{k: r['struct_'+k]
  430. for k in StructResult._fields
  431. if 'struct_'+k in r
  432. and r['struct_'+k].strip()}))
  433. except TypeError:
  434. pass
  435. # fold
  436. results = fold(StructResult, results, by=by, defines=defines)
  437. # sort, note that python's sort is stable
  438. results.sort()
  439. if sort:
  440. for k, reverse in reversed(sort):
  441. results.sort(key=lambda r: (getattr(r, k),)
  442. if getattr(r, k) is not None else (),
  443. reverse=reverse ^ (not k or k in StructResult._fields))
  444. # write results to CSV
  445. if args.get('output'):
  446. with openio(args['output'], 'w') as f:
  447. writer = csv.DictWriter(f,
  448. (by if by is not None else StructResult._by)
  449. + ['struct_'+k for k in StructResult._fields])
  450. writer.writeheader()
  451. for r in results:
  452. writer.writerow(
  453. {k: getattr(r, k)
  454. for k in (by if by is not None else StructResult._by)}
  455. | {'struct_'+k: getattr(r, k)
  456. for k in StructResult._fields})
  457. # find previous results?
  458. if args.get('diff'):
  459. diff_results = []
  460. try:
  461. with openio(args['diff']) as f:
  462. reader = csv.DictReader(f, restval='')
  463. for r in reader:
  464. try:
  465. diff_results.append(StructResult(
  466. **{k: r[k] for k in StructResult._by
  467. if k in r and r[k].strip()},
  468. **{k: r['struct_'+k]
  469. for k in StructResult._fields
  470. if 'struct_'+k in r
  471. and r['struct_'+k].strip()}))
  472. except TypeError:
  473. pass
  474. except FileNotFoundError:
  475. pass
  476. # fold
  477. diff_results = fold(StructResult, diff_results, by=by, defines=defines)
  478. # print table
  479. if not args.get('quiet'):
  480. table(StructResult, results,
  481. diff_results if args.get('diff') else None,
  482. by=by if by is not None else ['struct'],
  483. fields=fields,
  484. sort=sort,
  485. **args)
  486. if __name__ == "__main__":
  487. import argparse
  488. import sys
  489. parser = argparse.ArgumentParser(
  490. description="Find struct sizes.",
  491. allow_abbrev=False)
  492. parser.add_argument(
  493. 'obj_paths',
  494. nargs='*',
  495. help="Input *.o files.")
  496. parser.add_argument(
  497. '-v', '--verbose',
  498. action='store_true',
  499. help="Output commands that run behind the scenes.")
  500. parser.add_argument(
  501. '-q', '--quiet',
  502. action='store_true',
  503. help="Don't show anything, useful with -o.")
  504. parser.add_argument(
  505. '-o', '--output',
  506. help="Specify CSV file to store results.")
  507. parser.add_argument(
  508. '-u', '--use',
  509. help="Don't parse anything, use this CSV file.")
  510. parser.add_argument(
  511. '-d', '--diff',
  512. help="Specify CSV file to diff against.")
  513. parser.add_argument(
  514. '-a', '--all',
  515. action='store_true',
  516. help="Show all, not just the ones that changed.")
  517. parser.add_argument(
  518. '-p', '--percent',
  519. action='store_true',
  520. help="Only show percentage change, not a full diff.")
  521. parser.add_argument(
  522. '-b', '--by',
  523. action='append',
  524. choices=StructResult._by,
  525. help="Group by this field.")
  526. parser.add_argument(
  527. '-f', '--field',
  528. dest='fields',
  529. action='append',
  530. choices=StructResult._fields,
  531. help="Show this field.")
  532. parser.add_argument(
  533. '-D', '--define',
  534. dest='defines',
  535. action='append',
  536. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  537. help="Only include results where this field is this value.")
  538. class AppendSort(argparse.Action):
  539. def __call__(self, parser, namespace, value, option):
  540. if namespace.sort is None:
  541. namespace.sort = []
  542. namespace.sort.append((value, True if option == '-S' else False))
  543. parser.add_argument(
  544. '-s', '--sort',
  545. action=AppendSort,
  546. help="Sort by this field.")
  547. parser.add_argument(
  548. '-S', '--reverse-sort',
  549. action=AppendSort,
  550. help="Sort by this field, but backwards.")
  551. parser.add_argument(
  552. '-Y', '--summary',
  553. action='store_true',
  554. help="Only show the total.")
  555. parser.add_argument(
  556. '-F', '--source',
  557. dest='sources',
  558. action='append',
  559. help="Only consider definitions in this file. Defaults to anything "
  560. "in the current directory.")
  561. parser.add_argument(
  562. '--everything',
  563. action='store_true',
  564. help="Include builtin and libc specific symbols.")
  565. parser.add_argument(
  566. '--internal',
  567. action='store_true',
  568. help="Also show structs in .c files.")
  569. parser.add_argument(
  570. '--objdump-path',
  571. type=lambda x: x.split(),
  572. default=OBJDUMP_PATH,
  573. help="Path to the objdump executable, may include flags. "
  574. "Defaults to %r." % OBJDUMP_PATH)
  575. sys.exit(main(**{k: v
  576. for k, v in vars(parser.parse_intermixed_args()).items()
  577. if v is not None}))