struct_.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
  5. # Example:
  6. # ./scripts/struct_.py lfs.o lfs_util.o -Ssize
  7. #
  8. # Copyright (c) 2022, The littlefs authors.
  9. # SPDX-License-Identifier: BSD-3-Clause
  10. #
  11. import collections as co
  12. import csv
  13. import difflib
  14. import itertools as it
  15. import math as m
  16. import os
  17. import re
  18. import shlex
  19. import subprocess as sp
# Default objdump command, kept as an argv list because it may carry extra
# arguments; used as the default for collect()'s objdump_tool parameter and
# for the --objdump-tool command-line option.
OBJDUMP_TOOL = ['objdump']
  21. # integer fields
  22. class Int(co.namedtuple('Int', 'x')):
  23. __slots__ = ()
  24. def __new__(cls, x=0):
  25. if isinstance(x, Int):
  26. return x
  27. if isinstance(x, str):
  28. try:
  29. x = int(x, 0)
  30. except ValueError:
  31. # also accept +-∞ and +-inf
  32. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  33. x = m.inf
  34. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  35. x = -m.inf
  36. else:
  37. raise
  38. assert isinstance(x, int) or m.isinf(x), x
  39. return super().__new__(cls, x)
  40. def __str__(self):
  41. if self.x == m.inf:
  42. return '∞'
  43. elif self.x == -m.inf:
  44. return '-∞'
  45. else:
  46. return str(self.x)
  47. def __int__(self):
  48. assert not m.isinf(self.x)
  49. return self.x
  50. def __float__(self):
  51. return float(self.x)
  52. none = '%7s' % '-'
  53. def table(self):
  54. return '%7s' % (self,)
  55. diff_none = '%7s' % '-'
  56. diff_table = table
  57. def diff_diff(self, other):
  58. new = self.x if self else 0
  59. old = other.x if other else 0
  60. diff = new - old
  61. if diff == +m.inf:
  62. return '%7s' % '+∞'
  63. elif diff == -m.inf:
  64. return '%7s' % '-∞'
  65. else:
  66. return '%+7d' % diff
  67. def ratio(self, other):
  68. new = self.x if self else 0
  69. old = other.x if other else 0
  70. if m.isinf(new) and m.isinf(old):
  71. return 0.0
  72. elif m.isinf(new):
  73. return +m.inf
  74. elif m.isinf(old):
  75. return -m.inf
  76. elif not old and not new:
  77. return 0.0
  78. elif not old:
  79. return 1.0
  80. else:
  81. return (new-old) / old
  82. def __add__(self, other):
  83. return self.__class__(self.x + other.x)
  84. def __sub__(self, other):
  85. return self.__class__(self.x - other.x)
  86. def __mul__(self, other):
  87. return self.__class__(self.x * other.x)
  88. # struct size results
  89. class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
  90. _by = ['file', 'struct']
  91. _fields = ['size']
  92. _types = {'size': Int}
  93. __slots__ = ()
  94. def __new__(cls, file='', struct='', size=0):
  95. return super().__new__(cls, file, struct,
  96. Int(size))
  97. def __add__(self, other):
  98. return StructResult(self.file, self.struct,
  99. self.size + other.size)
  100. def openio(path, mode='r', buffering=-1):
  101. if path == '-':
  102. if mode == 'r':
  103. return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
  104. else:
  105. return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
  106. else:
  107. return open(path, mode, buffering)
  108. def collect(obj_paths, *,
  109. objdump_tool=OBJDUMP_TOOL,
  110. sources=None,
  111. everything=False,
  112. internal=False,
  113. **args):
  114. line_pattern = re.compile(
  115. '^\s+(?P<no>[0-9]+)'
  116. '(?:\s+(?P<dir>[0-9]+))?'
  117. '\s+.*'
  118. '\s+(?P<path>[^\s]+)$')
  119. info_pattern = re.compile(
  120. '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
  121. '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  122. '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
  123. '|.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  124. results = []
  125. for path in obj_paths:
  126. # find files, we want to filter by structs in .h files
  127. dirs = {}
  128. files = {}
  129. # note objdump-tool may contain extra args
  130. cmd = objdump_tool + ['--dwarf=rawline', path]
  131. if args.get('verbose'):
  132. print(' '.join(shlex.quote(c) for c in cmd))
  133. proc = sp.Popen(cmd,
  134. stdout=sp.PIPE,
  135. stderr=sp.PIPE if not args.get('verbose') else None,
  136. universal_newlines=True,
  137. errors='replace',
  138. close_fds=False)
  139. for line in proc.stdout:
  140. # note that files contain references to dirs, which we
  141. # dereference as soon as we see them as each file table follows a
  142. # dir table
  143. m = line_pattern.match(line)
  144. if m:
  145. if not m.group('dir'):
  146. # found a directory entry
  147. dirs[int(m.group('no'))] = m.group('path')
  148. else:
  149. # found a file entry
  150. dir = int(m.group('dir'))
  151. if dir in dirs:
  152. files[int(m.group('no'))] = os.path.join(
  153. dirs[dir],
  154. m.group('path'))
  155. else:
  156. files[int(m.group('no'))] = m.group('path')
  157. proc.wait()
  158. if proc.returncode != 0:
  159. if not args.get('verbose'):
  160. for line in proc.stderr:
  161. sys.stdout.write(line)
  162. sys.exit(-1)
  163. # collect structs as we parse dwarf info
  164. results_ = []
  165. is_struct = False
  166. s_name = None
  167. s_file = None
  168. s_size = None
  169. # note objdump-tool may contain extra args
  170. cmd = objdump_tool + ['--dwarf=info', path]
  171. if args.get('verbose'):
  172. print(' '.join(shlex.quote(c) for c in cmd))
  173. proc = sp.Popen(cmd,
  174. stdout=sp.PIPE,
  175. stderr=sp.PIPE if not args.get('verbose') else None,
  176. universal_newlines=True,
  177. errors='replace',
  178. close_fds=False)
  179. for line in proc.stdout:
  180. # state machine here to find structs
  181. m = info_pattern.match(line)
  182. if m:
  183. if m.group('tag'):
  184. if is_struct:
  185. file = files.get(s_file, '?')
  186. results_.append(StructResult(file, s_name, s_size))
  187. is_struct = (m.group('tag') == 'DW_TAG_structure_type')
  188. elif m.group('name'):
  189. s_name = m.group('name')
  190. elif m.group('file'):
  191. s_file = int(m.group('file'))
  192. elif m.group('size'):
  193. s_size = int(m.group('size'))
  194. if is_struct:
  195. file = files.get(s_file, '?')
  196. results_.append(StructResult(file, s_name, s_size))
  197. proc.wait()
  198. if proc.returncode != 0:
  199. if not args.get('verbose'):
  200. for line in proc.stderr:
  201. sys.stdout.write(line)
  202. sys.exit(-1)
  203. for r in results_:
  204. # ignore filtered sources
  205. if sources is not None:
  206. if not any(
  207. os.path.abspath(r.file) == os.path.abspath(s)
  208. for s in sources):
  209. continue
  210. else:
  211. # default to only cwd
  212. if not everything and not os.path.commonpath([
  213. os.getcwd(),
  214. os.path.abspath(r.file)]) == os.getcwd():
  215. continue
  216. # limit to .h files unless --internal
  217. if not internal and not r.file.endswith('.h'):
  218. continue
  219. # simplify path
  220. if os.path.commonpath([
  221. os.getcwd(),
  222. os.path.abspath(r.file)]) == os.getcwd():
  223. file = os.path.relpath(r.file)
  224. else:
  225. file = os.path.abspath(r.file)
  226. results.append(r._replace(file=file))
  227. return results
  228. def fold(Result, results, *,
  229. by=None,
  230. defines=None,
  231. **_):
  232. if by is None:
  233. by = Result._by
  234. for k in it.chain(by or [], (k for k, _ in defines or [])):
  235. if k not in Result._by and k not in Result._fields:
  236. print("error: could not find field %r?" % k)
  237. sys.exit(-1)
  238. # filter by matching defines
  239. if defines is not None:
  240. results_ = []
  241. for r in results:
  242. if all(getattr(r, k) in vs for k, vs in defines):
  243. results_.append(r)
  244. results = results_
  245. # organize results into conflicts
  246. folding = co.OrderedDict()
  247. for r in results:
  248. name = tuple(getattr(r, k) for k in by)
  249. if name not in folding:
  250. folding[name] = []
  251. folding[name].append(r)
  252. # merge conflicts
  253. folded = []
  254. for name, rs in folding.items():
  255. folded.append(sum(rs[1:], start=rs[0]))
  256. return folded
  257. def table(Result, results, diff_results=None, *,
  258. by=None,
  259. fields=None,
  260. sort=None,
  261. summary=False,
  262. all=False,
  263. percent=False,
  264. **_):
  265. all_, all = all, __builtins__.all
  266. if by is None:
  267. by = Result._by
  268. if fields is None:
  269. fields = Result._fields
  270. types = Result._types
  271. # fold again
  272. results = fold(Result, results, by=by)
  273. if diff_results is not None:
  274. diff_results = fold(Result, diff_results, by=by)
  275. # organize by name
  276. table = {
  277. ','.join(str(getattr(r, k) or '') for k in by): r
  278. for r in results}
  279. diff_table = {
  280. ','.join(str(getattr(r, k) or '') for k in by): r
  281. for r in diff_results or []}
  282. names = list(table.keys() | diff_table.keys())
  283. # sort again, now with diff info, note that python's sort is stable
  284. names.sort()
  285. if diff_results is not None:
  286. names.sort(key=lambda n: tuple(
  287. types[k].ratio(
  288. getattr(table.get(n), k, None),
  289. getattr(diff_table.get(n), k, None))
  290. for k in fields),
  291. reverse=True)
  292. if sort:
  293. for k, reverse in reversed(sort):
  294. names.sort(key=lambda n: (getattr(table[n], k),)
  295. if getattr(table.get(n), k, None) is not None else (),
  296. reverse=reverse ^ (not k or k in Result._fields))
  297. # build up our lines
  298. lines = []
  299. # header
  300. header = []
  301. header.append('%s%s' % (
  302. ','.join(by),
  303. ' (%d added, %d removed)' % (
  304. sum(1 for n in table if n not in diff_table),
  305. sum(1 for n in diff_table if n not in table))
  306. if diff_results is not None and not percent else '')
  307. if not summary else '')
  308. if diff_results is None:
  309. for k in fields:
  310. header.append(k)
  311. elif percent:
  312. for k in fields:
  313. header.append(k)
  314. else:
  315. for k in fields:
  316. header.append('o'+k)
  317. for k in fields:
  318. header.append('n'+k)
  319. for k in fields:
  320. header.append('d'+k)
  321. header.append('')
  322. lines.append(header)
  323. def table_entry(name, r, diff_r=None, ratios=[]):
  324. entry = []
  325. entry.append(name)
  326. if diff_results is None:
  327. for k in fields:
  328. entry.append(getattr(r, k).table()
  329. if getattr(r, k, None) is not None
  330. else types[k].none)
  331. elif percent:
  332. for k in fields:
  333. entry.append(getattr(r, k).diff_table()
  334. if getattr(r, k, None) is not None
  335. else types[k].diff_none)
  336. else:
  337. for k in fields:
  338. entry.append(getattr(diff_r, k).diff_table()
  339. if getattr(diff_r, k, None) is not None
  340. else types[k].diff_none)
  341. for k in fields:
  342. entry.append(getattr(r, k).diff_table()
  343. if getattr(r, k, None) is not None
  344. else types[k].diff_none)
  345. for k in fields:
  346. entry.append(types[k].diff_diff(
  347. getattr(r, k, None),
  348. getattr(diff_r, k, None)))
  349. if diff_results is None:
  350. entry.append('')
  351. elif percent:
  352. entry.append(' (%s)' % ', '.join(
  353. '+∞%' if t == +m.inf
  354. else '-∞%' if t == -m.inf
  355. else '%+.1f%%' % (100*t)
  356. for t in ratios))
  357. else:
  358. entry.append(' (%s)' % ', '.join(
  359. '+∞%' if t == +m.inf
  360. else '-∞%' if t == -m.inf
  361. else '%+.1f%%' % (100*t)
  362. for t in ratios
  363. if t)
  364. if any(ratios) else '')
  365. return entry
  366. # entries
  367. if not summary:
  368. for name in names:
  369. r = table.get(name)
  370. if diff_results is None:
  371. diff_r = None
  372. ratios = None
  373. else:
  374. diff_r = diff_table.get(name)
  375. ratios = [
  376. types[k].ratio(
  377. getattr(r, k, None),
  378. getattr(diff_r, k, None))
  379. for k in fields]
  380. if not all_ and not any(ratios):
  381. continue
  382. lines.append(table_entry(name, r, diff_r, ratios))
  383. # total
  384. r = next(iter(fold(Result, results, by=[])), None)
  385. if diff_results is None:
  386. diff_r = None
  387. ratios = None
  388. else:
  389. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  390. ratios = [
  391. types[k].ratio(
  392. getattr(r, k, None),
  393. getattr(diff_r, k, None))
  394. for k in fields]
  395. lines.append(table_entry('TOTAL', r, diff_r, ratios))
  396. # find the best widths, note that column 0 contains the names and column -1
  397. # the ratios, so those are handled a bit differently
  398. widths = [
  399. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  400. for w, i in zip(
  401. it.chain([23], it.repeat(7)),
  402. range(len(lines[0])-1))]
  403. # print our table
  404. for line in lines:
  405. print('%-*s %s%s' % (
  406. widths[0], line[0],
  407. ' '.join('%*s' % (w, x)
  408. for w, x in zip(widths[1:], line[1:-1])),
  409. line[-1]))
  410. def main(obj_paths, *,
  411. by=None,
  412. fields=None,
  413. defines=None,
  414. sort=None,
  415. **args):
  416. # find sizes
  417. if not args.get('use', None):
  418. results = collect(obj_paths, **args)
  419. else:
  420. results = []
  421. with openio(args['use']) as f:
  422. reader = csv.DictReader(f, restval='')
  423. for r in reader:
  424. try:
  425. results.append(StructResult(
  426. **{k: r[k] for k in StructResult._by
  427. if k in r and r[k].strip()},
  428. **{k: r['struct_'+k]
  429. for k in StructResult._fields
  430. if 'struct_'+k in r
  431. and r['struct_'+k].strip()}))
  432. except TypeError:
  433. pass
  434. # fold
  435. results = fold(StructResult, results, by=by, defines=defines)
  436. # sort, note that python's sort is stable
  437. results.sort()
  438. if sort:
  439. for k, reverse in reversed(sort):
  440. results.sort(key=lambda r: (getattr(r, k),)
  441. if getattr(r, k) is not None else (),
  442. reverse=reverse ^ (not k or k in StructResult._fields))
  443. # write results to CSV
  444. if args.get('output'):
  445. with openio(args['output'], 'w') as f:
  446. writer = csv.DictWriter(f,
  447. (by if by is not None else StructResult._by)
  448. + ['struct_'+k for k in StructResult._fields])
  449. writer.writeheader()
  450. for r in results:
  451. writer.writerow(
  452. {k: getattr(r, k)
  453. for k in (by if by is not None else StructResult._by)}
  454. | {'struct_'+k: getattr(r, k)
  455. for k in StructResult._fields})
  456. # find previous results?
  457. if args.get('diff'):
  458. diff_results = []
  459. try:
  460. with openio(args['diff']) as f:
  461. reader = csv.DictReader(f, restval='')
  462. for r in reader:
  463. try:
  464. diff_results.append(StructResult(
  465. **{k: r[k] for k in StructResult._by
  466. if k in r and r[k].strip()},
  467. **{k: r['struct_'+k]
  468. for k in StructResult._fields
  469. if 'struct_'+k in r
  470. and r['struct_'+k].strip()}))
  471. except TypeError:
  472. pass
  473. except FileNotFoundError:
  474. pass
  475. # fold
  476. diff_results = fold(StructResult, diff_results, by=by, defines=defines)
  477. # print table
  478. if not args.get('quiet'):
  479. table(StructResult, results,
  480. diff_results if args.get('diff') else None,
  481. by=by if by is not None else ['struct'],
  482. fields=fields,
  483. sort=sort,
  484. **args)
  485. if __name__ == "__main__":
  486. import argparse
  487. import sys
  488. parser = argparse.ArgumentParser(
  489. description="Find struct sizes.",
  490. allow_abbrev=False)
  491. parser.add_argument(
  492. 'obj_paths',
  493. nargs='*',
  494. help="Input *.o files.")
  495. parser.add_argument(
  496. '-v', '--verbose',
  497. action='store_true',
  498. help="Output commands that run behind the scenes.")
  499. parser.add_argument(
  500. '-q', '--quiet',
  501. action='store_true',
  502. help="Don't show anything, useful with -o.")
  503. parser.add_argument(
  504. '-o', '--output',
  505. help="Specify CSV file to store results.")
  506. parser.add_argument(
  507. '-u', '--use',
  508. help="Don't parse anything, use this CSV file.")
  509. parser.add_argument(
  510. '-d', '--diff',
  511. help="Specify CSV file to diff against.")
  512. parser.add_argument(
  513. '-a', '--all',
  514. action='store_true',
  515. help="Show all, not just the ones that changed.")
  516. parser.add_argument(
  517. '-p', '--percent',
  518. action='store_true',
  519. help="Only show percentage change, not a full diff.")
  520. parser.add_argument(
  521. '-b', '--by',
  522. action='append',
  523. choices=StructResult._by,
  524. help="Group by this field.")
  525. parser.add_argument(
  526. '-f', '--field',
  527. dest='fields',
  528. action='append',
  529. choices=StructResult._fields,
  530. help="Show this field.")
  531. parser.add_argument(
  532. '-D', '--define',
  533. dest='defines',
  534. action='append',
  535. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  536. help="Only include results where this field is this value.")
  537. class AppendSort(argparse.Action):
  538. def __call__(self, parser, namespace, value, option):
  539. if namespace.sort is None:
  540. namespace.sort = []
  541. namespace.sort.append((value, True if option == '-S' else False))
  542. parser.add_argument(
  543. '-s', '--sort',
  544. action=AppendSort,
  545. help="Sort by this field.")
  546. parser.add_argument(
  547. '-S', '--reverse-sort',
  548. action=AppendSort,
  549. help="Sort by this field, but backwards.")
  550. parser.add_argument(
  551. '-Y', '--summary',
  552. action='store_true',
  553. help="Only show the total.")
  554. parser.add_argument(
  555. '-F', '--source',
  556. dest='sources',
  557. action='append',
  558. help="Only consider definitions in this file. Defaults to anything "
  559. "in the current directory.")
  560. parser.add_argument(
  561. '--everything',
  562. action='store_true',
  563. help="Include builtin and libc specific symbols.")
  564. parser.add_argument(
  565. '--internal',
  566. action='store_true',
  567. help="Also show structs in .c files.")
  568. parser.add_argument(
  569. '--objdump-tool',
  570. type=lambda x: x.split(),
  571. default=OBJDUMP_TOOL,
  572. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  573. sys.exit(main(**{k: v
  574. for k, v in vars(parser.parse_intermixed_args()).items()
  575. if v is not None}))