#!/usr/bin/env python3
#
# Script to find struct sizes.
#
# Example:
# ./scripts/struct_.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
import collections as co
import csv
import difflib
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
  21. OBJ_PATHS = ['*.o']
  22. OBJDUMP_TOOL = ['objdump']
  23. # integer fields
  24. class Int(co.namedtuple('Int', 'x')):
  25. __slots__ = ()
  26. def __new__(cls, x=0):
  27. if isinstance(x, Int):
  28. return x
  29. if isinstance(x, str):
  30. try:
  31. x = int(x, 0)
  32. except ValueError:
  33. # also accept +-∞ and +-inf
  34. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  35. x = m.inf
  36. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  37. x = -m.inf
  38. else:
  39. raise
  40. assert isinstance(x, int) or m.isinf(x), x
  41. return super().__new__(cls, x)
  42. def __str__(self):
  43. if self.x == m.inf:
  44. return '∞'
  45. elif self.x == -m.inf:
  46. return '-∞'
  47. else:
  48. return str(self.x)
  49. def __int__(self):
  50. assert not m.isinf(self.x)
  51. return self.x
  52. def __float__(self):
  53. return float(self.x)
  54. none = '%7s' % '-'
  55. def table(self):
  56. return '%7s' % (self,)
  57. diff_none = '%7s' % '-'
  58. diff_table = table
  59. def diff_diff(self, other):
  60. new = self.x if self else 0
  61. old = other.x if other else 0
  62. diff = new - old
  63. if diff == +m.inf:
  64. return '%7s' % '+∞'
  65. elif diff == -m.inf:
  66. return '%7s' % '-∞'
  67. else:
  68. return '%+7d' % diff
  69. def ratio(self, other):
  70. new = self.x if self else 0
  71. old = other.x if other else 0
  72. if m.isinf(new) and m.isinf(old):
  73. return 0.0
  74. elif m.isinf(new):
  75. return +m.inf
  76. elif m.isinf(old):
  77. return -m.inf
  78. elif not old and not new:
  79. return 0.0
  80. elif not old:
  81. return 1.0
  82. else:
  83. return (new-old) / old
  84. def __add__(self, other):
  85. return self.__class__(self.x + other.x)
  86. def __sub__(self, other):
  87. return self.__class__(self.x - other.x)
  88. def __mul__(self, other):
  89. return self.__class__(self.x * other.x)
  90. # struct size results
  91. class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
  92. _by = ['file', 'struct']
  93. _fields = ['size']
  94. _types = {'size': Int}
  95. __slots__ = ()
  96. def __new__(cls, file='', struct='', size=0):
  97. return super().__new__(cls, file, struct,
  98. Int(size))
  99. def __add__(self, other):
  100. return StructResult(self.file, self.struct,
  101. self.size + other.size)
  102. def openio(path, mode='r'):
  103. if path == '-':
  104. if mode == 'r':
  105. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  106. else:
  107. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  108. else:
  109. return open(path, mode)
  110. def collect(obj_paths, *,
  111. objdump_tool=OBJDUMP_TOOL,
  112. sources=None,
  113. everything=False,
  114. internal=False,
  115. **args):
  116. line_pattern = re.compile(
  117. '^\s+(?P<no>[0-9]+)\s+'
  118. '(?:(?P<dir>[0-9]+)\s+)?'
  119. '.*\s+'
  120. '(?P<path>[^\s]+)$')
  121. info_pattern = re.compile(
  122. '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
  123. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  124. '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
  125. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  126. results = []
  127. for path in obj_paths:
  128. # find files, we want to filter by structs in .h files
  129. dirs = {}
  130. files = {}
  131. # note objdump-tool may contain extra args
  132. cmd = objdump_tool + ['--dwarf=rawline', path]
  133. if args.get('verbose'):
  134. print(' '.join(shlex.quote(c) for c in cmd))
  135. proc = sp.Popen(cmd,
  136. stdout=sp.PIPE,
  137. stderr=sp.PIPE if not args.get('verbose') else None,
  138. universal_newlines=True,
  139. errors='replace',
  140. close_fds=False)
  141. for line in proc.stdout:
  142. # note that files contain references to dirs, which we
  143. # dereference as soon as we see them as each file table follows a
  144. # dir table
  145. m = line_pattern.match(line)
  146. if m:
  147. if not m.group('dir'):
  148. # found a directory entry
  149. dirs[int(m.group('no'))] = m.group('path')
  150. else:
  151. # found a file entry
  152. dir = int(m.group('dir'))
  153. if dir in dirs:
  154. files[int(m.group('no'))] = os.path.join(
  155. dirs[dir],
  156. m.group('path'))
  157. else:
  158. files[int(m.group('no'))] = m.group('path')
  159. proc.wait()
  160. if proc.returncode != 0:
  161. if not args.get('verbose'):
  162. for line in proc.stderr:
  163. sys.stdout.write(line)
  164. sys.exit(-1)
  165. # collect structs as we parse dwarf info
  166. results_ = []
  167. is_struct = False
  168. s_name = None
  169. s_file = None
  170. s_size = None
  171. # note objdump-tool may contain extra args
  172. cmd = objdump_tool + ['--dwarf=info', path]
  173. if args.get('verbose'):
  174. print(' '.join(shlex.quote(c) for c in cmd))
  175. proc = sp.Popen(cmd,
  176. stdout=sp.PIPE,
  177. stderr=sp.PIPE if not args.get('verbose') else None,
  178. universal_newlines=True,
  179. errors='replace',
  180. close_fds=False)
  181. for line in proc.stdout:
  182. # state machine here to find structs
  183. m = info_pattern.match(line)
  184. if m:
  185. if m.group('tag'):
  186. if is_struct:
  187. file = files.get(s_file, '?')
  188. results_.append(StructResult(file, s_name, s_size))
  189. is_struct = (m.group('tag') == 'DW_TAG_structure_type')
  190. elif m.group('name'):
  191. s_name = m.group('name')
  192. elif m.group('file'):
  193. s_file = int(m.group('file'))
  194. elif m.group('size'):
  195. s_size = int(m.group('size'))
  196. proc.wait()
  197. if proc.returncode != 0:
  198. if not args.get('verbose'):
  199. for line in proc.stderr:
  200. sys.stdout.write(line)
  201. sys.exit(-1)
  202. for r in results_:
  203. # ignore filtered sources
  204. if sources is not None:
  205. if not any(
  206. os.path.abspath(r.file) == os.path.abspath(s)
  207. for s in sources):
  208. continue
  209. else:
  210. # default to only cwd
  211. if not everything and not os.path.commonpath([
  212. os.getcwd(),
  213. os.path.abspath(r.file)]) == os.getcwd():
  214. continue
  215. # limit to .h files unless --internal
  216. if not internal and not r.file.endswith('.h'):
  217. continue
  218. # simplify path
  219. if os.path.commonpath([
  220. os.getcwd(),
  221. os.path.abspath(r.file)]) == os.getcwd():
  222. file = os.path.relpath(r.file)
  223. else:
  224. file = os.path.abspath(r.file)
  225. results.append(StructResult(r.file, r.struct, r.size))
  226. return results
  227. def fold(Result, results, *,
  228. by=None,
  229. defines=None,
  230. **_):
  231. if by is None:
  232. by = Result._by
  233. for k in it.chain(by or [], (k for k, _ in defines or [])):
  234. if k not in Result._by and k not in Result._fields:
  235. print("error: could not find field %r?" % k)
  236. sys.exit(-1)
  237. # filter by matching defines
  238. if defines is not None:
  239. results_ = []
  240. for r in results:
  241. if all(getattr(r, k) in vs for k, vs in defines):
  242. results_.append(r)
  243. results = results_
  244. # organize results into conflicts
  245. folding = co.OrderedDict()
  246. for r in results:
  247. name = tuple(getattr(r, k) for k in by)
  248. if name not in folding:
  249. folding[name] = []
  250. folding[name].append(r)
  251. # merge conflicts
  252. folded = []
  253. for name, rs in folding.items():
  254. folded.append(sum(rs[1:], start=rs[0]))
  255. return folded
  256. def table(Result, results, diff_results=None, *,
  257. by=None,
  258. fields=None,
  259. sort=None,
  260. summary=False,
  261. all=False,
  262. percent=False,
  263. **_):
  264. all_, all = all, __builtins__.all
  265. if by is None:
  266. by = Result._by
  267. if fields is None:
  268. fields = Result._fields
  269. types = Result._types
  270. # fold again
  271. results = fold(Result, results, by=by)
  272. if diff_results is not None:
  273. diff_results = fold(Result, diff_results, by=by)
  274. # organize by name
  275. table = {
  276. ','.join(str(getattr(r, k) or '') for k in by): r
  277. for r in results}
  278. diff_table = {
  279. ','.join(str(getattr(r, k) or '') for k in by): r
  280. for r in diff_results or []}
  281. names = list(table.keys() | diff_table.keys())
  282. # sort again, now with diff info, note that python's sort is stable
  283. names.sort()
  284. if diff_results is not None:
  285. names.sort(key=lambda n: tuple(
  286. types[k].ratio(
  287. getattr(table.get(n), k, None),
  288. getattr(diff_table.get(n), k, None))
  289. for k in fields),
  290. reverse=True)
  291. if sort:
  292. for k, reverse in reversed(sort):
  293. names.sort(key=lambda n: (getattr(table[n], k),)
  294. if getattr(table.get(n), k, None) is not None else (),
  295. reverse=reverse ^ (not k or k in Result._fields))
  296. # build up our lines
  297. lines = []
  298. # header
  299. line = []
  300. line.append('%s%s' % (
  301. ','.join(by),
  302. ' (%d added, %d removed)' % (
  303. sum(1 for n in table if n not in diff_table),
  304. sum(1 for n in diff_table if n not in table))
  305. if diff_results is not None and not percent else '')
  306. if not summary else '')
  307. if diff_results is None:
  308. for k in fields:
  309. line.append(k)
  310. elif percent:
  311. for k in fields:
  312. line.append(k)
  313. else:
  314. for k in fields:
  315. line.append('o'+k)
  316. for k in fields:
  317. line.append('n'+k)
  318. for k in fields:
  319. line.append('d'+k)
  320. line.append('')
  321. lines.append(line)
  322. # entries
  323. if not summary:
  324. for name in names:
  325. r = table.get(name)
  326. if diff_results is not None:
  327. diff_r = diff_table.get(name)
  328. ratios = [
  329. types[k].ratio(
  330. getattr(r, k, None),
  331. getattr(diff_r, k, None))
  332. for k in fields]
  333. if not any(ratios) and not all_:
  334. continue
  335. line = []
  336. line.append(name)
  337. if diff_results is None:
  338. for k in fields:
  339. line.append(getattr(r, k).table()
  340. if getattr(r, k, None) is not None
  341. else types[k].none)
  342. elif percent:
  343. for k in fields:
  344. line.append(getattr(r, k).diff_table()
  345. if getattr(r, k, None) is not None
  346. else types[k].diff_none)
  347. else:
  348. for k in fields:
  349. line.append(getattr(diff_r, k).diff_table()
  350. if getattr(diff_r, k, None) is not None
  351. else types[k].diff_none)
  352. for k in fields:
  353. line.append(getattr(r, k).diff_table()
  354. if getattr(r, k, None) is not None
  355. else types[k].diff_none)
  356. for k in fields:
  357. line.append(types[k].diff_diff(
  358. getattr(r, k, None),
  359. getattr(diff_r, k, None)))
  360. if diff_results is None:
  361. line.append('')
  362. elif percent:
  363. line.append(' (%s)' % ', '.join(
  364. '+∞%' if t == +m.inf
  365. else '-∞%' if t == -m.inf
  366. else '%+.1f%%' % (100*t)
  367. for t in ratios))
  368. else:
  369. line.append(' (%s)' % ', '.join(
  370. '+∞%' if t == +m.inf
  371. else '-∞%' if t == -m.inf
  372. else '%+.1f%%' % (100*t)
  373. for t in ratios
  374. if t)
  375. if any(ratios) else '')
  376. lines.append(line)
  377. # total
  378. r = next(iter(fold(Result, results, by=[])), None)
  379. if diff_results is not None:
  380. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  381. ratios = [
  382. types[k].ratio(
  383. getattr(r, k, None),
  384. getattr(diff_r, k, None))
  385. for k in fields]
  386. line = []
  387. line.append('TOTAL')
  388. if diff_results is None:
  389. for k in fields:
  390. line.append(getattr(r, k).table()
  391. if getattr(r, k, None) is not None
  392. else types[k].none)
  393. elif percent:
  394. for k in fields:
  395. line.append(getattr(r, k).diff_table()
  396. if getattr(r, k, None) is not None
  397. else types[k].diff_none)
  398. else:
  399. for k in fields:
  400. line.append(getattr(diff_r, k).diff_table()
  401. if getattr(diff_r, k, None) is not None
  402. else types[k].diff_none)
  403. for k in fields:
  404. line.append(getattr(r, k).diff_table()
  405. if getattr(r, k, None) is not None
  406. else types[k].diff_none)
  407. for k in fields:
  408. line.append(types[k].diff_diff(
  409. getattr(r, k, None),
  410. getattr(diff_r, k, None)))
  411. if diff_results is None:
  412. line.append('')
  413. elif percent:
  414. line.append(' (%s)' % ', '.join(
  415. '+∞%' if t == +m.inf
  416. else '-∞%' if t == -m.inf
  417. else '%+.1f%%' % (100*t)
  418. for t in ratios))
  419. else:
  420. line.append(' (%s)' % ', '.join(
  421. '+∞%' if t == +m.inf
  422. else '-∞%' if t == -m.inf
  423. else '%+.1f%%' % (100*t)
  424. for t in ratios
  425. if t)
  426. if any(ratios) else '')
  427. lines.append(line)
  428. # find the best widths, note that column 0 contains the names and column -1
  429. # the ratios, so those are handled a bit differently
  430. widths = [
  431. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  432. for w, i in zip(
  433. it.chain([23], it.repeat(7)),
  434. range(len(lines[0])-1))]
  435. # print our table
  436. for line in lines:
  437. print('%-*s %s%s' % (
  438. widths[0], line[0],
  439. ' '.join('%*s' % (w, x)
  440. for w, x in zip(widths[1:], line[1:-1])),
  441. line[-1]))
  442. def main(obj_paths, *,
  443. by=None,
  444. fields=None,
  445. defines=None,
  446. sort=None,
  447. **args):
  448. # find sizes
  449. if not args.get('use', None):
  450. # find .o files
  451. paths = []
  452. for path in obj_paths:
  453. if os.path.isdir(path):
  454. path = path + '/*.o'
  455. for path in glob.glob(path):
  456. paths.append(path)
  457. if not paths:
  458. print("error: no .o files found in %r?" % obj_paths)
  459. sys.exit(-1)
  460. results = collect(paths, **args)
  461. else:
  462. results = []
  463. with openio(args['use']) as f:
  464. reader = csv.DictReader(f, restval='')
  465. for r in reader:
  466. try:
  467. results.append(StructResult(
  468. **{k: r[k] for k in StructResult._by
  469. if k in r and r[k].strip()},
  470. **{k: r['struct_'+k]
  471. for k in StructResult._fields
  472. if 'struct_'+k in r
  473. and r['struct_'+k].strip()}))
  474. except TypeError:
  475. pass
  476. # fold
  477. results = fold(StructResult, results, by=by, defines=defines)
  478. # sort, note that python's sort is stable
  479. results.sort()
  480. if sort:
  481. for k, reverse in reversed(sort):
  482. results.sort(key=lambda r: (getattr(r, k),)
  483. if getattr(r, k) is not None else (),
  484. reverse=reverse ^ (not k or k in StructResult._fields))
  485. # write results to CSV
  486. if args.get('output'):
  487. with openio(args['output'], 'w') as f:
  488. writer = csv.DictWriter(f,
  489. (by if by is not None else StructResult._by)
  490. + ['struct_'+k for k in StructResult._fields])
  491. writer.writeheader()
  492. for r in results:
  493. writer.writerow(
  494. {k: getattr(r, k)
  495. for k in (by if by is not None else StructResult._by)}
  496. | {'struct_'+k: getattr(r, k)
  497. for k in StructResult._fields})
  498. # find previous results?
  499. if args.get('diff'):
  500. diff_results = []
  501. try:
  502. with openio(args['diff']) as f:
  503. reader = csv.DictReader(f, restval='')
  504. for r in reader:
  505. try:
  506. diff_results.append(StructResult(
  507. **{k: r[k] for k in StructResult._by
  508. if k in r and r[k].strip()},
  509. **{k: r['struct_'+k]
  510. for k in StructResult._fields
  511. if 'struct_'+k in r
  512. and r['struct_'+k].strip()}))
  513. except TypeError:
  514. pass
  515. except FileNotFoundError:
  516. pass
  517. # fold
  518. diff_results = fold(StructResult, diff_results, by=by, defines=defines)
  519. # print table
  520. if not args.get('quiet'):
  521. table(StructResult, results,
  522. diff_results if args.get('diff') else None,
  523. by=by if by is not None else ['struct'],
  524. fields=fields,
  525. sort=sort,
  526. **args)
  527. if __name__ == "__main__":
  528. import argparse
  529. import sys
  530. parser = argparse.ArgumentParser(
  531. description="Find struct sizes.",
  532. allow_abbrev=False)
  533. parser.add_argument(
  534. 'obj_paths',
  535. nargs='*',
  536. default=OBJ_PATHS,
  537. help="Description of where to find *.o files. May be a directory "
  538. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  539. parser.add_argument(
  540. '-v', '--verbose',
  541. action='store_true',
  542. help="Output commands that run behind the scenes.")
  543. parser.add_argument(
  544. '-q', '--quiet',
  545. action='store_true',
  546. help="Don't show anything, useful with -o.")
  547. parser.add_argument(
  548. '-o', '--output',
  549. help="Specify CSV file to store results.")
  550. parser.add_argument(
  551. '-u', '--use',
  552. help="Don't parse anything, use this CSV file.")
  553. parser.add_argument(
  554. '-d', '--diff',
  555. help="Specify CSV file to diff against.")
  556. parser.add_argument(
  557. '-a', '--all',
  558. action='store_true',
  559. help="Show all, not just the ones that changed.")
  560. parser.add_argument(
  561. '-p', '--percent',
  562. action='store_true',
  563. help="Only show percentage change, not a full diff.")
  564. parser.add_argument(
  565. '-b', '--by',
  566. action='append',
  567. choices=StructResult._by,
  568. help="Group by this field.")
  569. parser.add_argument(
  570. '-f', '--field',
  571. dest='fields',
  572. action='append',
  573. choices=StructResult._fields,
  574. help="Show this field.")
  575. parser.add_argument(
  576. '-D', '--define',
  577. dest='defines',
  578. action='append',
  579. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  580. help="Only include results where this field is this value.")
  581. class AppendSort(argparse.Action):
  582. def __call__(self, parser, namespace, value, option):
  583. if namespace.sort is None:
  584. namespace.sort = []
  585. namespace.sort.append((value, True if option == '-S' else False))
  586. parser.add_argument(
  587. '-s', '--sort',
  588. action=AppendSort,
  589. help="Sort by this field.")
  590. parser.add_argument(
  591. '-S', '--reverse-sort',
  592. action=AppendSort,
  593. help="Sort by this field, but backwards.")
  594. parser.add_argument(
  595. '-Y', '--summary',
  596. action='store_true',
  597. help="Only show the total.")
  598. parser.add_argument(
  599. '-F', '--source',
  600. dest='sources',
  601. action='append',
  602. help="Only consider definitions in this file. Defaults to anything "
  603. "in the current directory.")
  604. parser.add_argument(
  605. '--everything',
  606. action='store_true',
  607. help="Include builtin and libc specific symbols.")
  608. parser.add_argument(
  609. '--internal',
  610. action='store_true',
  611. help="Also show structs in .c files.")
  612. parser.add_argument(
  613. '--objdump-tool',
  614. type=lambda x: x.split(),
  615. default=OBJDUMP_TOOL,
  616. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  617. sys.exit(main(**{k: v
  618. for k, v in vars(parser.parse_intermixed_args()).items()
  619. if v is not None}))