struct_.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
  5. # Example:
  6. # ./scripts/struct_.py lfs.o lfs_util.o -S
  7. #
  8. # Copyright (c) 2022, The littlefs authors.
  9. # SPDX-License-Identifier: BSD-3-Clause
  10. #
import builtins
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
  20. OBJ_PATHS = ['*.o']
  21. OBJDUMP_TOOL = ['objdump']
  22. # integer fields
  23. class Int(co.namedtuple('Int', 'x')):
  24. __slots__ = ()
  25. def __new__(cls, x=0):
  26. if isinstance(x, Int):
  27. return x
  28. if isinstance(x, str):
  29. try:
  30. x = int(x, 0)
  31. except ValueError:
  32. # also accept +-∞ and +-inf
  33. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  34. x = m.inf
  35. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  36. x = -m.inf
  37. else:
  38. raise
  39. assert isinstance(x, int) or m.isinf(x), x
  40. return super().__new__(cls, x)
  41. def __str__(self):
  42. if self.x == m.inf:
  43. return '∞'
  44. elif self.x == -m.inf:
  45. return '-∞'
  46. else:
  47. return str(self.x)
  48. def __int__(self):
  49. assert not m.isinf(self.x)
  50. return self.x
  51. def __float__(self):
  52. return float(self.x)
  53. none = '%7s' % '-'
  54. def table(self):
  55. return '%7s' % (self,)
  56. diff_none = '%7s' % '-'
  57. diff_table = table
  58. def diff_diff(self, other):
  59. new = self.x if self else 0
  60. old = other.x if other else 0
  61. diff = new - old
  62. if diff == +m.inf:
  63. return '%7s' % '+∞'
  64. elif diff == -m.inf:
  65. return '%7s' % '-∞'
  66. else:
  67. return '%+7d' % diff
  68. def ratio(self, other):
  69. new = self.x if self else 0
  70. old = other.x if other else 0
  71. if m.isinf(new) and m.isinf(old):
  72. return 0.0
  73. elif m.isinf(new):
  74. return +m.inf
  75. elif m.isinf(old):
  76. return -m.inf
  77. elif not old and not new:
  78. return 0.0
  79. elif not old:
  80. return 1.0
  81. else:
  82. return (new-old) / old
  83. def __add__(self, other):
  84. return self.__class__(self.x + other.x)
  85. def __sub__(self, other):
  86. return self.__class__(self.x - other.x)
  87. def __mul__(self, other):
  88. return self.__class__(self.x * other.x)
  89. # struct size results
  90. class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
  91. _by = ['file', 'struct']
  92. _fields = ['size']
  93. _types = {'size': Int}
  94. __slots__ = ()
  95. def __new__(cls, file='', struct='', size=0):
  96. return super().__new__(cls, file, struct,
  97. Int(size))
  98. def __add__(self, other):
  99. return StructResult(self.file, self.struct,
  100. self.size + other.size)
  101. def openio(path, mode='r'):
  102. if path == '-':
  103. if mode == 'r':
  104. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  105. else:
  106. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  107. else:
  108. return open(path, mode)
  109. def collect(paths, *,
  110. objdump_tool=OBJDUMP_TOOL,
  111. build_dir=None,
  112. everything=False,
  113. **args):
  114. decl_pattern = re.compile(
  115. '^\s+(?P<no>[0-9]+)'
  116. '\s+(?P<dir>[0-9]+)'
  117. '\s+.*'
  118. '\s+(?P<file>[^\s]+)$')
  119. struct_pattern = re.compile(
  120. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  121. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  122. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  123. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  124. results = []
  125. for path in paths:
  126. # find decl, we want to filter by structs in .h files
  127. decls = {}
  128. # note objdump-tool may contain extra args
  129. cmd = objdump_tool + ['--dwarf=rawline', path]
  130. if args.get('verbose'):
  131. print(' '.join(shlex.quote(c) for c in cmd))
  132. proc = sp.Popen(cmd,
  133. stdout=sp.PIPE,
  134. stderr=sp.PIPE if not args.get('verbose') else None,
  135. universal_newlines=True,
  136. errors='replace')
  137. for line in proc.stdout:
  138. # find file numbers
  139. m = decl_pattern.match(line)
  140. if m:
  141. decls[int(m.group('no'))] = m.group('file')
  142. proc.wait()
  143. if proc.returncode != 0:
  144. if not args.get('verbose'):
  145. for line in proc.stderr:
  146. sys.stdout.write(line)
  147. sys.exit(-1)
  148. # collect structs as we parse dwarf info
  149. found = False
  150. name = None
  151. decl = None
  152. size = None
  153. # note objdump-tool may contain extra args
  154. cmd = objdump_tool + ['--dwarf=info', path]
  155. if args.get('verbose'):
  156. print(' '.join(shlex.quote(c) for c in cmd))
  157. proc = sp.Popen(cmd,
  158. stdout=sp.PIPE,
  159. stderr=sp.PIPE if not args.get('verbose') else None,
  160. universal_newlines=True,
  161. errors='replace')
  162. for line in proc.stdout:
  163. # state machine here to find structs
  164. m = struct_pattern.match(line)
  165. if m:
  166. if m.group('tag'):
  167. if (name is not None
  168. and decl is not None
  169. and size is not None):
  170. file = decls.get(decl, '?')
  171. # map to source file
  172. file = re.sub('\.o$', '.c', file)
  173. if build_dir:
  174. file = re.sub(
  175. '%s/*' % re.escape(build_dir), '',
  176. file)
  177. # only include structs declared in header files in the
  178. # current directory, ignore internal-only structs (
  179. # these are represented in other measurements)
  180. if everything or file.endswith('.h'):
  181. results.append(StructResult(file, name, size))
  182. found = (m.group('tag') == 'structure_type')
  183. name = None
  184. decl = None
  185. size = None
  186. elif found and m.group('name'):
  187. name = m.group('name')
  188. elif found and name and m.group('decl'):
  189. decl = int(m.group('decl'))
  190. elif found and name and m.group('size'):
  191. size = int(m.group('size'))
  192. proc.wait()
  193. if proc.returncode != 0:
  194. if not args.get('verbose'):
  195. for line in proc.stderr:
  196. sys.stdout.write(line)
  197. sys.exit(-1)
  198. return results
  199. def fold(Result, results, *,
  200. by=None,
  201. defines=None,
  202. **_):
  203. if by is None:
  204. by = Result._by
  205. for k in it.chain(by or [], (k for k, _ in defines or [])):
  206. if k not in Result._by and k not in Result._fields:
  207. print("error: could not find field %r?" % k)
  208. sys.exit(-1)
  209. # filter by matching defines
  210. if defines is not None:
  211. results_ = []
  212. for r in results:
  213. if all(getattr(r, k) in vs for k, vs in defines):
  214. results_.append(r)
  215. results = results_
  216. # organize results into conflicts
  217. folding = co.OrderedDict()
  218. for r in results:
  219. name = tuple(getattr(r, k) for k in by)
  220. if name not in folding:
  221. folding[name] = []
  222. folding[name].append(r)
  223. # merge conflicts
  224. folded = []
  225. for name, rs in folding.items():
  226. folded.append(sum(rs[1:], start=rs[0]))
  227. return folded
  228. def table(Result, results, diff_results=None, *,
  229. by=None,
  230. fields=None,
  231. sort=None,
  232. summary=False,
  233. all=False,
  234. percent=False,
  235. **_):
  236. all_, all = all, __builtins__.all
  237. if by is None:
  238. by = Result._by
  239. if fields is None:
  240. fields = Result._fields
  241. types = Result._types
  242. # fold again
  243. results = fold(Result, results, by=by)
  244. if diff_results is not None:
  245. diff_results = fold(Result, diff_results, by=by)
  246. # organize by name
  247. table = {
  248. ','.join(str(getattr(r, k) or '') for k in by): r
  249. for r in results}
  250. diff_table = {
  251. ','.join(str(getattr(r, k) or '') for k in by): r
  252. for r in diff_results or []}
  253. names = list(table.keys() | diff_table.keys())
  254. # sort again, now with diff info, note that python's sort is stable
  255. names.sort()
  256. if diff_results is not None:
  257. names.sort(key=lambda n: tuple(
  258. types[k].ratio(
  259. getattr(table.get(n), k, None),
  260. getattr(diff_table.get(n), k, None))
  261. for k in fields),
  262. reverse=True)
  263. if sort:
  264. for k, reverse in reversed(sort):
  265. names.sort(key=lambda n: (getattr(table[n], k),)
  266. if getattr(table.get(n), k, None) is not None else (),
  267. reverse=reverse ^ (not k or k in Result._fields))
  268. # build up our lines
  269. lines = []
  270. # header
  271. line = []
  272. line.append('%s%s' % (
  273. ','.join(by),
  274. ' (%d added, %d removed)' % (
  275. sum(1 for n in table if n not in diff_table),
  276. sum(1 for n in diff_table if n not in table))
  277. if diff_results is not None and not percent else '')
  278. if not summary else '')
  279. if diff_results is None:
  280. for k in fields:
  281. line.append(k)
  282. elif percent:
  283. for k in fields:
  284. line.append(k)
  285. else:
  286. for k in fields:
  287. line.append('o'+k)
  288. for k in fields:
  289. line.append('n'+k)
  290. for k in fields:
  291. line.append('d'+k)
  292. line.append('')
  293. lines.append(line)
  294. # entries
  295. if not summary:
  296. for name in names:
  297. r = table.get(name)
  298. if diff_results is not None:
  299. diff_r = diff_table.get(name)
  300. ratios = [
  301. types[k].ratio(
  302. getattr(r, k, None),
  303. getattr(diff_r, k, None))
  304. for k in fields]
  305. if not any(ratios) and not all_:
  306. continue
  307. line = []
  308. line.append(name)
  309. if diff_results is None:
  310. for k in fields:
  311. line.append(getattr(r, k).table()
  312. if getattr(r, k, None) is not None
  313. else types[k].none)
  314. elif percent:
  315. for k in fields:
  316. line.append(getattr(r, k).diff_table()
  317. if getattr(r, k, None) is not None
  318. else types[k].diff_none)
  319. else:
  320. for k in fields:
  321. line.append(getattr(diff_r, k).diff_table()
  322. if getattr(diff_r, k, None) is not None
  323. else types[k].diff_none)
  324. for k in fields:
  325. line.append(getattr(r, k).diff_table()
  326. if getattr(r, k, None) is not None
  327. else types[k].diff_none)
  328. for k in fields:
  329. line.append(types[k].diff_diff(
  330. getattr(r, k, None),
  331. getattr(diff_r, k, None)))
  332. if diff_results is None:
  333. line.append('')
  334. elif percent:
  335. line.append(' (%s)' % ', '.join(
  336. '+∞%' if t == +m.inf
  337. else '-∞%' if t == -m.inf
  338. else '%+.1f%%' % (100*t)
  339. for t in ratios))
  340. else:
  341. line.append(' (%s)' % ', '.join(
  342. '+∞%' if t == +m.inf
  343. else '-∞%' if t == -m.inf
  344. else '%+.1f%%' % (100*t)
  345. for t in ratios
  346. if t)
  347. if any(ratios) else '')
  348. lines.append(line)
  349. # total
  350. r = next(iter(fold(Result, results, by=[])), None)
  351. if diff_results is not None:
  352. diff_r = next(iter(fold(Result, diff_results, by=[])), None)
  353. ratios = [
  354. types[k].ratio(
  355. getattr(r, k, None),
  356. getattr(diff_r, k, None))
  357. for k in fields]
  358. line = []
  359. line.append('TOTAL')
  360. if diff_results is None:
  361. for k in fields:
  362. line.append(getattr(r, k).table()
  363. if getattr(r, k, None) is not None
  364. else types[k].none)
  365. elif percent:
  366. for k in fields:
  367. line.append(getattr(r, k).diff_table()
  368. if getattr(r, k, None) is not None
  369. else types[k].diff_none)
  370. else:
  371. for k in fields:
  372. line.append(getattr(diff_r, k).diff_table()
  373. if getattr(diff_r, k, None) is not None
  374. else types[k].diff_none)
  375. for k in fields:
  376. line.append(getattr(r, k).diff_table()
  377. if getattr(r, k, None) is not None
  378. else types[k].diff_none)
  379. for k in fields:
  380. line.append(types[k].diff_diff(
  381. getattr(r, k, None),
  382. getattr(diff_r, k, None)))
  383. if diff_results is None:
  384. line.append('')
  385. elif percent:
  386. line.append(' (%s)' % ', '.join(
  387. '+∞%' if t == +m.inf
  388. else '-∞%' if t == -m.inf
  389. else '%+.1f%%' % (100*t)
  390. for t in ratios))
  391. else:
  392. line.append(' (%s)' % ', '.join(
  393. '+∞%' if t == +m.inf
  394. else '-∞%' if t == -m.inf
  395. else '%+.1f%%' % (100*t)
  396. for t in ratios
  397. if t)
  398. if any(ratios) else '')
  399. lines.append(line)
  400. # find the best widths, note that column 0 contains the names and column -1
  401. # the ratios, so those are handled a bit differently
  402. widths = [
  403. ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
  404. for w, i in zip(
  405. it.chain([23], it.repeat(7)),
  406. range(len(lines[0])-1))]
  407. # print our table
  408. for line in lines:
  409. print('%-*s %s%s' % (
  410. widths[0], line[0],
  411. ' '.join('%*s' % (w, x)
  412. for w, x in zip(widths[1:], line[1:-1])),
  413. line[-1]))
  414. def main(obj_paths, *,
  415. by=None,
  416. fields=None,
  417. defines=None,
  418. sort=None,
  419. **args):
  420. # find sizes
  421. if not args.get('use', None):
  422. # find .o files
  423. paths = []
  424. for path in obj_paths:
  425. if os.path.isdir(path):
  426. path = path + '/*.o'
  427. for path in glob.glob(path):
  428. paths.append(path)
  429. if not paths:
  430. print("error: no .obj files found in %r?" % obj_paths)
  431. sys.exit(-1)
  432. results = collect(paths, **args)
  433. else:
  434. results = []
  435. with openio(args['use']) as f:
  436. reader = csv.DictReader(f, restval='')
  437. for r in reader:
  438. try:
  439. results.append(StructResult(
  440. **{k: r[k] for k in StructResult._by
  441. if k in r and r[k].strip()},
  442. **{k: r['struct_'+k]
  443. for k in StructResult._fields
  444. if 'struct_'+k in r
  445. and r['struct_'+k].strip()}))
  446. except TypeError:
  447. pass
  448. # fold
  449. results = fold(StructResult, results, by=by, defines=defines)
  450. # sort, note that python's sort is stable
  451. results.sort()
  452. if sort:
  453. for k, reverse in reversed(sort):
  454. results.sort(key=lambda r: (getattr(r, k),)
  455. if getattr(r, k) is not None else (),
  456. reverse=reverse ^ (not k or k in StructResult._fields))
  457. # write results to CSV
  458. if args.get('output'):
  459. with openio(args['output'], 'w') as f:
  460. writer = csv.DictWriter(f, StructResult._by
  461. + ['struct_'+k for k in StructResult._fields])
  462. writer.writeheader()
  463. for r in results:
  464. writer.writerow(
  465. {k: getattr(r, k) for k in StructResult._by}
  466. | {'struct_'+k: getattr(r, k)
  467. for k in StructResult._fields})
  468. # find previous results?
  469. if args.get('diff'):
  470. diff_results = []
  471. try:
  472. with openio(args['diff']) as f:
  473. reader = csv.DictReader(f, restval='')
  474. for r in reader:
  475. try:
  476. diff_results.append(StructResult(
  477. **{k: r[k] for k in StructResult._by
  478. if k in r and r[k].strip()},
  479. **{k: r['struct_'+k]
  480. for k in StructResult._fields
  481. if 'struct_'+k in r
  482. and r['struct_'+k].strip()}))
  483. except TypeError:
  484. pass
  485. except FileNotFoundError:
  486. pass
  487. # fold
  488. diff_results = fold(StructResult, diff_results, by=by, defines=defines)
  489. # print table
  490. if not args.get('quiet'):
  491. table(StructResult, results,
  492. diff_results if args.get('diff') else None,
  493. by=by if by is not None else ['struct'],
  494. fields=fields,
  495. sort=sort,
  496. **args)
  497. if __name__ == "__main__":
  498. import argparse
  499. import sys
  500. parser = argparse.ArgumentParser(
  501. description="Find struct sizes.")
  502. parser.add_argument(
  503. 'obj_paths',
  504. nargs='*',
  505. default=OBJ_PATHS,
  506. help="Description of where to find *.o files. May be a directory "
  507. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  508. parser.add_argument(
  509. '-v', '--verbose',
  510. action='store_true',
  511. help="Output commands that run behind the scenes.")
  512. parser.add_argument(
  513. '-q', '--quiet',
  514. action='store_true',
  515. help="Don't show anything, useful with -o.")
  516. parser.add_argument(
  517. '-o', '--output',
  518. help="Specify CSV file to store results.")
  519. parser.add_argument(
  520. '-u', '--use',
  521. help="Don't parse anything, use this CSV file.")
  522. parser.add_argument(
  523. '-d', '--diff',
  524. help="Specify CSV file to diff against.")
  525. parser.add_argument(
  526. '-a', '--all',
  527. action='store_true',
  528. help="Show all, not just the ones that changed.")
  529. parser.add_argument(
  530. '-p', '--percent',
  531. action='store_true',
  532. help="Only show percentage change, not a full diff.")
  533. parser.add_argument(
  534. '-b', '--by',
  535. action='append',
  536. choices=StructResult._by,
  537. help="Group by this field.")
  538. parser.add_argument(
  539. '-f', '--field',
  540. dest='fields',
  541. action='append',
  542. choices=StructResult._fields,
  543. help="Show this field.")
  544. parser.add_argument(
  545. '-D', '--define',
  546. dest='defines',
  547. action='append',
  548. type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
  549. help="Only include results where this field is this value.")
  550. class AppendSort(argparse.Action):
  551. def __call__(self, parser, namespace, value, option):
  552. if namespace.sort is None:
  553. namespace.sort = []
  554. namespace.sort.append((value, True if option == '-S' else False))
  555. parser.add_argument(
  556. '-s', '--sort',
  557. action=AppendSort,
  558. help="Sort by this field.")
  559. parser.add_argument(
  560. '-S', '--reverse-sort',
  561. action=AppendSort,
  562. help="Sort by this field, but backwards.")
  563. parser.add_argument(
  564. '-Y', '--summary',
  565. action='store_true',
  566. help="Only show the total.")
  567. parser.add_argument(
  568. '-A', '--everything',
  569. action='store_true',
  570. help="Include builtin and libc specific symbols.")
  571. parser.add_argument(
  572. '--objdump-tool',
  573. type=lambda x: x.split(),
  574. default=OBJDUMP_TOOL,
  575. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  576. parser.add_argument(
  577. '--build-dir',
  578. help="Specify the relative build directory. Used to map object files "
  579. "to the correct source files.")
  580. sys.exit(main(**{k: v
  581. for k, v in vars(parser.parse_intermixed_args()).items()
  582. if v is not None}))