struct_.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
  5. # Example:
  6. # ./scripts/struct_.py lfs.o lfs_util.o -Ssize
  7. #
  8. # Copyright (c) 2022, The littlefs authors.
  9. # SPDX-License-Identifier: BSD-3-Clause
  10. #
  11. import collections as co
  12. import csv
  13. import difflib
  14. import glob
  15. import itertools as it
  16. import math as m
  17. import os
  18. import re
  19. import shlex
  20. import subprocess as sp
  # default object-file patterns, used when no obj_paths are given on the
  # command line
  OBJ_PATHS = ['*.o']
  # default objdump command, kept as an argv list so the --dwarf=... option
  # and the object path can be appended to it
  OBJDUMP_TOOL = ['objdump']
  # integer fields
  class Int(co.namedtuple('Int', 'x')):
      """Integer table field that may also be +/- infinity.

      The constructor accepts another Int (returned unchanged), an int, an
      infinite float, or a string. Strings are parsed with int(x, 0), so
      prefixed literals such as 0x10 work, and the spellings ∞/inf with an
      optional sign map to +/-math.inf. Any other string re-raises the
      ValueError from int().

      ratio and diff_diff treat a missing (None) side as 0, so they can be
      called through the class with None in place of either operand.
      """
      __slots__ = ()

      def __new__(cls, x=0):
          if isinstance(x, Int):
              return x
          if isinstance(x, str):
              try:
                  x = int(x, 0)
              except ValueError:
                  # also accept +-∞ and +-inf
                  #
                  # raw strings, \s and \+ are regex escapes, not string
                  # escapes
                  if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                      x = m.inf
                  elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                      x = -m.inf
                  else:
                      raise
          assert isinstance(x, int) or m.isinf(x), x
          return super().__new__(cls, x)

      def __str__(self):
          if self.x == m.inf:
              return '∞'
          elif self.x == -m.inf:
              return '-∞'
          else:
              return str(self.x)

      def __int__(self):
          assert not m.isinf(self.x)
          return self.x

      def __float__(self):
          return float(self.x)

      # placeholder cell for a missing value
      none = '%7s' % '-'

      def table(self):
          """Return the value right-aligned in a 7-character cell."""
          return '%7s' % (self,)

      # the diff columns render the same way as the plain columns
      diff_none = '%7s' % '-'
      diff_table = table

      def diff_diff(self, other):
          """Return the signed difference self - other as a 7-character cell."""
          new = self.x if self else 0
          old = other.x if other else 0
          diff = new - old
          if diff != diff:
              # inf - inf with the same sign is NaN, which %d can't format,
              # report it as no change to agree with ratio()
              return '%+7d' % 0
          elif diff == +m.inf:
              return '%7s' % '+∞'
          elif diff == -m.inf:
              return '%7s' % '-∞'
          else:
              return '%+7d' % diff

      def ratio(self, other):
          """Return the relative change from other to self as a float.

          Two infinities or two zeros give 0.0, a new value where there was
          none gives 1.0, and a single infinite side gives +/-inf.
          """
          new = self.x if self else 0
          old = other.x if other else 0
          if m.isinf(new) and m.isinf(old):
              return 0.0
          elif m.isinf(new):
              return +m.inf
          elif m.isinf(old):
              return -m.inf
          elif not old and not new:
              return 0.0
          elif not old:
              return 1.0
          else:
              return (new-old) / old

      def __add__(self, other):
          return self.__class__(self.x + other.x)

      def __sub__(self, other):
          return self.__class__(self.x - other.x)

      def __mul__(self, other):
          return self.__class__(self.x * other.x)
  # struct size results
  class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
      """One struct's size, keyed by the file and struct name it belongs to.

      The size is always stored as an Int. Adding two results keeps the
      left-hand file and struct and sums the sizes.
      """
      # columns that identify/group a result
      _by = ['file', 'struct']
      # columns that carry a measured value
      #
      # NOTE(review): this rebinds the _fields attribute namedtuple itself
      # defines, so namedtuple helpers that read _fields may not see the
      # three real fields; confirm before relying on them
      _fields = ['size']
      # type used to parse and render each value column
      _types = {'size': Int}
      __slots__ = ()

      def __new__(cls, file='', struct='', size=0):
          parsed_size = Int(size)
          return super().__new__(cls, file, struct, parsed_size)

      def __add__(self, other):
          total = self.size + other.size
          return StructResult(file=self.file, struct=self.struct, size=total)
  def openio(path, mode='r'):
      """Open path for reading or writing, treating '-' as stdin/stdout.

      For '-' a duplicate of the standard stream's file descriptor is
      wrapped, so closing the returned file leaves sys.stdin/sys.stdout
      open. Mode 'r' selects stdin, any other mode selects stdout. Any
      other path is passed straight to open().
      """
      # imported here so the function also works when this file is imported
      # as a module, at file level sys is only imported under the __main__
      # guard
      import sys

      if path != '-':
          return open(path, mode)

      if mode == 'r':
          return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
      else:
          return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  def _spawn_objdump(objdump_tool, section, path, verbose):
      """Start objdump --dwarf=<section> on path and return the Popen handle."""
      # note objdump-tool may contain extra args
      cmd = objdump_tool + ['--dwarf=%s' % section, path]
      if verbose:
          print(' '.join(shlex.quote(c) for c in cmd))
      # NOTE(review): stderr is only read after stdout is exhausted, a very
      # chatty stderr could presumably fill its pipe and stall objdump
      return sp.Popen(cmd,
          stdout=sp.PIPE,
          stderr=sp.PIPE if not verbose else None,
          universal_newlines=True,
          errors='replace',
          close_fds=False)


  def _finish_objdump(proc, verbose):
      """Wait for objdump, on failure relay its stderr and exit with -1."""
      # imported here, at file level sys is only imported under the __main__
      # guard
      import sys

      proc.wait()
      if proc.returncode != 0:
          if not verbose:
              for line in proc.stderr:
                  sys.stdout.write(line)
          sys.exit(-1)


  def collect(obj_paths, *,
          objdump_tool=OBJDUMP_TOOL,
          sources=None,
          everything=False,
          internal=False,
          **args):
      """Collect struct sizes from the DWARF info of each object file.

      objdump is run twice per object file: once with --dwarf=rawline to
      map file numbers to paths, and once with --dwarf=info to find
      DW_TAG_structure_type entries and their name, decl_file and byte_size.

      Args:
          obj_paths: Iterable of object file paths.
          objdump_tool: objdump command as an argv list.
          sources: If not None, keep only structs declared in one of these
              files (compared as absolute paths).
          everything: With no sources, also keep structs declared outside
              the current directory.
          internal: With no sources, also keep structs whose file does not
              end in .h.
          **args: Only 'verbose' is read, it echoes the objdump commands and
              leaves objdump's stderr attached to the terminal.

      Returns:
          A list of StructResult, with file relative to the current
          directory when it lives under it and absolute otherwise.

      Exits the process with -1 if objdump returns a non-zero status.
      """
      line_pattern = re.compile(
          r'^\s+(?P<no>[0-9]+)\s+'
              r'(?:(?P<dir>[0-9]+)\s+)?'
              r'.*\s+'
              r'(?P<path>[^\s]+)$')
      info_pattern = re.compile(
          r'^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
              r'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
              r'|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
              r'|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
      verbose = args.get('verbose')

      results = []
      for path in obj_paths:
          # find files, we want to filter by structs in .h files
          dirs = {}
          files = {}
          proc = _spawn_objdump(objdump_tool, 'rawline', path, verbose)
          for line in proc.stdout:
              # note that files contain references to dirs, which we
              # dereference as soon as we see them as each file table follows
              # a dir table
              match = line_pattern.match(line)
              if not match:
                  continue
              no = int(match.group('no'))
              if not match.group('dir'):
                  # found a directory entry
                  dirs[no] = match.group('path')
              else:
                  # found a file entry
                  dir_no = int(match.group('dir'))
                  if dir_no in dirs:
                      files[no] = os.path.join(
                          dirs[dir_no],
                          match.group('path'))
                  else:
                      files[no] = match.group('path')
          _finish_objdump(proc, verbose)

          # collect structs as we parse dwarf info
          #
          # NOTE(review): s_name/s_file/s_size are never reset between
          # entries, so a struct missing one of these attributes reports the
          # value last seen on an earlier entry
          results_ = []
          is_struct = False
          s_name = None
          s_file = None
          s_size = None
          proc = _spawn_objdump(objdump_tool, 'info', path, verbose)
          for line in proc.stdout:
              # state machine here to find structs
              match = info_pattern.match(line)
              if not match:
                  continue
              if match.group('tag'):
                  # a new entry starts, flush the struct we were collecting
                  if is_struct:
                      file = files.get(s_file, '?')
                      results_.append(StructResult(file, s_name, s_size))
                  is_struct = (match.group('tag') == 'DW_TAG_structure_type')
              elif match.group('name'):
                  s_name = match.group('name')
              elif match.group('file'):
                  s_file = int(match.group('file'))
              elif match.group('size'):
                  s_size = int(match.group('size'))
          # flush a struct still pending at end of output
          if is_struct:
              file = files.get(s_file, '?')
              results_.append(StructResult(file, s_name, s_size))
          _finish_objdump(proc, verbose)

          cwd = os.getcwd()

          def in_cwd(file):
              return os.path.commonpath([cwd, os.path.abspath(file)]) == cwd

          for r in results_:
              # ignore filtered sources
              if sources is not None:
                  if not any(
                          os.path.abspath(r.file) == os.path.abspath(s)
                          for s in sources):
                      continue
              else:
                  # default to only cwd
                  if not everything and not in_cwd(r.file):
                      continue

                  # limit to .h files unless --internal
                  #
                  # NOTE(review): assumed to apply only when no explicit
                  # sources are given; confirm against the intended nesting
                  if not internal and not r.file.endswith('.h'):
                      continue

              # simplify path
              if in_cwd(r.file):
                  file = os.path.relpath(r.file)
              else:
                  file = os.path.abspath(r.file)

              results.append(r._replace(file=file))

      return results
  def fold(Result, results, *,
          by=None,
          defines=None,
          **_):
      """Filter results and merge those that share the same `by` values.

      Args:
          Result: Result class, provides _by and _fields for validation.
          results: Iterable of Result instances.
          by: Attribute names to group by, defaults to Result._by. An empty
              list merges everything into a single result.
          defines: Optional iterable of (field, allowed_values) pairs, only
              results whose field value is in allowed_values are kept.

      Returns:
          A list with one merged result per distinct `by` tuple, in order of
          first appearance. Results are merged with +, left to right.

      Exits the process with -1 if `by` or `defines` names an unknown field.
      """
      # imported here, at file level sys is only imported under the __main__
      # guard
      import sys

      if by is None:
          by = Result._by

      for k in it.chain(by or [], (k for k, _ in defines or [])):
          if k not in Result._by and k not in Result._fields:
              print("error: could not find field %r?" % k)
              sys.exit(-1)

      # filter by matching defines
      if defines is not None:
          results = [r for r in results
              if all(getattr(r, k) in vs for k, vs in defines)]

      # organize results into conflicts
      folding = co.OrderedDict()
      for r in results:
          name = tuple(getattr(r, k) for k in by)
          folding.setdefault(name, []).append(r)

      # merge conflicts
      return [sum(rs[1:], start=rs[0]) for rs in folding.values()]
  def table(Result, results, diff_results=None, *,
          by=None,
          fields=None,
          sort=None,
          summary=False,
          all=False,
          percent=False,
          **_):
      """Print results, optionally diffed against diff_results, as a table.

      Args:
          Result: Result class, provides _by, _fields and _types.
          results: Current results.
          diff_results: Previous results to compare against, or None for a
              plain table.
          by: Attributes that form the row name, defaults to Result._by.
          fields: Value columns to show, defaults to Result._fields.
          sort: Optional list of (field, reverse) pairs applied on top of
              the default ordering.
          summary: Only print the header and the TOTAL row.
          all: When diffing, also show rows whose ratios are all zero.
          percent: When diffing, show only the new values plus the
              percentage change instead of old/new/diff columns.
      """
      # the `all` parameter hides the builtin, restore it under its own name
      #
      # note __builtins__ is only a module inside __main__, in an imported
      # module it is a dict, so go through the builtins module instead
      import builtins
      all_, all = all, builtins.all

      if by is None:
          by = Result._by
      if fields is None:
          fields = Result._fields
      types = Result._types

      # fold again
      results = fold(Result, results, by=by)
      if diff_results is not None:
          diff_results = fold(Result, diff_results, by=by)

      # organize by name
      def name_of(r):
          return ','.join(str(getattr(r, k) or '') for k in by)

      new_rows = {name_of(r): r for r in results}
      old_rows = {name_of(r): r for r in diff_results or []}
      names = list(new_rows.keys() | old_rows.keys())

      def ratios_of(r, diff_r):
          return [
              types[k].ratio(
                  getattr(r, k, None),
                  getattr(diff_r, k, None))
              for k in fields]

      # sort again, now with diff info, note that python's sort is stable
      names.sort()
      if diff_results is not None:
          names.sort(
              key=lambda n: tuple(ratios_of(new_rows.get(n), old_rows.get(n))),
              reverse=True)
      if sort:
          for k, reverse in reversed(sort):
              names.sort(key=lambda n: (getattr(new_rows[n], k),)
                      if getattr(new_rows.get(n), k, None) is not None else (),
                  reverse=reverse ^ (not k or k in Result._fields))

      # build up our lines
      lines = []

      # header
      if summary:
          title = ''
      else:
          title = ','.join(by)
          if diff_results is not None and not percent:
              title += ' (%d added, %d removed)' % (
                  sum(1 for n in new_rows if n not in old_rows),
                  sum(1 for n in old_rows if n not in new_rows))
      header = [title]
      if diff_results is None or percent:
          header.extend(fields)
      else:
          header.extend('o'+k for k in fields)
          header.extend('n'+k for k in fields)
          header.extend('d'+k for k in fields)
      header.append('')
      lines.append(header)

      def ratio_text(t):
          return ('+∞%' if t == +m.inf
              else '-∞%' if t == -m.inf
              else '%+.1f%%' % (100*t))

      def table_entry(name, r, diff_r=None, ratios=()):
          entry = [name]
          if diff_results is None:
              for k in fields:
                  entry.append(getattr(r, k).table()
                      if getattr(r, k, None) is not None
                      else types[k].none)
          elif percent:
              for k in fields:
                  entry.append(getattr(r, k).diff_table()
                      if getattr(r, k, None) is not None
                      else types[k].diff_none)
          else:
              # old columns, then new columns, then the difference
              for k in fields:
                  entry.append(getattr(diff_r, k).diff_table()
                      if getattr(diff_r, k, None) is not None
                      else types[k].diff_none)
              for k in fields:
                  entry.append(getattr(r, k).diff_table()
                      if getattr(r, k, None) is not None
                      else types[k].diff_none)
              for k in fields:
                  entry.append(types[k].diff_diff(
                      getattr(r, k, None),
                      getattr(diff_r, k, None)))
          # trailing ratio column
          if diff_results is None:
              entry.append('')
          elif percent:
              entry.append(' (%s)' % ', '.join(
                  ratio_text(t) for t in ratios))
          else:
              entry.append(' (%s)' % ', '.join(
                      ratio_text(t) for t in ratios if t)
                  if any(ratios) else '')
          return entry

      # entries
      if not summary:
          for name in names:
              r = new_rows.get(name)
              if diff_results is None:
                  diff_r = None
                  ratios = None
              else:
                  diff_r = old_rows.get(name)
                  ratios = ratios_of(r, diff_r)
                  # hide unchanged rows unless asked for everything
                  if not all_ and not any(ratios):
                      continue
              lines.append(table_entry(name, r, diff_r, ratios))

      # total
      r = next(iter(fold(Result, results, by=[])), None)
      if diff_results is None:
          diff_r = None
          ratios = None
      else:
          diff_r = next(iter(fold(Result, diff_results, by=[])), None)
          ratios = ratios_of(r, diff_r)
      lines.append(table_entry('TOTAL', r, diff_r, ratios))

      # find the best widths, note that column 0 contains the names and
      # column -1 the ratios, so those are handled a bit differently
      widths = [
          ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
          for w, i in zip(
              it.chain([23], it.repeat(7)),
              range(len(lines[0])-1))]

      # print our table
      for line in lines:
          print('%-*s %s%s' % (
              widths[0], line[0],
              ' '.join('%*s' % (w, x)
                  for w, x in zip(widths[1:], line[1:-1])),
              line[-1]))
  def _read_csv_results(path):
      """Load StructResults from a CSV file at path, opened with openio.

      Only the StructResult._by columns and the struct_-prefixed value
      columns are read, and blank cells are left to the StructResult
      defaults. Rows that StructResult rejects with a TypeError are skipped.
      """
      results = []
      with openio(path) as f:
          reader = csv.DictReader(f, restval='')
          for r in reader:
              try:
                  results.append(StructResult(
                      **{k: r[k] for k in StructResult._by
                          if k in r and r[k].strip()},
                      **{k: r['struct_'+k]
                          for k in StructResult._fields
                          if 'struct_'+k in r
                              and r['struct_'+k].strip()}))
              except TypeError:
                  pass
      return results


  def main(obj_paths, *,
          by=None,
          fields=None,
          defines=None,
          sort=None,
          **args):
      """Collect or load struct sizes, then write and/or print them.

      Args:
          obj_paths: Object files, globs, or directories (searched for *.o).
          by: Fields to group by, defaults to StructResult._by for folding
              and CSV output and to ['struct'] for the printed table.
          fields: Value columns to print.
          defines: (field, allowed_values) filters passed to fold.
          sort: List of (field, reverse) pairs.
          **args: Remaining options. 'use', 'output', 'diff' and 'quiet' are
              read here, and everything is forwarded to collect and table.

      Exits the process with -1 if no object files are found.
      """
      # imported here, at file level sys is only imported under the __main__
      # guard
      import sys

      # find sizes
      if not args.get('use', None):
          # find .o files
          paths = []
          for path in obj_paths:
              if os.path.isdir(path):
                  path = path + '/*.o'
              paths.extend(glob.glob(path))

          if not paths:
              print("error: no .o files found in %r?" % obj_paths)
              sys.exit(-1)

          results = collect(paths, **args)
      else:
          results = _read_csv_results(args['use'])

      # fold
      results = fold(StructResult, results, by=by, defines=defines)

      # sort, note that python's sort is stable
      results.sort()
      if sort:
          for k, reverse in reversed(sort):
              results.sort(key=lambda r: (getattr(r, k),)
                      if getattr(r, k) is not None else (),
                  reverse=reverse ^ (not k or k in StructResult._fields))

      # write results to CSV
      if args.get('output'):
          columns = by if by is not None else StructResult._by
          with openio(args['output'], 'w') as f:
              writer = csv.DictWriter(f,
                  columns + ['struct_'+k for k in StructResult._fields])
              writer.writeheader()
              for r in results:
                  writer.writerow(
                      {k: getattr(r, k) for k in columns}
                      | {'struct_'+k: getattr(r, k)
                          for k in StructResult._fields})

      # find previous results?
      if args.get('diff'):
          try:
              diff_results = _read_csv_results(args['diff'])
          except FileNotFoundError:
              # a missing diff file is treated as having no previous results
              diff_results = []

          # fold
          diff_results = fold(StructResult, diff_results, by=by, defines=defines)

      # print table
      if not args.get('quiet'):
          table(StructResult, results,
              diff_results if args.get('diff') else None,
              by=by if by is not None else ['struct'],
              fields=fields,
              sort=sort,
              **args)
  # command-line entry point, parses the options and hands them to main(),
  # options left at None are dropped so main's own defaults apply
  if __name__ == "__main__":
      import argparse
      import sys
      parser = argparse.ArgumentParser(
          description="Find struct sizes.",
          allow_abbrev=False)
      parser.add_argument(
          'obj_paths',
          nargs='*',
          default=OBJ_PATHS,
          help="Description of where to find *.o files. May be a directory "
              "or a list of paths. Defaults to %r." % OBJ_PATHS)
      parser.add_argument(
          '-v', '--verbose',
          action='store_true',
          help="Output commands that run behind the scenes.")
      parser.add_argument(
          '-q', '--quiet',
          action='store_true',
          help="Don't show anything, useful with -o.")
      parser.add_argument(
          '-o', '--output',
          help="Specify CSV file to store results.")
      parser.add_argument(
          '-u', '--use',
          help="Don't parse anything, use this CSV file.")
      parser.add_argument(
          '-d', '--diff',
          help="Specify CSV file to diff against.")
      parser.add_argument(
          '-a', '--all',
          action='store_true',
          help="Show all, not just the ones that changed.")
      parser.add_argument(
          '-p', '--percent',
          action='store_true',
          help="Only show percentage change, not a full diff.")
      parser.add_argument(
          '-b', '--by',
          action='append',
          choices=StructResult._by,
          help="Group by this field.")
      parser.add_argument(
          '-f', '--field',
          dest='fields',
          action='append',
          choices=StructResult._fields,
          help="Show this field.")
      parser.add_argument(
          '-D', '--define',
          dest='defines',
          action='append',
          # field=a,b,c becomes ('field', {'a', 'b', 'c'})
          type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
          help="Only include results where this field is this value.")
      class AppendSort(argparse.Action):
          """Append (field, reverse) to namespace.sort for -s/-S."""
          def __call__(self, parser, namespace, value, option):
              if namespace.sort is None:
                  namespace.sort = []
              # option is the flag as typed, so accept the long spelling
              # too, otherwise --reverse-sort would sort forwards
              namespace.sort.append(
                  (value, option in ('-S', '--reverse-sort')))
      parser.add_argument(
          '-s', '--sort',
          action=AppendSort,
          help="Sort by this field.")
      parser.add_argument(
          '-S', '--reverse-sort',
          action=AppendSort,
          help="Sort by this field, but backwards.")
      parser.add_argument(
          '-Y', '--summary',
          action='store_true',
          help="Only show the total.")
      parser.add_argument(
          '-F', '--source',
          dest='sources',
          action='append',
          help="Only consider definitions in this file. Defaults to anything "
              "in the current directory.")
      parser.add_argument(
          '--everything',
          action='store_true',
          help="Include builtin and libc specific symbols.")
      parser.add_argument(
          '--internal',
          action='store_true',
          help="Also show structs in .c files.")
      parser.add_argument(
          '--objdump-tool',
          type=lambda x: x.split(),
          default=OBJDUMP_TOOL,
          help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
      sys.exit(main(**{k: v
          for k, v in vars(parser.parse_intermixed_args()).items()
          if v is not None}))