struct_.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
  5. # Example:
  6. # ./scripts/struct_.py lfs.o lfs_util.o -S
  7. #
  8. # Copyright (c) 2022, The littlefs authors.
  9. # SPDX-License-Identifier: BSD-3-Clause
  10. #
  11. import collections as co
  12. import csv
  13. import glob
  14. import itertools as it
  15. import math as m
  16. import os
  17. import re
  18. import shlex
  19. import subprocess as sp
# default object-file paths, used when no obj_paths are given on the
# command line; entries are expanded with glob
OBJ_PATHS = ['*.o']
# default objdump command as an argv list, may carry extra arguments
OBJDUMP_TOOL = ['objdump']
  22. # integer fields
  23. class IntField(co.namedtuple('IntField', 'x')):
  24. __slots__ = ()
  25. def __new__(cls, x=0):
  26. if isinstance(x, IntField):
  27. return x
  28. if isinstance(x, str):
  29. try:
  30. x = int(x, 0)
  31. except ValueError:
  32. # also accept +-∞ and +-inf
  33. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  34. x = m.inf
  35. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  36. x = -m.inf
  37. else:
  38. raise
  39. assert isinstance(x, int) or m.isinf(x), x
  40. return super().__new__(cls, x)
  41. def __str__(self):
  42. if self.x == m.inf:
  43. return '∞'
  44. elif self.x == -m.inf:
  45. return '-∞'
  46. else:
  47. return str(self.x)
  48. def __int__(self):
  49. assert not m.isinf(self.x)
  50. return self.x
  51. def __float__(self):
  52. return float(self.x)
  53. none = '%7s' % '-'
  54. def table(self):
  55. return '%7s' % (self,)
  56. diff_none = '%7s' % '-'
  57. diff_table = table
  58. def diff_diff(self, other):
  59. new = self.x if self else 0
  60. old = other.x if other else 0
  61. diff = new - old
  62. if diff == +m.inf:
  63. return '%7s' % '+∞'
  64. elif diff == -m.inf:
  65. return '%7s' % '-∞'
  66. else:
  67. return '%+7d' % diff
  68. def ratio(self, other):
  69. new = self.x if self else 0
  70. old = other.x if other else 0
  71. if m.isinf(new) and m.isinf(old):
  72. return 0.0
  73. elif m.isinf(new):
  74. return +m.inf
  75. elif m.isinf(old):
  76. return -m.inf
  77. elif not old and not new:
  78. return 0.0
  79. elif not old:
  80. return 1.0
  81. else:
  82. return (new-old) / old
  83. def __add__(self, other):
  84. return IntField(self.x + other.x)
  85. def __sub__(self, other):
  86. return IntField(self.x - other.x)
  87. def __mul__(self, other):
  88. return IntField(self.x * other.x)
  89. def __lt__(self, other):
  90. return self.x < other.x
  91. def __gt__(self, other):
  92. return self.__class__.__lt__(other, self)
  93. def __le__(self, other):
  94. return not self.__gt__(other)
  95. def __ge__(self, other):
  96. return not self.__lt__(other)
  97. # struct size results
  98. class StructResult(co.namedtuple('StructResult', 'file,struct,struct_size')):
  99. __slots__ = ()
  100. def __new__(cls, file, struct, struct_size):
  101. return super().__new__(cls, file, struct, IntField(struct_size))
  102. def __add__(self, other):
  103. return StructResult(self.file, self.struct,
  104. self.struct_size + other.struct_size)
  105. def openio(path, mode='r'):
  106. if path == '-':
  107. if mode == 'r':
  108. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  109. else:
  110. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  111. else:
  112. return open(path, mode)
  113. def collect(paths, *,
  114. objdump_tool=OBJDUMP_TOOL,
  115. build_dir=None,
  116. everything=False,
  117. **args):
  118. decl_pattern = re.compile(
  119. '^\s+(?P<no>[0-9]+)'
  120. '\s+(?P<dir>[0-9]+)'
  121. '\s+.*'
  122. '\s+(?P<file>[^\s]+)$')
  123. struct_pattern = re.compile(
  124. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  125. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  126. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  127. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  128. results = []
  129. for path in paths:
  130. # find decl, we want to filter by structs in .h files
  131. decls = {}
  132. # note objdump-tool may contain extra args
  133. cmd = objdump_tool + ['--dwarf=rawline', path]
  134. if args.get('verbose'):
  135. print(' '.join(shlex.quote(c) for c in cmd))
  136. proc = sp.Popen(cmd,
  137. stdout=sp.PIPE,
  138. stderr=sp.PIPE if not args.get('verbose') else None,
  139. universal_newlines=True,
  140. errors='replace')
  141. for line in proc.stdout:
  142. # find file numbers
  143. m = decl_pattern.match(line)
  144. if m:
  145. decls[int(m.group('no'))] = m.group('file')
  146. proc.wait()
  147. if proc.returncode != 0:
  148. if not args.get('verbose'):
  149. for line in proc.stderr:
  150. sys.stdout.write(line)
  151. sys.exit(-1)
  152. # collect structs as we parse dwarf info
  153. found = False
  154. name = None
  155. decl = None
  156. size = None
  157. # note objdump-tool may contain extra args
  158. cmd = objdump_tool + ['--dwarf=info', path]
  159. if args.get('verbose'):
  160. print(' '.join(shlex.quote(c) for c in cmd))
  161. proc = sp.Popen(cmd,
  162. stdout=sp.PIPE,
  163. stderr=sp.PIPE if not args.get('verbose') else None,
  164. universal_newlines=True,
  165. errors='replace')
  166. for line in proc.stdout:
  167. # state machine here to find structs
  168. m = struct_pattern.match(line)
  169. if m:
  170. if m.group('tag'):
  171. if (name is not None
  172. and decl is not None
  173. and size is not None):
  174. file = decls.get(decl, '?')
  175. # map to source file
  176. file = re.sub('\.o$', '.c', file)
  177. if build_dir:
  178. file = re.sub(
  179. '%s/*' % re.escape(build_dir), '',
  180. file)
  181. # only include structs declared in header files in the
  182. # current directory, ignore internal-only structs (
  183. # these are represented in other measurements)
  184. if everything or file.endswith('.h'):
  185. results.append(StructResult(file, name, size))
  186. found = (m.group('tag') == 'structure_type')
  187. name = None
  188. decl = None
  189. size = None
  190. elif found and m.group('name'):
  191. name = m.group('name')
  192. elif found and name and m.group('decl'):
  193. decl = int(m.group('decl'))
  194. elif found and name and m.group('size'):
  195. size = int(m.group('size'))
  196. proc.wait()
  197. if proc.returncode != 0:
  198. if not args.get('verbose'):
  199. for line in proc.stderr:
  200. sys.stdout.write(line)
  201. sys.exit(-1)
  202. return results
  203. def fold(results, *,
  204. by=['file', 'struct'],
  205. **_):
  206. folding = co.OrderedDict()
  207. for r in results:
  208. name = tuple(getattr(r, k) for k in by)
  209. if name not in folding:
  210. folding[name] = []
  211. folding[name].append(r)
  212. folded = []
  213. for rs in folding.values():
  214. folded.append(sum(rs[1:], start=rs[0]))
  215. return folded
  216. def table(results, diff_results=None, *,
  217. by_file=False,
  218. size_sort=False,
  219. reverse_size_sort=False,
  220. summary=False,
  221. all=False,
  222. percent=False,
  223. **_):
  224. all_, all = all, __builtins__.all
  225. # fold
  226. results = fold(results, by=['file' if by_file else 'struct'])
  227. if diff_results is not None:
  228. diff_results = fold(diff_results,
  229. by=['file' if by_file else 'struct'])
  230. table = {
  231. r.file if by_file else r.struct: r
  232. for r in results}
  233. diff_table = {
  234. r.file if by_file else r.struct: r
  235. for r in diff_results or []}
  236. # sort, note that python's sort is stable
  237. names = list(table.keys() | diff_table.keys())
  238. names.sort()
  239. if diff_results is not None:
  240. names.sort(key=lambda n: -IntField.ratio(
  241. table[n].struct_size if n in table else None,
  242. diff_table[n].struct_size if n in diff_table else None))
  243. if size_sort:
  244. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  245. reverse=True)
  246. elif reverse_size_sort:
  247. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  248. reverse=False)
  249. # print header
  250. if not summary:
  251. title = '%s%s' % (
  252. 'file' if by_file else 'struct',
  253. ' (%d added, %d removed)' % (
  254. sum(1 for n in table if n not in diff_table),
  255. sum(1 for n in diff_table if n not in table))
  256. if diff_results is not None and not percent else '')
  257. name_width = max(it.chain([23, len(title)], (len(n) for n in names)))
  258. else:
  259. title = ''
  260. name_width = 23
  261. name_width = 4*((name_width+1+4-1)//4)-1
  262. print('%-*s ' % (name_width, title), end='')
  263. if diff_results is None:
  264. print(' %s' % ('size'.rjust(len(IntField.none))))
  265. elif percent:
  266. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  267. else:
  268. print(' %s %s %s' % (
  269. 'old'.rjust(len(IntField.diff_none)),
  270. 'new'.rjust(len(IntField.diff_none)),
  271. 'diff'.rjust(len(IntField.diff_none))))
  272. # print entries
  273. if not summary:
  274. for name in names:
  275. r = table.get(name)
  276. if diff_results is not None:
  277. diff_r = diff_table.get(name)
  278. ratio = IntField.ratio(
  279. r.struct_size if r else None,
  280. diff_r.struct_size if diff_r else None)
  281. if not ratio and not all_:
  282. continue
  283. print('%-*s ' % (name_width, name), end='')
  284. if diff_results is None:
  285. print(' %s' % (
  286. r.struct_size.table()
  287. if r else IntField.none))
  288. elif percent:
  289. print(' %s%s' % (
  290. r.struct_size.diff_table()
  291. if r else IntField.diff_none,
  292. ' (%s)' % (
  293. '+∞%' if ratio == +m.inf
  294. else '-∞%' if ratio == -m.inf
  295. else '%+.1f%%' % (100*ratio))))
  296. else:
  297. print(' %s %s %s%s' % (
  298. diff_r.struct_size.diff_table()
  299. if diff_r else IntField.diff_none,
  300. r.struct_size.diff_table()
  301. if r else IntField.diff_none,
  302. IntField.diff_diff(
  303. r.struct_size if r else None,
  304. diff_r.struct_size if diff_r else None)
  305. if r or diff_r else IntField.diff_none,
  306. ' (%s)' % (
  307. '+∞%' if ratio == +m.inf
  308. else '-∞%' if ratio == -m.inf
  309. else '%+.1f%%' % (100*ratio))
  310. if ratio else ''))
  311. # print total
  312. total = fold(results, by=[])
  313. r = total[0] if total else None
  314. if diff_results is not None:
  315. diff_total = fold(diff_results, by=[])
  316. diff_r = diff_total[0] if diff_total else None
  317. ratio = IntField.ratio(
  318. r.struct_size if r else None,
  319. diff_r.struct_size if diff_r else None)
  320. print('%-*s ' % (name_width, 'TOTAL'), end='')
  321. if diff_results is None:
  322. print(' %s' % (
  323. r.struct_size.table()
  324. if r else IntField.none))
  325. elif percent:
  326. print(' %s%s' % (
  327. r.struct_size.diff_table()
  328. if r else IntField.diff_none,
  329. ' (%s)' % (
  330. '+∞%' if ratio == +m.inf
  331. else '-∞%' if ratio == -m.inf
  332. else '%+.1f%%' % (100*ratio))))
  333. else:
  334. print(' %s %s %s%s' % (
  335. diff_r.struct_size.diff_table()
  336. if diff_r else IntField.diff_none,
  337. r.struct_size.diff_table()
  338. if r else IntField.diff_none,
  339. IntField.diff_diff(
  340. r.struct_size if r else None,
  341. diff_r.struct_size if diff_r else None)
  342. if r or diff_r else IntField.diff_none,
  343. ' (%s)' % (
  344. '+∞%' if ratio == +m.inf
  345. else '-∞%' if ratio == -m.inf
  346. else '%+.1f%%' % (100*ratio))
  347. if ratio else ''))
  348. def main(obj_paths, **args):
  349. # find sizes
  350. if not args.get('use', None):
  351. # find .o files
  352. paths = []
  353. for path in obj_paths:
  354. if os.path.isdir(path):
  355. path = path + '/*.o'
  356. for path in glob.glob(path):
  357. paths.append(path)
  358. if not paths:
  359. print('no .obj files found in %r?' % obj_paths)
  360. sys.exit(-1)
  361. results = collect(paths, **args)
  362. else:
  363. results = []
  364. with openio(args['use']) as f:
  365. reader = csv.DictReader(f, restval='')
  366. for r in reader:
  367. try:
  368. results.append(StructResult(**{
  369. k: v for k, v in r.items()
  370. if k in StructResult._fields}))
  371. except TypeError:
  372. pass
  373. # fold to remove duplicates
  374. results = fold(results)
  375. # sort because why not
  376. results.sort()
  377. # write results to CSV
  378. if args.get('output'):
  379. with openio(args['output'], 'w') as f:
  380. writer = csv.DictWriter(f, StructResult._fields)
  381. writer.writeheader()
  382. for r in results:
  383. writer.writerow(r._asdict())
  384. # find previous results?
  385. if args.get('diff'):
  386. diff_results = []
  387. try:
  388. with openio(args['diff']) as f:
  389. reader = csv.DictReader(f, restval='')
  390. for r in reader:
  391. try:
  392. diff_results.append(StructResult(**{
  393. k: v for k, v in r.items()
  394. if k in StructResult._fields}))
  395. except TypeError:
  396. pass
  397. except FileNotFoundError:
  398. pass
  399. # fold to remove duplicates
  400. diff_results = fold(diff_results)
  401. # print table
  402. if not args.get('quiet'):
  403. table(
  404. results,
  405. diff_results if args.get('diff') else None,
  406. **args)
  407. if __name__ == "__main__":
  408. import argparse
  409. import sys
  410. parser = argparse.ArgumentParser(
  411. description="Find struct sizes.")
  412. parser.add_argument(
  413. 'obj_paths',
  414. nargs='*',
  415. default=OBJ_PATHS,
  416. help="Description of where to find *.o files. May be a directory "
  417. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  418. parser.add_argument(
  419. '-v', '--verbose',
  420. action='store_true',
  421. help="Output commands that run behind the scenes.")
  422. parser.add_argument(
  423. '-q', '--quiet',
  424. action='store_true',
  425. help="Don't show anything, useful with -o.")
  426. parser.add_argument(
  427. '-o', '--output',
  428. help="Specify CSV file to store results.")
  429. parser.add_argument(
  430. '-u', '--use',
  431. help="Don't parse anything, use this CSV file.")
  432. parser.add_argument(
  433. '-d', '--diff',
  434. help="Specify CSV file to diff against.")
  435. parser.add_argument(
  436. '-a', '--all',
  437. action='store_true',
  438. help="Show all, not just the ones that changed.")
  439. parser.add_argument(
  440. '-p', '--percent',
  441. action='store_true',
  442. help="Only show percentage change, not a full diff.")
  443. parser.add_argument(
  444. '-b', '--by-file',
  445. action='store_true',
  446. help="Group by file. Note this does not include padding "
  447. "so sizes may differ from other tools.")
  448. parser.add_argument(
  449. '-s', '--size-sort',
  450. action='store_true',
  451. help="Sort by size.")
  452. parser.add_argument(
  453. '-S', '--reverse-size-sort',
  454. action='store_true',
  455. help="Sort by size, but backwards.")
  456. parser.add_argument(
  457. '-Y', '--summary',
  458. action='store_true',
  459. help="Only show the total size.")
  460. parser.add_argument(
  461. '-A', '--everything',
  462. action='store_true',
  463. help="Include builtin and libc specific symbols.")
  464. parser.add_argument(
  465. '--objdump-tool',
  466. type=lambda x: x.split(),
  467. default=OBJDUMP_TOOL,
  468. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  469. parser.add_argument(
  470. '--build-dir',
  471. help="Specify the relative build directory. Used to map object files "
  472. "to the correct source files.")
  473. sys.exit(main(**{k: v
  474. for k, v in vars(parser.parse_intermixed_args()).items()
  475. if v is not None}))