struct_.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
#!/usr/bin/env python3
#
# Script to find struct sizes.
#
# Example:
# ./scripts/struct_.py lfs.o lfs_util.o -S
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
# default glob patterns used to locate object files when no paths are given
# on the command line
OBJ_PATHS = ['*.o']
# default objdump command; kept as a list because extra arguments are
# appended to it before it is executed
OBJDUMP_TOOL = ['objdump']
  22. # integer fields
  23. class IntField(co.namedtuple('IntField', 'x')):
  24. __slots__ = ()
  25. def __new__(cls, x=0):
  26. if isinstance(x, IntField):
  27. return x
  28. if isinstance(x, str):
  29. try:
  30. x = int(x, 0)
  31. except ValueError:
  32. # also accept +-∞ and +-inf
  33. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  34. x = m.inf
  35. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  36. x = -m.inf
  37. else:
  38. raise
  39. assert isinstance(x, int) or m.isinf(x), x
  40. return super().__new__(cls, x)
  41. def __str__(self):
  42. if self.x == m.inf:
  43. return '∞'
  44. elif self.x == -m.inf:
  45. return '-∞'
  46. else:
  47. return str(self.x)
  48. def __int__(self):
  49. assert not m.isinf(self.x)
  50. return self.x
  51. def __float__(self):
  52. return float(self.x)
  53. none = '%7s' % '-'
  54. def table(self):
  55. return '%7s' % (self,)
  56. diff_none = '%7s' % '-'
  57. diff_table = table
  58. def diff_diff(self, other):
  59. new = self.x if self else 0
  60. old = other.x if other else 0
  61. diff = new - old
  62. if diff == +m.inf:
  63. return '%7s' % '+∞'
  64. elif diff == -m.inf:
  65. return '%7s' % '-∞'
  66. else:
  67. return '%+7d' % diff
  68. def ratio(self, other):
  69. new = self.x if self else 0
  70. old = other.x if other else 0
  71. if m.isinf(new) and m.isinf(old):
  72. return 0.0
  73. elif m.isinf(new):
  74. return +m.inf
  75. elif m.isinf(old):
  76. return -m.inf
  77. elif not old and not new:
  78. return 0.0
  79. elif not old:
  80. return 1.0
  81. else:
  82. return (new-old) / old
  83. def __add__(self, other):
  84. return IntField(self.x + other.x)
  85. def __sub__(self, other):
  86. return IntField(self.x - other.x)
  87. def __mul__(self, other):
  88. return IntField(self.x * other.x)
  89. def __lt__(self, other):
  90. return self.x < other.x
  91. def __gt__(self, other):
  92. return self.__class__.__lt__(other, self)
  93. def __le__(self, other):
  94. return not self.__gt__(other)
  95. def __ge__(self, other):
  96. return not self.__lt__(other)
  97. # struct size results
  98. class StructResult(co.namedtuple('StructResult', 'file,struct,struct_size')):
  99. __slots__ = ()
  100. def __new__(cls, file, struct, struct_size):
  101. return super().__new__(cls, file, struct, IntField(struct_size))
  102. def __add__(self, other):
  103. return StructResult(self.file, self.struct,
  104. self.struct_size + other.struct_size)
  105. def openio(path, mode='r'):
  106. if path == '-':
  107. if mode == 'r':
  108. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  109. else:
  110. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  111. else:
  112. return open(path, mode)
  113. def collect(paths, *,
  114. objdump_tool=OBJDUMP_TOOL,
  115. build_dir=None,
  116. everything=False,
  117. **args):
  118. decl_pattern = re.compile(
  119. '^\s+(?P<no>[0-9]+)'
  120. '\s+(?P<dir>[0-9]+)'
  121. '\s+.*'
  122. '\s+(?P<file>[^\s]+)$')
  123. struct_pattern = re.compile(
  124. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  125. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  126. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  127. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  128. results = []
  129. for path in paths:
  130. # find decl, we want to filter by structs in .h files
  131. decls = {}
  132. # note objdump-tool may contain extra args
  133. cmd = objdump_tool + ['--dwarf=rawline', path]
  134. if args.get('verbose'):
  135. print(' '.join(shlex.quote(c) for c in cmd))
  136. proc = sp.Popen(cmd,
  137. stdout=sp.PIPE,
  138. stderr=sp.PIPE if not args.get('verbose') else None,
  139. universal_newlines=True,
  140. errors='replace')
  141. for line in proc.stdout:
  142. # find file numbers
  143. m = decl_pattern.match(line)
  144. if m:
  145. decls[int(m.group('no'))] = m.group('file')
  146. proc.wait()
  147. if proc.returncode != 0:
  148. if not args.get('verbose'):
  149. for line in proc.stderr:
  150. sys.stdout.write(line)
  151. sys.exit(-1)
  152. # collect structs as we parse dwarf info
  153. found = False
  154. name = None
  155. decl = None
  156. size = None
  157. # note objdump-tool may contain extra args
  158. cmd = objdump_tool + ['--dwarf=info', path]
  159. if args.get('verbose'):
  160. print(' '.join(shlex.quote(c) for c in cmd))
  161. proc = sp.Popen(cmd,
  162. stdout=sp.PIPE,
  163. stderr=sp.PIPE if not args.get('verbose') else None,
  164. universal_newlines=True,
  165. errors='replace')
  166. for line in proc.stdout:
  167. # state machine here to find structs
  168. m = struct_pattern.match(line)
  169. if m:
  170. if m.group('tag'):
  171. if (name is not None
  172. and decl is not None
  173. and size is not None):
  174. file = decls.get(decl, '?')
  175. # map to source file
  176. file = re.sub('\.o$', '.c', file)
  177. if build_dir:
  178. file = re.sub(
  179. '%s/*' % re.escape(build_dir), '',
  180. file)
  181. # only include structs declared in header files in the
  182. # current directory, ignore internal-only structs (
  183. # these are represented in other measurements)
  184. if everything or file.endswith('.h'):
  185. results.append(StructResult(file, name, size))
  186. found = (m.group('tag') == 'structure_type')
  187. name = None
  188. decl = None
  189. size = None
  190. elif found and m.group('name'):
  191. name = m.group('name')
  192. elif found and name and m.group('decl'):
  193. decl = int(m.group('decl'))
  194. elif found and name and m.group('size'):
  195. size = int(m.group('size'))
  196. proc.wait()
  197. if proc.returncode != 0:
  198. if not args.get('verbose'):
  199. for line in proc.stderr:
  200. sys.stdout.write(line)
  201. sys.exit(-1)
  202. return results
  203. def fold(results, *,
  204. by=['file', 'struct'],
  205. **_):
  206. folding = co.OrderedDict()
  207. for r in results:
  208. name = tuple(getattr(r, k) for k in by)
  209. if name not in folding:
  210. folding[name] = []
  211. folding[name].append(r)
  212. folded = []
  213. for rs in folding.values():
  214. folded.append(sum(rs[1:], start=rs[0]))
  215. return folded
  216. def table(results, diff_results=None, *,
  217. by_file=False,
  218. size_sort=False,
  219. reverse_size_sort=False,
  220. summary=False,
  221. all=False,
  222. percent=False,
  223. **_):
  224. all_, all = all, __builtins__.all
  225. # fold
  226. results = fold(results, by=['file' if by_file else 'struct'])
  227. if diff_results is not None:
  228. diff_results = fold(diff_results,
  229. by=['file' if by_file else 'struct'])
  230. table = {
  231. r.file if by_file else r.struct: r
  232. for r in results}
  233. diff_table = {
  234. r.file if by_file else r.struct: r
  235. for r in diff_results or []}
  236. # sort, note that python's sort is stable
  237. names = list(table.keys() | diff_table.keys())
  238. names.sort()
  239. if diff_results is not None:
  240. names.sort(key=lambda n: -IntField.ratio(
  241. table[n].struct_size if n in table else None,
  242. diff_table[n].struct_size if n in diff_table else None))
  243. if size_sort:
  244. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  245. reverse=True)
  246. elif reverse_size_sort:
  247. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  248. reverse=False)
  249. # print header
  250. print('%-36s' % ('%s%s' % (
  251. 'file' if by_file else 'struct',
  252. ' (%d added, %d removed)' % (
  253. sum(1 for n in table if n not in diff_table),
  254. sum(1 for n in diff_table if n not in table))
  255. if diff_results is not None and not percent else '')
  256. if not summary else ''),
  257. end='')
  258. if diff_results is None:
  259. print(' %s' % ('size'.rjust(len(IntField.none))))
  260. elif percent:
  261. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  262. else:
  263. print(' %s %s %s' % (
  264. 'old'.rjust(len(IntField.diff_none)),
  265. 'new'.rjust(len(IntField.diff_none)),
  266. 'diff'.rjust(len(IntField.diff_none))))
  267. # print entries
  268. if not summary:
  269. for name in names:
  270. r = table.get(name)
  271. if diff_results is not None:
  272. diff_r = diff_table.get(name)
  273. ratio = IntField.ratio(
  274. r.struct_size if r else None,
  275. diff_r.struct_size if diff_r else None)
  276. if not ratio and not all_:
  277. continue
  278. print('%-36s' % name, end='')
  279. if diff_results is None:
  280. print(' %s' % (
  281. r.struct_size.table()
  282. if r else IntField.none))
  283. elif percent:
  284. print(' %s%s' % (
  285. r.struct_size.diff_table()
  286. if r else IntField.diff_none,
  287. ' (%s)' % (
  288. '+∞%' if ratio == +m.inf
  289. else '-∞%' if ratio == -m.inf
  290. else '%+.1f%%' % (100*ratio))))
  291. else:
  292. print(' %s %s %s%s' % (
  293. diff_r.struct_size.diff_table()
  294. if diff_r else IntField.diff_none,
  295. r.struct_size.diff_table()
  296. if r else IntField.diff_none,
  297. IntField.diff_diff(
  298. r.struct_size if r else None,
  299. diff_r.struct_size if diff_r else None)
  300. if r or diff_r else IntField.diff_none,
  301. ' (%s)' % (
  302. '+∞%' if ratio == +m.inf
  303. else '-∞%' if ratio == -m.inf
  304. else '%+.1f%%' % (100*ratio))
  305. if ratio else ''))
  306. # print total
  307. total = fold(results, by=[])
  308. r = total[0] if total else None
  309. if diff_results is not None:
  310. diff_total = fold(diff_results, by=[])
  311. diff_r = diff_total[0] if diff_total else None
  312. ratio = IntField.ratio(
  313. r.struct_size if r else None,
  314. diff_r.struct_size if diff_r else None)
  315. print('%-36s' % 'TOTAL', end='')
  316. if diff_results is None:
  317. print(' %s' % (
  318. r.struct_size.table()
  319. if r else IntField.none))
  320. elif percent:
  321. print(' %s%s' % (
  322. r.struct_size.diff_table()
  323. if r else IntField.diff_none,
  324. ' (%s)' % (
  325. '+∞%' if ratio == +m.inf
  326. else '-∞%' if ratio == -m.inf
  327. else '%+.1f%%' % (100*ratio))))
  328. else:
  329. print(' %s %s %s%s' % (
  330. diff_r.struct_size.diff_table()
  331. if diff_r else IntField.diff_none,
  332. r.struct_size.diff_table()
  333. if r else IntField.diff_none,
  334. IntField.diff_diff(
  335. r.struct_size if r else None,
  336. diff_r.struct_size if diff_r else None)
  337. if r or diff_r else IntField.diff_none,
  338. ' (%s)' % (
  339. '+∞%' if ratio == +m.inf
  340. else '-∞%' if ratio == -m.inf
  341. else '%+.1f%%' % (100*ratio))
  342. if ratio else ''))
  343. def main(obj_paths, **args):
  344. # find sizes
  345. if not args.get('use', None):
  346. # find .o files
  347. paths = []
  348. for path in obj_paths:
  349. if os.path.isdir(path):
  350. path = path + '/*.o'
  351. for path in glob.glob(path):
  352. paths.append(path)
  353. if not paths:
  354. print('no .obj files found in %r?' % obj_paths)
  355. sys.exit(-1)
  356. results = collect(paths, **args)
  357. else:
  358. results = []
  359. with openio(args['use']) as f:
  360. reader = csv.DictReader(f, restval='')
  361. for r in reader:
  362. try:
  363. results.append(StructResult(**{
  364. k: v for k, v in r.items()
  365. if k in StructResult._fields}))
  366. except TypeError:
  367. pass
  368. # fold to remove duplicates
  369. results = fold(results)
  370. # sort because why not
  371. results.sort()
  372. # write results to CSV
  373. if args.get('output'):
  374. with openio(args['output'], 'w') as f:
  375. writer = csv.DictWriter(f, StructResult._fields)
  376. writer.writeheader()
  377. for r in results:
  378. writer.writerow(r._asdict())
  379. # find previous results?
  380. if args.get('diff'):
  381. diff_results = []
  382. try:
  383. with openio(args['diff']) as f:
  384. reader = csv.DictReader(f, restval='')
  385. for r in reader:
  386. try:
  387. diff_results.append(StructResult(**{
  388. k: v for k, v in r.items()
  389. if k in StructResult._fields}))
  390. except TypeError:
  391. pass
  392. except FileNotFoundError:
  393. pass
  394. # fold to remove duplicates
  395. diff_results = fold(diff_results)
  396. # print table
  397. if not args.get('quiet'):
  398. table(
  399. results,
  400. diff_results if args.get('diff') else None,
  401. **args)
  402. if __name__ == "__main__":
  403. import argparse
  404. import sys
  405. parser = argparse.ArgumentParser(
  406. description="Find struct sizes.")
  407. parser.add_argument(
  408. 'obj_paths',
  409. nargs='*',
  410. default=OBJ_PATHS,
  411. help="Description of where to find *.o files. May be a directory "
  412. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  413. parser.add_argument(
  414. '-v', '--verbose',
  415. action='store_true',
  416. help="Output commands that run behind the scenes.")
  417. parser.add_argument(
  418. '-q', '--quiet',
  419. action='store_true',
  420. help="Don't show anything, useful with -o.")
  421. parser.add_argument(
  422. '-o', '--output',
  423. help="Specify CSV file to store results.")
  424. parser.add_argument(
  425. '-u', '--use',
  426. help="Don't parse anything, use this CSV file.")
  427. parser.add_argument(
  428. '-d', '--diff',
  429. help="Specify CSV file to diff against.")
  430. parser.add_argument(
  431. '-a', '--all',
  432. action='store_true',
  433. help="Show all, not just the ones that changed.")
  434. parser.add_argument(
  435. '-p', '--percent',
  436. action='store_true',
  437. help="Only show percentage change, not a full diff.")
  438. parser.add_argument(
  439. '-b', '--by-file',
  440. action='store_true',
  441. help="Group by file. Note this does not include padding "
  442. "so sizes may differ from other tools.")
  443. parser.add_argument(
  444. '-s', '--size-sort',
  445. action='store_true',
  446. help="Sort by size.")
  447. parser.add_argument(
  448. '-S', '--reverse-size-sort',
  449. action='store_true',
  450. help="Sort by size, but backwards.")
  451. parser.add_argument(
  452. '-Y', '--summary',
  453. action='store_true',
  454. help="Only show the total size.")
  455. parser.add_argument(
  456. '-A', '--everything',
  457. action='store_true',
  458. help="Include builtin and libc specific symbols.")
  459. parser.add_argument(
  460. '--objdump-tool',
  461. type=lambda x: x.split(),
  462. default=OBJDUMP_TOOL,
  463. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  464. parser.add_argument(
  465. '--build-dir',
  466. help="Specify the relative build directory. Used to map object files "
  467. "to the correct source files.")
  468. sys.exit(main(**{k: v
  469. for k, v in vars(parser.parse_intermixed_args()).items()
  470. if v is not None}))