struct_.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
  5. # Example:
  6. # ./scripts/struct_.py lfs.o lfs_util.o -S
  7. #
  8. # Copyright (c) 2022, The littlefs authors.
  9. # SPDX-License-Identifier: BSD-3-Clause
  10. #
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
# default glob patterns used to find object files when none are given
OBJ_PATHS = ['*.o']
# default objdump command; kept as a list because collect() appends
# arguments to it with list concatenation
OBJDUMP_TOOL = ['objdump']
  22. # integer fields
  23. class IntField(co.namedtuple('IntField', 'x')):
  24. __slots__ = ()
  25. def __new__(cls, x):
  26. if isinstance(x, IntField):
  27. return x
  28. if isinstance(x, str):
  29. try:
  30. x = int(x, 0)
  31. except ValueError:
  32. # also accept +-∞ and +-inf
  33. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  34. x = float('inf')
  35. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  36. x = float('-inf')
  37. else:
  38. raise
  39. return super().__new__(cls, x)
  40. def __int__(self):
  41. assert not m.isinf(self.x)
  42. return self.x
  43. def __float__(self):
  44. return float(self.x)
  45. def __str__(self):
  46. if self.x == float('inf'):
  47. return '∞'
  48. elif self.x == float('-inf'):
  49. return '-∞'
  50. else:
  51. return str(self.x)
  52. none = '%7s' % '-'
  53. def table(self):
  54. return '%7s' % (self,)
  55. diff_none = '%7s' % '-'
  56. diff_table = table
  57. def diff_diff(self, other):
  58. new = self.x if self else 0
  59. old = other.x if other else 0
  60. diff = new - old
  61. if diff == float('+inf'):
  62. return '%7s' % '+∞'
  63. elif diff == float('-inf'):
  64. return '%7s' % '-∞'
  65. else:
  66. return '%+7d' % diff
  67. def ratio(self, other):
  68. new = self.x if self else 0
  69. old = other.x if other else 0
  70. if m.isinf(new) and m.isinf(old):
  71. return 0.0
  72. elif m.isinf(new):
  73. return float('+inf')
  74. elif m.isinf(old):
  75. return float('-inf')
  76. elif not old and not new:
  77. return 0.0
  78. elif not old:
  79. return 1.0
  80. else:
  81. return (new-old) / old
  82. def __add__(self, other):
  83. return IntField(self.x + other.x)
  84. def __mul__(self, other):
  85. return IntField(self.x * other.x)
  86. def __lt__(self, other):
  87. return self.x < other.x
  88. def __gt__(self, other):
  89. return self.__class__.__lt__(other, self)
  90. def __le__(self, other):
  91. return not self.__gt__(other)
  92. def __ge__(self, other):
  93. return not self.__lt__(other)
  94. def __truediv__(self, n):
  95. if m.isinf(self.x):
  96. return self
  97. else:
  98. return IntField(round(self.x / n))
  99. # struct size results
  100. class StructResult(co.namedtuple('StructResult', 'file,struct,struct_size')):
  101. __slots__ = ()
  102. def __new__(cls, file, struct, struct_size):
  103. return super().__new__(cls, file, struct, IntField(struct_size))
  104. def __add__(self, other):
  105. return StructResult(self.file, self.struct,
  106. self.struct_size + other.struct_size)
  107. def openio(path, mode='r'):
  108. if path == '-':
  109. if mode == 'r':
  110. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  111. else:
  112. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  113. else:
  114. return open(path, mode)
  115. def collect(paths, *,
  116. objdump_tool=OBJDUMP_TOOL,
  117. build_dir=None,
  118. everything=False,
  119. **args):
  120. decl_pattern = re.compile(
  121. '^\s+(?P<no>[0-9]+)'
  122. '\s+(?P<dir>[0-9]+)'
  123. '\s+.*'
  124. '\s+(?P<file>[^\s]+)$')
  125. struct_pattern = re.compile(
  126. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  127. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  128. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  129. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  130. results = []
  131. for path in paths:
  132. # find decl, we want to filter by structs in .h files
  133. decls = {}
  134. # note objdump-tool may contain extra args
  135. cmd = objdump_tool + ['--dwarf=rawline', path]
  136. if args.get('verbose'):
  137. print(' '.join(shlex.quote(c) for c in cmd))
  138. proc = sp.Popen(cmd,
  139. stdout=sp.PIPE,
  140. stderr=sp.PIPE if not args.get('verbose') else None,
  141. universal_newlines=True,
  142. errors='replace')
  143. for line in proc.stdout:
  144. # find file numbers
  145. m = decl_pattern.match(line)
  146. if m:
  147. decls[int(m.group('no'))] = m.group('file')
  148. proc.wait()
  149. if proc.returncode != 0:
  150. if not args.get('verbose'):
  151. for line in proc.stderr:
  152. sys.stdout.write(line)
  153. sys.exit(-1)
  154. # collect structs as we parse dwarf info
  155. found = False
  156. name = None
  157. decl = None
  158. size = None
  159. # note objdump-tool may contain extra args
  160. cmd = objdump_tool + ['--dwarf=info', path]
  161. if args.get('verbose'):
  162. print(' '.join(shlex.quote(c) for c in cmd))
  163. proc = sp.Popen(cmd,
  164. stdout=sp.PIPE,
  165. stderr=sp.PIPE if not args.get('verbose') else None,
  166. universal_newlines=True,
  167. errors='replace')
  168. for line in proc.stdout:
  169. # state machine here to find structs
  170. m = struct_pattern.match(line)
  171. if m:
  172. if m.group('tag'):
  173. if (name is not None
  174. and decl is not None
  175. and size is not None):
  176. file = decls.get(decl, '?')
  177. # map to source file
  178. file = re.sub('\.o$', '.c', file)
  179. if build_dir:
  180. file = re.sub(
  181. '%s/*' % re.escape(build_dir), '',
  182. file)
  183. # only include structs declared in header files in the
  184. # current directory, ignore internal-only structs (
  185. # these are represented in other measurements)
  186. if everything or file.endswith('.h'):
  187. results.append(StructResult(file, name, size))
  188. found = (m.group('tag') == 'structure_type')
  189. name = None
  190. decl = None
  191. size = None
  192. elif found and m.group('name'):
  193. name = m.group('name')
  194. elif found and name and m.group('decl'):
  195. decl = int(m.group('decl'))
  196. elif found and name and m.group('size'):
  197. size = int(m.group('size'))
  198. proc.wait()
  199. if proc.returncode != 0:
  200. if not args.get('verbose'):
  201. for line in proc.stderr:
  202. sys.stdout.write(line)
  203. sys.exit(-1)
  204. return results
  205. def fold(results, *,
  206. by=['file', 'struct'],
  207. **_):
  208. folding = co.OrderedDict()
  209. for r in results:
  210. name = tuple(getattr(r, k) for k in by)
  211. if name not in folding:
  212. folding[name] = []
  213. folding[name].append(r)
  214. folded = []
  215. for rs in folding.values():
  216. folded.append(sum(rs[1:], start=rs[0]))
  217. return folded
  218. def table(results, diff_results=None, *,
  219. by_file=False,
  220. size_sort=False,
  221. reverse_size_sort=False,
  222. summary=False,
  223. all=False,
  224. percent=False,
  225. **_):
  226. all_, all = all, __builtins__.all
  227. # fold
  228. results = fold(results, by=['file' if by_file else 'struct'])
  229. if diff_results is not None:
  230. diff_results = fold(diff_results,
  231. by=['file' if by_file else 'struct'])
  232. table = {
  233. r.file if by_file else r.struct: r
  234. for r in results}
  235. diff_table = {
  236. r.file if by_file else r.struct: r
  237. for r in diff_results or []}
  238. # sort, note that python's sort is stable
  239. names = list(table.keys() | diff_table.keys())
  240. names.sort()
  241. if diff_results is not None:
  242. names.sort(key=lambda n: -IntField.ratio(
  243. table[n].struct_size if n in table else None,
  244. diff_table[n].struct_size if n in diff_table else None))
  245. if size_sort:
  246. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  247. reverse=True)
  248. elif reverse_size_sort:
  249. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  250. reverse=False)
  251. # print header
  252. print('%-36s' % ('%s%s' % (
  253. 'file' if by_file else 'struct',
  254. ' (%d added, %d removed)' % (
  255. sum(1 for n in table if n not in diff_table),
  256. sum(1 for n in diff_table if n not in table))
  257. if diff_results is not None and not percent else '')
  258. if not summary else ''),
  259. end='')
  260. if diff_results is None:
  261. print(' %s' % ('size'.rjust(len(IntField.none))))
  262. elif percent:
  263. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  264. else:
  265. print(' %s %s %s' % (
  266. 'old'.rjust(len(IntField.diff_none)),
  267. 'new'.rjust(len(IntField.diff_none)),
  268. 'diff'.rjust(len(IntField.diff_none))))
  269. # print entries
  270. if not summary:
  271. for name in names:
  272. r = table.get(name)
  273. if diff_results is not None:
  274. diff_r = diff_table.get(name)
  275. ratio = IntField.ratio(
  276. r.struct_size if r else None,
  277. diff_r.struct_size if diff_r else None)
  278. if not ratio and not all_:
  279. continue
  280. print('%-36s' % name, end='')
  281. if diff_results is None:
  282. print(' %s' % (
  283. r.struct_size.table()
  284. if r else IntField.none))
  285. elif percent:
  286. print(' %s%s' % (
  287. r.struct_size.diff_table()
  288. if r else IntField.diff_none,
  289. ' (%s)' % (
  290. '+∞%' if ratio == float('+inf')
  291. else '-∞%' if ratio == float('-inf')
  292. else '%+.1f%%' % (100*ratio))))
  293. else:
  294. print(' %s %s %s%s' % (
  295. diff_r.struct_size.diff_table()
  296. if diff_r else IntField.diff_none,
  297. r.struct_size.diff_table()
  298. if r else IntField.diff_none,
  299. IntField.diff_diff(
  300. r.struct_size if r else None,
  301. diff_r.struct_size if diff_r else None)
  302. if r or diff_r else IntField.diff_none,
  303. ' (%s)' % (
  304. '+∞%' if ratio == float('+inf')
  305. else '-∞%' if ratio == float('-inf')
  306. else '%+.1f%%' % (100*ratio))
  307. if ratio else ''))
  308. # print total
  309. total = fold(results, by=[])
  310. r = total[0] if total else None
  311. if diff_results is not None:
  312. diff_total = fold(diff_results, by=[])
  313. diff_r = diff_total[0] if diff_total else None
  314. ratio = IntField.ratio(
  315. r.struct_size if r else None,
  316. diff_r.struct_size if diff_r else None)
  317. print('%-36s' % 'TOTAL', end='')
  318. if diff_results is None:
  319. print(' %s' % (
  320. r.struct_size.table()
  321. if r else IntField.none))
  322. elif percent:
  323. print(' %s%s' % (
  324. r.struct_size.diff_table()
  325. if r else IntField.diff_none,
  326. ' (%s)' % (
  327. '+∞%' if ratio == float('+inf')
  328. else '-∞%' if ratio == float('-inf')
  329. else '%+.1f%%' % (100*ratio))))
  330. else:
  331. print(' %s %s %s%s' % (
  332. diff_r.struct_size.diff_table()
  333. if diff_r else IntField.diff_none,
  334. r.struct_size.diff_table()
  335. if r else IntField.diff_none,
  336. IntField.diff_diff(
  337. r.struct_size if r else None,
  338. diff_r.struct_size if diff_r else None)
  339. if r or diff_r else IntField.diff_none,
  340. ' (%s)' % (
  341. '+∞%' if ratio == float('+inf')
  342. else '-∞%' if ratio == float('-inf')
  343. else '%+.1f%%' % (100*ratio))
  344. if ratio else ''))
  345. def main(obj_paths, **args):
  346. # find sizes
  347. if not args.get('use', None):
  348. # find .o files
  349. paths = []
  350. for path in obj_paths:
  351. if os.path.isdir(path):
  352. path = path + '/*.o'
  353. for path in glob.glob(path):
  354. paths.append(path)
  355. if not paths:
  356. print('no .obj files found in %r?' % obj_paths)
  357. sys.exit(-1)
  358. results = collect(paths, **args)
  359. else:
  360. results = []
  361. with openio(args['use']) as f:
  362. reader = csv.DictReader(f)
  363. for r in reader:
  364. try:
  365. results.append(StructResult(**{
  366. k: v for k, v in r.items()
  367. if k in StructResult._fields}))
  368. except TypeError:
  369. pass
  370. # fold to remove duplicates
  371. results = fold(results)
  372. # sort because why not
  373. results.sort()
  374. # write results to CSV
  375. if args.get('output'):
  376. with openio(args['output'], 'w') as f:
  377. writer = csv.DictWriter(f, StructResult._fields)
  378. writer.writeheader()
  379. for r in results:
  380. writer.writerow(r._asdict())
  381. # find previous results?
  382. if args.get('diff'):
  383. diff_results = []
  384. try:
  385. with openio(args['diff']) as f:
  386. reader = csv.DictReader(f)
  387. for r in reader:
  388. try:
  389. diff_results.append(StructResult(**{
  390. k: v for k, v in r.items()
  391. if k in StructResult._fields}))
  392. except TypeError:
  393. pass
  394. except FileNotFoundError:
  395. pass
  396. # fold to remove duplicates
  397. diff_results = fold(diff_results)
  398. # print table
  399. if not args.get('quiet'):
  400. table(
  401. results,
  402. diff_results if args.get('diff') else None,
  403. **args)
  404. if __name__ == "__main__":
  405. import argparse
  406. import sys
  407. parser = argparse.ArgumentParser(
  408. description="Find struct sizes.")
  409. parser.add_argument(
  410. 'obj_paths',
  411. nargs='*',
  412. default=OBJ_PATHS,
  413. help="Description of where to find *.o files. May be a directory "
  414. "or a list of paths. Defaults to %r." % OBJ_PATHS)
  415. parser.add_argument(
  416. '-v', '--verbose',
  417. action='store_true',
  418. help="Output commands that run behind the scenes.")
  419. parser.add_argument(
  420. '-q', '--quiet',
  421. action='store_true',
  422. help="Don't show anything, useful with -o.")
  423. parser.add_argument(
  424. '-o', '--output',
  425. help="Specify CSV file to store results.")
  426. parser.add_argument(
  427. '-u', '--use',
  428. help="Don't parse anything, use this CSV file.")
  429. parser.add_argument(
  430. '-d', '--diff',
  431. help="Specify CSV file to diff against.")
  432. parser.add_argument(
  433. '-a', '--all',
  434. action='store_true',
  435. help="Show all, not just the ones that changed.")
  436. parser.add_argument(
  437. '-p', '--percent',
  438. action='store_true',
  439. help="Only show percentage change, not a full diff.")
  440. parser.add_argument(
  441. '-b', '--by-file',
  442. action='store_true',
  443. help="Group by file. Note this does not include padding "
  444. "so sizes may differ from other tools.")
  445. parser.add_argument(
  446. '-s', '--size-sort',
  447. action='store_true',
  448. help="Sort by size.")
  449. parser.add_argument(
  450. '-S', '--reverse-size-sort',
  451. action='store_true',
  452. help="Sort by size, but backwards.")
  453. parser.add_argument(
  454. '-Y', '--summary',
  455. action='store_true',
  456. help="Only show the total size.")
  457. parser.add_argument(
  458. '-A', '--everything',
  459. action='store_true',
  460. help="Include builtin and libc specific symbols.")
  461. parser.add_argument(
  462. '--objdump-tool',
  463. type=lambda x: x.split(),
  464. default=OBJDUMP_TOOL,
  465. help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
  466. parser.add_argument(
  467. '--build-dir',
  468. help="Specify the relative build directory. Used to map object files "
  469. "to the correct source files.")
  470. sys.exit(main(**{k: v
  471. for k, v in vars(parser.parse_intermixed_args()).items()
  472. if v is not None}))