struct.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import shlex
import subprocess as sp
import sys
  14. OBJ_PATHS = ['*.o']
  15. OBJDUMP_TOOL = ['objdump']
  16. # integer fields
  17. class IntField(co.namedtuple('IntField', 'x')):
  18. __slots__ = ()
  19. def __new__(cls, x):
  20. if isinstance(x, IntField):
  21. return x
  22. if isinstance(x, str):
  23. try:
  24. x = int(x, 0)
  25. except ValueError:
  26. # also accept +-∞ and +-inf
  27. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  28. x = float('inf')
  29. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  30. x = float('-inf')
  31. else:
  32. raise
  33. return super().__new__(cls, x)
  34. def __int__(self):
  35. assert not m.isinf(self.x)
  36. return self.x
  37. def __float__(self):
  38. return float(self.x)
  39. def __str__(self):
  40. if self.x == float('inf'):
  41. return '∞'
  42. elif self.x == float('-inf'):
  43. return '-∞'
  44. else:
  45. return str(self.x)
  46. none = '%7s' % '-'
  47. def table(self):
  48. return '%7s' % (self,)
  49. diff_none = '%7s' % '-'
  50. diff_table = table
  51. def diff_diff(self, other):
  52. new = self.x if self else 0
  53. old = other.x if other else 0
  54. diff = new - old
  55. if diff == float('+inf'):
  56. return '%7s' % '+∞'
  57. elif diff == float('-inf'):
  58. return '%7s' % '-∞'
  59. else:
  60. return '%+7d' % diff
  61. def ratio(self, other):
  62. new = self.x if self else 0
  63. old = other.x if other else 0
  64. if m.isinf(new) and m.isinf(old):
  65. return 0.0
  66. elif m.isinf(new):
  67. return float('+inf')
  68. elif m.isinf(old):
  69. return float('-inf')
  70. elif not old and not new:
  71. return 0.0
  72. elif not old:
  73. return 1.0
  74. else:
  75. return (new-old) / old
  76. def __add__(self, other):
  77. return IntField(self.x + other.x)
  78. def __mul__(self, other):
  79. return IntField(self.x * other.x)
  80. def __lt__(self, other):
  81. return self.x < other.x
  82. def __gt__(self, other):
  83. return self.__class__.__lt__(other, self)
  84. def __le__(self, other):
  85. return not self.__gt__(other)
  86. def __ge__(self, other):
  87. return not self.__lt__(other)
  88. def __truediv__(self, n):
  89. if m.isinf(self.x):
  90. return self
  91. else:
  92. return IntField(round(self.x / n))
  93. # struct size results
  94. class StructResult(co.namedtuple('StructResult', 'file,struct,struct_size')):
  95. __slots__ = ()
  96. def __new__(cls, file, struct, struct_size):
  97. return super().__new__(cls, file, struct, IntField(struct_size))
  98. def __add__(self, other):
  99. return StructResult(self.file, self.struct,
  100. self.struct_size + other.struct_size)
  101. def openio(path, mode='r'):
  102. if path == '-':
  103. if 'r' in mode:
  104. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  105. else:
  106. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  107. else:
  108. return open(path, mode)
  109. def collect(paths, *,
  110. objdump_tool=OBJDUMP_TOOL,
  111. build_dir=None,
  112. everything=False,
  113. **args):
  114. decl_pattern = re.compile(
  115. '^\s+(?P<no>[0-9]+)'
  116. '\s+(?P<dir>[0-9]+)'
  117. '\s+.*'
  118. '\s+(?P<file>[^\s]+)$')
  119. struct_pattern = re.compile(
  120. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  121. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  122. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  123. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  124. results = []
  125. for path in paths:
  126. # find decl, we want to filter by structs in .h files
  127. decls = {}
  128. # note objdump-tool may contain extra args
  129. cmd = objdump_tool + ['--dwarf=rawline', path]
  130. if args.get('verbose'):
  131. print(' '.join(shlex.quote(c) for c in cmd))
  132. proc = sp.Popen(cmd,
  133. stdout=sp.PIPE,
  134. stderr=sp.PIPE if not args.get('verbose') else None,
  135. universal_newlines=True,
  136. errors='replace')
  137. for line in proc.stdout:
  138. # find file numbers
  139. m = decl_pattern.match(line)
  140. if m:
  141. decls[int(m.group('no'))] = m.group('file')
  142. proc.wait()
  143. if proc.returncode != 0:
  144. if not args.get('verbose'):
  145. for line in proc.stderr:
  146. sys.stdout.write(line)
  147. sys.exit(-1)
  148. # collect structs as we parse dwarf info
  149. found = False
  150. name = None
  151. decl = None
  152. size = None
  153. # note objdump-tool may contain extra args
  154. cmd = objdump_tool + ['--dwarf=info', path]
  155. if args.get('verbose'):
  156. print(' '.join(shlex.quote(c) for c in cmd))
  157. proc = sp.Popen(cmd,
  158. stdout=sp.PIPE,
  159. stderr=sp.PIPE if not args.get('verbose') else None,
  160. universal_newlines=True,
  161. errors='replace')
  162. for line in proc.stdout:
  163. # state machine here to find structs
  164. m = struct_pattern.match(line)
  165. if m:
  166. if m.group('tag'):
  167. if (name is not None
  168. and decl is not None
  169. and size is not None):
  170. file = decls.get(decl, '?')
  171. # map to source file
  172. file = re.sub('\.o$', '.c', file)
  173. if build_dir:
  174. file = re.sub(
  175. '%s/*' % re.escape(build_dir), '',
  176. file)
  177. # only include structs declared in header files in the
  178. # current directory, ignore internal-only structs (
  179. # these are represented in other measurements)
  180. if everything or file.endswith('.h'):
  181. results.append(StructResult(file, name, size))
  182. found = (m.group('tag') == 'structure_type')
  183. name = None
  184. decl = None
  185. size = None
  186. elif found and m.group('name'):
  187. name = m.group('name')
  188. elif found and name and m.group('decl'):
  189. decl = int(m.group('decl'))
  190. elif found and name and m.group('size'):
  191. size = int(m.group('size'))
  192. proc.wait()
  193. if proc.returncode != 0:
  194. if not args.get('verbose'):
  195. for line in proc.stderr:
  196. sys.stdout.write(line)
  197. sys.exit(-1)
  198. return results
  199. def fold(results, *,
  200. by=['file', 'struct'],
  201. **_):
  202. folding = co.OrderedDict()
  203. for r in results:
  204. name = tuple(getattr(r, k) for k in by)
  205. if name not in folding:
  206. folding[name] = []
  207. folding[name].append(r)
  208. folded = []
  209. for rs in folding.values():
  210. folded.append(sum(rs[1:], start=rs[0]))
  211. return folded
  212. def table(results, diff_results=None, *,
  213. by_file=False,
  214. size_sort=False,
  215. reverse_size_sort=False,
  216. summary=False,
  217. all=False,
  218. percent=False,
  219. **_):
  220. all_, all = all, __builtins__.all
  221. # fold
  222. results = fold(results, by=['file' if by_file else 'struct'])
  223. if diff_results is not None:
  224. diff_results = fold(diff_results,
  225. by=['file' if by_file else 'struct'])
  226. table = {
  227. r.file if by_file else r.struct: r
  228. for r in results}
  229. diff_table = {
  230. r.file if by_file else r.struct: r
  231. for r in diff_results or []}
  232. # sort, note that python's sort is stable
  233. names = list(table.keys() | diff_table.keys())
  234. names.sort()
  235. if diff_results is not None:
  236. names.sort(key=lambda n: -IntField.ratio(
  237. table[n].struct_size if n in table else None,
  238. diff_table[n].struct_size if n in diff_table else None))
  239. if size_sort:
  240. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  241. reverse=True)
  242. elif reverse_size_sort:
  243. names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
  244. reverse=False)
  245. # print header
  246. print('%-36s' % ('%s%s' % (
  247. 'file' if by_file else 'struct',
  248. ' (%d added, %d removed)' % (
  249. sum(1 for n in table if n not in diff_table),
  250. sum(1 for n in diff_table if n not in table))
  251. if diff_results is not None and not percent else '')
  252. if not summary else ''),
  253. end='')
  254. if diff_results is None:
  255. print(' %s' % ('size'.rjust(len(IntField.none))))
  256. elif percent:
  257. print(' %s' % ('size'.rjust(len(IntField.diff_none))))
  258. else:
  259. print(' %s %s %s' % (
  260. 'old'.rjust(len(IntField.diff_none)),
  261. 'new'.rjust(len(IntField.diff_none)),
  262. 'diff'.rjust(len(IntField.diff_none))))
  263. # print entries
  264. if not summary:
  265. for name in names:
  266. r = table.get(name)
  267. if diff_results is not None:
  268. diff_r = diff_table.get(name)
  269. ratio = IntField.ratio(
  270. r.struct_size if r else None,
  271. diff_r.struct_size if diff_r else None)
  272. if not ratio and not all_:
  273. continue
  274. print('%-36s' % name, end='')
  275. if diff_results is None:
  276. print(' %s' % (
  277. r.struct_size.table()
  278. if r else IntField.none))
  279. elif percent:
  280. print(' %s%s' % (
  281. r.struct_size.diff_table()
  282. if r else IntField.diff_none,
  283. ' (%s)' % (
  284. '+∞%' if ratio == float('+inf')
  285. else '-∞%' if ratio == float('-inf')
  286. else '%+.1f%%' % (100*ratio))))
  287. else:
  288. print(' %s %s %s%s' % (
  289. diff_r.struct_size.diff_table()
  290. if diff_r else IntField.diff_none,
  291. r.struct_size.diff_table()
  292. if r else IntField.diff_none,
  293. IntField.diff_diff(
  294. r.struct_size if r else None,
  295. diff_r.struct_size if diff_r else None)
  296. if r or diff_r else IntField.diff_none,
  297. ' (%s)' % (
  298. '+∞%' if ratio == float('+inf')
  299. else '-∞%' if ratio == float('-inf')
  300. else '%+.1f%%' % (100*ratio))
  301. if ratio else ''))
  302. # print total
  303. total = fold(results, by=[])
  304. r = total[0] if total else None
  305. if diff_results is not None:
  306. diff_total = fold(diff_results, by=[])
  307. diff_r = diff_total[0] if diff_total else None
  308. ratio = IntField.ratio(
  309. r.struct_size if r else None,
  310. diff_r.struct_size if diff_r else None)
  311. print('%-36s' % 'TOTAL', end='')
  312. if diff_results is None:
  313. print(' %s' % (
  314. r.struct_size.table()
  315. if r else IntField.none))
  316. elif percent:
  317. print(' %s%s' % (
  318. r.struct_size.diff_table()
  319. if r else IntField.diff_none,
  320. ' (%s)' % (
  321. '+∞%' if ratio == float('+inf')
  322. else '-∞%' if ratio == float('-inf')
  323. else '%+.1f%%' % (100*ratio))))
  324. else:
  325. print(' %s %s %s%s' % (
  326. diff_r.struct_size.diff_table()
  327. if diff_r else IntField.diff_none,
  328. r.struct_size.diff_table()
  329. if r else IntField.diff_none,
  330. IntField.diff_diff(
  331. r.struct_size if r else None,
  332. diff_r.struct_size if diff_r else None)
  333. if r or diff_r else IntField.diff_none,
  334. ' (%s)' % (
  335. '+∞%' if ratio == float('+inf')
  336. else '-∞%' if ratio == float('-inf')
  337. else '%+.1f%%' % (100*ratio))
  338. if ratio else ''))
  339. def main(obj_paths, **args):
  340. # find sizes
  341. if not args.get('use', None):
  342. # find .o files
  343. paths = []
  344. for path in obj_paths:
  345. if os.path.isdir(path):
  346. path = path + '/*.o'
  347. for path in glob.glob(path):
  348. paths.append(path)
  349. if not paths:
  350. print('no .obj files found in %r?' % obj_paths)
  351. sys.exit(-1)
  352. results = collect(paths, **args)
  353. else:
  354. results = []
  355. with openio(args['use']) as f:
  356. reader = csv.DictReader(f)
  357. for r in reader:
  358. try:
  359. results.append(StructResult(**{
  360. k: v for k, v in r.items()
  361. if k in StructResult._fields}))
  362. except TypeError:
  363. pass
  364. # fold to remove duplicates
  365. results = fold(results)
  366. # sort because why not
  367. results.sort()
  368. # write results to CSV
  369. if args.get('output'):
  370. with openio(args['output'], 'w') as f:
  371. writer = csv.DictWriter(f, StructResult._fields)
  372. writer.writeheader()
  373. for r in results:
  374. writer.writerow(r._asdict())
  375. # find previous results?
  376. if args.get('diff'):
  377. diff_results = []
  378. try:
  379. with openio(args['diff']) as f:
  380. reader = csv.DictReader(f)
  381. for r in reader:
  382. try:
  383. diff_results.append(StructResult(**{
  384. k: v for k, v in r.items()
  385. if k in StructResult._fields}))
  386. except TypeError:
  387. pass
  388. except FileNotFoundError:
  389. pass
  390. # fold to remove duplicates
  391. diff_results = fold(diff_results)
  392. # print table
  393. if not args.get('quiet'):
  394. table(
  395. results,
  396. diff_results if args.get('diff') else None,
  397. **args)
  398. if __name__ == "__main__":
  399. import argparse
  400. import sys
  401. parser = argparse.ArgumentParser(
  402. description="Find struct sizes.")
  403. parser.add_argument(
  404. 'obj_paths',
  405. nargs='*',
  406. default=OBJ_PATHS,
  407. help="Description of where to find *.o files. May be a directory "
  408. "or a list of paths. Defaults to %(default)r.")
  409. parser.add_argument(
  410. '-v', '--verbose',
  411. action='store_true',
  412. help="Output commands that run behind the scenes.")
  413. parser.add_argument(
  414. '-q', '--quiet',
  415. action='store_true',
  416. help="Don't show anything, useful with -o.")
  417. parser.add_argument(
  418. '-o', '--output',
  419. help="Specify CSV file to store results.")
  420. parser.add_argument(
  421. '-u', '--use',
  422. help="Don't parse anything, use this CSV file.")
  423. parser.add_argument(
  424. '-d', '--diff',
  425. help="Specify CSV file to diff against.")
  426. parser.add_argument(
  427. '-a', '--all',
  428. action='store_true',
  429. help="Show all, not just the ones that changed.")
  430. parser.add_argument(
  431. '-p', '--percent',
  432. action='store_true',
  433. help="Only show percentage change, not a full diff.")
  434. parser.add_argument(
  435. '-b', '--by-file',
  436. action='store_true',
  437. help="Group by file. Note this does not include padding "
  438. "so sizes may differ from other tools.")
  439. parser.add_argument(
  440. '-s', '--size-sort',
  441. action='store_true',
  442. help="Sort by size.")
  443. parser.add_argument(
  444. '-S', '--reverse-size-sort',
  445. action='store_true',
  446. help="Sort by size, but backwards.")
  447. parser.add_argument(
  448. '-Y', '--summary',
  449. action='store_true',
  450. help="Only show the total size.")
  451. parser.add_argument(
  452. '-A', '--everything',
  453. action='store_true',
  454. help="Include builtin and libc specific symbols.")
  455. parser.add_argument(
  456. '--objdump-tool',
  457. type=lambda x: x.split(),
  458. default=OBJDUMP_TOOL,
  459. help="Path to the objdump tool to use.")
  460. parser.add_argument(
  461. '--build-dir',
  462. help="Specify the relative build directory. Used to map object files "
  463. "to the correct source files.")
  464. sys.exit(main(**{k: v
  465. for k, v in vars(parser.parse_args()).items()
  466. if v is not None}))