structs.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find struct sizes.
  4. #
  5. import os
  6. import glob
  7. import itertools as it
  8. import subprocess as sp
  9. import shlex
  10. import re
  11. import csv
  12. import collections as co
  13. OBJ_PATHS = ['*.o']
  14. def openio(path, mode='r'):
  15. if path == '-':
  16. if 'r' in mode:
  17. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  18. else:
  19. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  20. else:
  21. return open(path, mode)
  22. class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
  23. __slots__ = ()
  24. def __new__(cls, struct_size=0):
  25. return super().__new__(cls, int(struct_size))
  26. def __add__(self, other):
  27. return self.__class__(self.struct_size + other.struct_size)
  28. def __sub__(self, other):
  29. return StructsDiff(other, self)
  30. def __rsub__(self, other):
  31. return self.__class__.__sub__(other, self)
  32. def key(self, **args):
  33. if args.get('size_sort'):
  34. return -self.struct_size
  35. elif args.get('reverse_size_sort'):
  36. return +self.struct_size
  37. else:
  38. return None
  39. _header = '%7s' % 'size'
  40. def __str__(self):
  41. return '%7d' % self.struct_size
  42. class StructsDiff(co.namedtuple('StructsDiff', 'old,new')):
  43. __slots__ = ()
  44. def ratio(self):
  45. old = self.old.struct_size if self.old is not None else 0
  46. new = self.new.struct_size if self.new is not None else 0
  47. return (new-old) / old if old else 1.0
  48. def key(self, **args):
  49. return (
  50. self.new.key(**args) if self.new is not None else 0,
  51. -self.ratio())
  52. def __bool__(self):
  53. return bool(self.ratio())
  54. _header = '%7s %7s %7s' % ('old', 'new', 'diff')
  55. def __str__(self):
  56. old = self.old.struct_size if self.old is not None else 0
  57. new = self.new.struct_size if self.new is not None else 0
  58. diff = new - old
  59. ratio = self.ratio()
  60. return '%7s %7s %+7d%s' % (
  61. old or "-",
  62. new or "-",
  63. diff,
  64. ' (%+.1f%%)' % (100*ratio) if ratio else '')
  65. def collect(paths, **args):
  66. decl_pattern = re.compile(
  67. '^\s+(?P<no>[0-9]+)'
  68. '\s+(?P<dir>[0-9]+)'
  69. '\s+.*'
  70. '\s+(?P<file>[^\s]+)$')
  71. struct_pattern = re.compile(
  72. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  73. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  74. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  75. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  76. results = {}
  77. for path in paths:
  78. # find decl, we want to filter by structs in .h files
  79. decls = {}
  80. # note objdump-tool may contain extra args
  81. cmd = args['objdump_tool'] + ['--dwarf=rawline', path]
  82. if args.get('verbose'):
  83. print(' '.join(shlex.quote(c) for c in cmd))
  84. proc = sp.Popen(cmd,
  85. stdout=sp.PIPE,
  86. stderr=sp.PIPE if not args.get('verbose') else None,
  87. universal_newlines=True,
  88. errors='replace')
  89. for line in proc.stdout:
  90. # find file numbers
  91. m = decl_pattern.match(line)
  92. if m:
  93. decls[int(m.group('no'))] = m.group('file')
  94. proc.wait()
  95. if proc.returncode != 0:
  96. if not args.get('verbose'):
  97. for line in proc.stderr:
  98. sys.stdout.write(line)
  99. sys.exit(-1)
  100. # collect structs as we parse dwarf info
  101. found = False
  102. name = None
  103. decl = None
  104. size = None
  105. # note objdump-tool may contain extra args
  106. cmd = args['objdump_tool'] + ['--dwarf=info', path]
  107. if args.get('verbose'):
  108. print(' '.join(shlex.quote(c) for c in cmd))
  109. proc = sp.Popen(cmd,
  110. stdout=sp.PIPE,
  111. stderr=sp.PIPE if not args.get('verbose') else None,
  112. universal_newlines=True,
  113. errors='replace')
  114. for line in proc.stdout:
  115. # state machine here to find structs
  116. m = struct_pattern.match(line)
  117. if m:
  118. if m.group('tag'):
  119. if (name is not None
  120. and decl is not None
  121. and size is not None):
  122. file = decls.get(decl, '?')
  123. # map to source file
  124. file = re.sub('\.o$', '.c', file)
  125. if args.get('build_dir'):
  126. file = re.sub(
  127. '%s/*' % re.escape(args['build_dir']), '',
  128. file)
  129. # only include structs declared in header files in the
  130. # current directory, ignore internal-only structs (
  131. # these are represented in other measurements)
  132. if args.get('everything') or file.endswith('.h'):
  133. results[(file, name)] = StructsResult(size)
  134. found = (m.group('tag') == 'structure_type')
  135. name = None
  136. decl = None
  137. size = None
  138. elif found and m.group('name'):
  139. name = m.group('name')
  140. elif found and name and m.group('decl'):
  141. decl = int(m.group('decl'))
  142. elif found and name and m.group('size'):
  143. size = int(m.group('size'))
  144. proc.wait()
  145. if proc.returncode != 0:
  146. if not args.get('verbose'):
  147. for line in proc.stderr:
  148. sys.stdout.write(line)
  149. sys.exit(-1)
  150. return results
  151. def main(**args):
  152. # find sizes
  153. if not args.get('use', None):
  154. # find .o files
  155. paths = []
  156. for path in args['obj_paths']:
  157. if os.path.isdir(path):
  158. path = path + '/*.o'
  159. for path in glob.glob(path):
  160. paths.append(path)
  161. if not paths:
  162. print('no .obj files found in %r?' % args['obj_paths'])
  163. sys.exit(-1)
  164. results = collect(paths, **args)
  165. else:
  166. with openio(args['use']) as f:
  167. r = csv.DictReader(f)
  168. results = {
  169. (result['file'], result['name']): StructsResult(
  170. *(result[f] for f in StructsResult._fields))
  171. for result in r
  172. if all(result.get(f) not in {None, ''}
  173. for f in StructsResult._fields)}
  174. # find previous results?
  175. if args.get('diff'):
  176. try:
  177. with openio(args['diff']) as f:
  178. r = csv.DictReader(f)
  179. prev_results = {
  180. (result['file'], result['name']): StructsResult(
  181. *(result[f] for f in StructsResult._fields))
  182. for result in r
  183. if all(result.get(f) not in {None, ''}
  184. for f in StructsResult._fields)}
  185. except FileNotFoundError:
  186. prev_results = []
  187. # write results to CSV
  188. if args.get('output'):
  189. merged_results = co.defaultdict(lambda: {})
  190. other_fields = []
  191. # merge?
  192. if args.get('merge'):
  193. try:
  194. with openio(args['merge']) as f:
  195. r = csv.DictReader(f)
  196. for result in r:
  197. file = result.pop('file', '')
  198. func = result.pop('name', '')
  199. for f in StructsResult._fields:
  200. result.pop(f, None)
  201. merged_results[(file, func)] = result
  202. other_fields = result.keys()
  203. except FileNotFoundError:
  204. pass
  205. for (file, func), result in results.items():
  206. merged_results[(file, func)] |= result._asdict()
  207. with openio(args['output'], 'w') as f:
  208. w = csv.DictWriter(f, ['file', 'name',
  209. *other_fields, *StructsResult._fields])
  210. w.writeheader()
  211. for (file, func), result in sorted(merged_results.items()):
  212. w.writerow({'file': file, 'name': func, **result})
  213. # print results
  214. def print_header(by):
  215. if by == 'total':
  216. entry = lambda k: 'TOTAL'
  217. elif by == 'file':
  218. entry = lambda k: k[0]
  219. else:
  220. entry = lambda k: k[1]
  221. if not args.get('diff'):
  222. print('%-36s %s' % (by, StructsResult._header))
  223. else:
  224. old = {entry(k) for k in results.keys()}
  225. new = {entry(k) for k in prev_results.keys()}
  226. print('%-36s %s' % (
  227. '%s (%d added, %d removed)' % (by,
  228. sum(1 for k in new if k not in old),
  229. sum(1 for k in old if k not in new))
  230. if by else '',
  231. StructsDiff._header))
  232. def print_entries(by):
  233. if by == 'total':
  234. entry = lambda k: 'TOTAL'
  235. elif by == 'file':
  236. entry = lambda k: k[0]
  237. else:
  238. entry = lambda k: k[1]
  239. entries = co.defaultdict(lambda: StructsResult())
  240. for k, result in results.items():
  241. entries[entry(k)] += result
  242. if not args.get('diff'):
  243. for name, result in sorted(entries.items(),
  244. key=lambda p: (p[1].key(**args), p)):
  245. print('%-36s %s' % (name, result))
  246. else:
  247. prev_entries = co.defaultdict(lambda: StructsResult())
  248. for k, result in prev_results.items():
  249. prev_entries[entry(k)] += result
  250. diff_entries = {name: entries.get(name) - prev_entries.get(name)
  251. for name in (entries.keys() | prev_entries.keys())}
  252. for name, diff in sorted(diff_entries.items(),
  253. key=lambda p: (p[1].key(**args), p)):
  254. if diff or args.get('all'):
  255. print('%-36s %s' % (name, diff))
  256. if args.get('quiet'):
  257. pass
  258. elif args.get('summary'):
  259. print_header('')
  260. print_entries('total')
  261. elif args.get('files'):
  262. print_header('file')
  263. print_entries('file')
  264. print_entries('total')
  265. else:
  266. print_header('struct')
  267. print_entries('struct')
  268. print_entries('total')
  269. if __name__ == "__main__":
  270. import argparse
  271. import sys
  272. parser = argparse.ArgumentParser(
  273. description="Find struct sizes.")
  274. parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
  275. help="Description of where to find *.o files. May be a directory \
  276. or a list of paths. Defaults to %r." % OBJ_PATHS)
  277. parser.add_argument('-v', '--verbose', action='store_true',
  278. help="Output commands that run behind the scenes.")
  279. parser.add_argument('-q', '--quiet', action='store_true',
  280. help="Don't show anything, useful with -o.")
  281. parser.add_argument('-o', '--output',
  282. help="Specify CSV file to store results.")
  283. parser.add_argument('-u', '--use',
  284. help="Don't compile and find struct sizes, instead use this CSV file.")
  285. parser.add_argument('-d', '--diff',
  286. help="Specify CSV file to diff struct size against.")
  287. parser.add_argument('-m', '--merge',
  288. help="Merge with an existing CSV file when writing to output.")
  289. parser.add_argument('-a', '--all', action='store_true',
  290. help="Show all structs, not just the ones that changed.")
  291. parser.add_argument('-A', '--everything', action='store_true',
  292. help="Include builtin and libc specific symbols.")
  293. parser.add_argument('-s', '--size-sort', action='store_true',
  294. help="Sort by size.")
  295. parser.add_argument('-S', '--reverse-size-sort', action='store_true',
  296. help="Sort by size, but backwards.")
  297. parser.add_argument('-F', '--files', action='store_true',
  298. help="Show file-level struct sizes.")
  299. parser.add_argument('-Y', '--summary', action='store_true',
  300. help="Only show the total struct size.")
  301. parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(),
  302. help="Path to the objdump tool to use.")
  303. parser.add_argument('--build-dir',
  304. help="Specify the relative build directory. Used to map object files \
  305. to the correct source files.")
  306. sys.exit(main(**{k: v
  307. for k, v in vars(parser.parse_args()).items()
  308. if v is not None}))