#!/usr/bin/env python3
#
# Script to find struct sizes.
#
import collections as co
import csv
import glob
import itertools as it
import os
import re
import shlex
import subprocess as sp
import sys
  13. OBJ_PATHS = ['*.o']
  14. def collect(paths, **args):
  15. decl_pattern = re.compile(
  16. '^\s+(?P<no>[0-9]+)'
  17. '\s+(?P<dir>[0-9]+)'
  18. '\s+.*'
  19. '\s+(?P<file>[^\s]+)$')
  20. struct_pattern = re.compile(
  21. '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
  22. '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
  23. '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
  24. '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
  25. results = co.defaultdict(lambda: 0)
  26. for path in paths:
  27. # find decl, we want to filter by structs in .h files
  28. decls = {}
  29. # note objdump-tool may contain extra args
  30. cmd = args['objdump_tool'] + ['--dwarf=rawline', path]
  31. if args.get('verbose'):
  32. print(' '.join(shlex.quote(c) for c in cmd))
  33. proc = sp.Popen(cmd,
  34. stdout=sp.PIPE,
  35. stderr=sp.PIPE if not args.get('verbose') else None,
  36. universal_newlines=True,
  37. errors='replace')
  38. for line in proc.stdout:
  39. # find file numbers
  40. m = decl_pattern.match(line)
  41. if m:
  42. decls[int(m.group('no'))] = (
  43. m.group('file'),
  44. int(m.group('dir')))
  45. proc.wait()
  46. if proc.returncode != 0:
  47. if not args.get('verbose'):
  48. for line in proc.stderr:
  49. sys.stdout.write(line)
  50. sys.exit(-1)
  51. # collect structs as we parse dwarf info
  52. found = False
  53. name = None
  54. decl = None
  55. size = None
  56. # note objdump-tool may contain extra args
  57. cmd = args['objdump_tool'] + ['--dwarf=info', path]
  58. if args.get('verbose'):
  59. print(' '.join(shlex.quote(c) for c in cmd))
  60. proc = sp.Popen(cmd,
  61. stdout=sp.PIPE,
  62. stderr=sp.PIPE if not args.get('verbose') else None,
  63. universal_newlines=True,
  64. errors='replace')
  65. for line in proc.stdout:
  66. # state machine here to find structs
  67. m = struct_pattern.match(line)
  68. if m:
  69. if m.group('tag'):
  70. if (name is not None
  71. and decl is not None
  72. and size is not None):
  73. decl_file, decl_dir = decls.get(decl, ('', 0))
  74. results[(path, name)] = (size, decl_file, decl_dir)
  75. found = (m.group('tag') == 'structure_type')
  76. name = None
  77. decl = None
  78. size = None
  79. elif found and m.group('name'):
  80. name = m.group('name')
  81. elif found and name and m.group('decl'):
  82. decl = int(m.group('decl'))
  83. elif found and name and m.group('size'):
  84. size = int(m.group('size'))
  85. proc.wait()
  86. if proc.returncode != 0:
  87. if not args.get('verbose'):
  88. for line in proc.stderr:
  89. sys.stdout.write(line)
  90. sys.exit(-1)
  91. flat_results = []
  92. for (path, struct), (size, decl_file, decl_dir) in results.items():
  93. # map to source files
  94. if args.get('build_dir'):
  95. path = re.sub('%s/*' % re.escape(args['build_dir']), '', path)
  96. # only include structs declared in header files in the current
  97. # directory, ignore internal-only # structs (these are represented
  98. # in other measurements)
  99. if not args.get('everything'):
  100. if not (decl_file.endswith('.h') and decl_dir == 0):
  101. continue
  102. # replace .o with .c, different scripts report .o/.c, we need to
  103. # choose one if we want to deduplicate csv files
  104. path = re.sub('\.o$', '.c', path)
  105. flat_results.append((path, struct, size))
  106. return flat_results
  107. def main(**args):
  108. def openio(path, mode='r'):
  109. if path == '-':
  110. if 'r' in mode:
  111. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  112. else:
  113. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  114. else:
  115. return open(path, mode)
  116. # find sizes
  117. if not args.get('use', None):
  118. # find .o files
  119. paths = []
  120. for path in args['obj_paths']:
  121. if os.path.isdir(path):
  122. path = path + '/*.o'
  123. for path in glob.glob(path):
  124. paths.append(path)
  125. if not paths:
  126. print('no .obj files found in %r?' % args['obj_paths'])
  127. sys.exit(-1)
  128. results = collect(paths, **args)
  129. else:
  130. with openio(args['use']) as f:
  131. r = csv.DictReader(f)
  132. results = [
  133. ( result['file'],
  134. result['name'],
  135. int(result['struct_size']))
  136. for result in r
  137. if result.get('struct_size') not in {None, ''}]
  138. total = 0
  139. for _, _, size in results:
  140. total += size
  141. # find previous results?
  142. if args.get('diff'):
  143. try:
  144. with openio(args['diff']) as f:
  145. r = csv.DictReader(f)
  146. prev_results = [
  147. ( result['file'],
  148. result['name'],
  149. int(result['struct_size']))
  150. for result in r
  151. if result.get('struct_size') not in {None, ''}]
  152. except FileNotFoundError:
  153. prev_results = []
  154. prev_total = 0
  155. for _, _, size in prev_results:
  156. prev_total += size
  157. # write results to CSV
  158. if args.get('output'):
  159. merged_results = co.defaultdict(lambda: {})
  160. other_fields = []
  161. # merge?
  162. if args.get('merge'):
  163. try:
  164. with openio(args['merge']) as f:
  165. r = csv.DictReader(f)
  166. for result in r:
  167. file = result.pop('file', '')
  168. struct = result.pop('name', '')
  169. result.pop('struct_size', None)
  170. merged_results[(file, struct)] = result
  171. other_fields = result.keys()
  172. except FileNotFoundError:
  173. pass
  174. for file, struct, size in results:
  175. merged_results[(file, struct)]['struct_size'] = size
  176. with openio(args['output'], 'w') as f:
  177. w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size'])
  178. w.writeheader()
  179. for (file, struct), result in sorted(merged_results.items()):
  180. w.writerow({'file': file, 'name': struct, **result})
  181. # print results
  182. def dedup_entries(results, by='name'):
  183. entries = co.defaultdict(lambda: 0)
  184. for file, struct, size in results:
  185. entry = (file if by == 'file' else struct)
  186. entries[entry] += size
  187. return entries
  188. def diff_entries(olds, news):
  189. diff = co.defaultdict(lambda: (0, 0, 0, 0))
  190. for name, new in news.items():
  191. diff[name] = (0, new, new, 1.0)
  192. for name, old in olds.items():
  193. _, new, _, _ = diff[name]
  194. diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
  195. return diff
  196. def sorted_entries(entries):
  197. if args.get('size_sort'):
  198. return sorted(entries, key=lambda x: (-x[1], x))
  199. elif args.get('reverse_size_sort'):
  200. return sorted(entries, key=lambda x: (+x[1], x))
  201. else:
  202. return sorted(entries)
  203. def sorted_diff_entries(entries):
  204. if args.get('size_sort'):
  205. return sorted(entries, key=lambda x: (-x[1][1], x))
  206. elif args.get('reverse_size_sort'):
  207. return sorted(entries, key=lambda x: (+x[1][1], x))
  208. else:
  209. return sorted(entries, key=lambda x: (-x[1][3], x))
  210. def print_header(by=''):
  211. if not args.get('diff'):
  212. print('%-36s %7s' % (by, 'size'))
  213. else:
  214. print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))
  215. def print_entry(name, size):
  216. print("%-36s %7d" % (name, size))
  217. def print_diff_entry(name, old, new, diff, ratio):
  218. print("%-36s %7s %7s %+7d%s" % (name,
  219. old or "-",
  220. new or "-",
  221. diff,
  222. ' (%+.1f%%)' % (100*ratio) if ratio else ''))
  223. def print_entries(by='name'):
  224. entries = dedup_entries(results, by=by)
  225. if not args.get('diff'):
  226. print_header(by=by)
  227. for name, size in sorted_entries(entries.items()):
  228. print_entry(name, size)
  229. else:
  230. prev_entries = dedup_entries(prev_results, by=by)
  231. diff = diff_entries(prev_entries, entries)
  232. print_header(by='%s (%d added, %d removed)' % (by,
  233. sum(1 for old, _, _, _ in diff.values() if not old),
  234. sum(1 for _, new, _, _ in diff.values() if not new)))
  235. for name, (old, new, diff, ratio) in sorted_diff_entries(
  236. diff.items()):
  237. if ratio or args.get('all'):
  238. print_diff_entry(name, old, new, diff, ratio)
  239. def print_totals():
  240. if not args.get('diff'):
  241. print_entry('TOTAL', total)
  242. else:
  243. ratio = (0.0 if not prev_total and not total
  244. else 1.0 if not prev_total
  245. else (total-prev_total)/prev_total)
  246. print_diff_entry('TOTAL',
  247. prev_total, total,
  248. total-prev_total,
  249. ratio)
  250. if args.get('quiet'):
  251. pass
  252. elif args.get('summary'):
  253. print_header()
  254. print_totals()
  255. elif args.get('files'):
  256. print_entries(by='file')
  257. print_totals()
  258. else:
  259. print_entries(by='name')
  260. print_totals()
  261. if __name__ == "__main__":
  262. import argparse
  263. import sys
  264. parser = argparse.ArgumentParser(
  265. description="Find struct sizes.")
  266. parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
  267. help="Description of where to find *.o files. May be a directory \
  268. or a list of paths. Defaults to %r." % OBJ_PATHS)
  269. parser.add_argument('-v', '--verbose', action='store_true',
  270. help="Output commands that run behind the scenes.")
  271. parser.add_argument('-q', '--quiet', action='store_true',
  272. help="Don't show anything, useful with -o.")
  273. parser.add_argument('-o', '--output',
  274. help="Specify CSV file to store results.")
  275. parser.add_argument('-u', '--use',
  276. help="Don't compile and find struct sizes, instead use this CSV file.")
  277. parser.add_argument('-d', '--diff',
  278. help="Specify CSV file to diff struct size against.")
  279. parser.add_argument('-m', '--merge',
  280. help="Merge with an existing CSV file when writing to output.")
  281. parser.add_argument('-a', '--all', action='store_true',
  282. help="Show all functions, not just the ones that changed.")
  283. parser.add_argument('-A', '--everything', action='store_true',
  284. help="Include builtin and libc specific symbols.")
  285. parser.add_argument('-s', '--size-sort', action='store_true',
  286. help="Sort by size.")
  287. parser.add_argument('-S', '--reverse-size-sort', action='store_true',
  288. help="Sort by size, but backwards.")
  289. parser.add_argument('-F', '--files', action='store_true',
  290. help="Show file-level struct sizes.")
  291. parser.add_argument('-Y', '--summary', action='store_true',
  292. help="Only show the total struct size.")
  293. parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(),
  294. help="Path to the objdump tool to use.")
  295. parser.add_argument('--build-dir',
  296. help="Specify the relative build directory. Used to map object files \
  297. to the correct source files.")
  298. sys.exit(main(**vars(parser.parse_args())))