data.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find data size at the function level. Basically just a bit wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
import collections as co
import csv
import glob
import itertools as it
import os
import re
import shlex
import subprocess as sp
import sys
# Default glob patterns used to locate object files when none are given on
# the command line.
OBJ_PATHS = ['*.o']
  16. class DataResult(co.namedtuple('DataResult', 'data_size')):
  17. __slots__ = ()
  18. def __new__(cls, data_size=0):
  19. return super().__new__(cls, int(data_size))
  20. def __add__(self, other):
  21. return self.__class__(self.data_size + other.data_size)
  22. def __sub__(self, other):
  23. return DataDiff(other, self)
  24. def __rsub__(self, other):
  25. return self.__class__.__sub__(other, self)
  26. def key(self, **args):
  27. if args.get('size_sort'):
  28. return -self.data_size
  29. elif args.get('reverse_size_sort'):
  30. return +self.data_size
  31. else:
  32. return None
  33. _header = '%7s' % 'size'
  34. def __str__(self):
  35. return '%7d' % self.data_size
  36. class DataDiff(co.namedtuple('DataDiff', 'old,new')):
  37. __slots__ = ()
  38. def ratio(self):
  39. old = self.old.data_size if self.old is not None else 0
  40. new = self.new.data_size if self.new is not None else 0
  41. return (new-old) / old if old else 1.0
  42. def key(self, **args):
  43. return (
  44. self.new.key(**args) if self.new is not None else 0,
  45. -self.ratio())
  46. def __bool__(self):
  47. return bool(self.ratio())
  48. _header = '%7s %7s %7s' % ('old', 'new', 'diff')
  49. def __str__(self):
  50. old = self.old.data_size if self.old is not None else 0
  51. new = self.new.data_size if self.new is not None else 0
  52. diff = new - old
  53. ratio = self.ratio()
  54. return '%7s %7s %+7d%s' % (
  55. old or "-",
  56. new or "-",
  57. diff,
  58. ' (%+.1f%%)' % (100*ratio) if ratio else '')
  59. def openio(path, mode='r'):
  60. if path == '-':
  61. if 'r' in mode:
  62. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  63. else:
  64. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  65. else:
  66. return open(path, mode)
  67. def collect(paths, **args):
  68. results = co.defaultdict(lambda: DataResult())
  69. pattern = re.compile(
  70. '^(?P<size>[0-9a-fA-F]+)' +
  71. ' (?P<type>[%s])' % re.escape(args['type']) +
  72. ' (?P<func>.+?)$')
  73. for path in paths:
  74. # map to source file
  75. src_path = re.sub('\.o$', '.c', path)
  76. if args.get('build_dir'):
  77. src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
  78. src_path)
  79. # note nm-tool may contain extra args
  80. cmd = args['nm_tool'] + ['--size-sort', path]
  81. if args.get('verbose'):
  82. print(' '.join(shlex.quote(c) for c in cmd))
  83. proc = sp.Popen(cmd,
  84. stdout=sp.PIPE,
  85. stderr=sp.PIPE if not args.get('verbose') else None,
  86. universal_newlines=True,
  87. errors='replace')
  88. for line in proc.stdout:
  89. m = pattern.match(line)
  90. if m:
  91. func = m.group('func')
  92. # discard internal functions
  93. if not args.get('everything') and func.startswith('__'):
  94. continue
  95. # discard .8449 suffixes created by optimizer
  96. func = re.sub('\.[0-9]+', '', func)
  97. results[(src_path, func)] += DataResult(
  98. int(m.group('size'), 16))
  99. proc.wait()
  100. if proc.returncode != 0:
  101. if not args.get('verbose'):
  102. for line in proc.stderr:
  103. sys.stdout.write(line)
  104. sys.exit(-1)
  105. return results
  106. def main(**args):
  107. # find sizes
  108. if not args.get('use', None):
  109. # find .o files
  110. paths = []
  111. for path in args['obj_paths']:
  112. if os.path.isdir(path):
  113. path = path + '/*.o'
  114. for path in glob.glob(path):
  115. paths.append(path)
  116. if not paths:
  117. print('no .obj files found in %r?' % args['obj_paths'])
  118. sys.exit(-1)
  119. results = collect(paths, **args)
  120. else:
  121. with openio(args['use']) as f:
  122. r = csv.DictReader(f)
  123. results = {
  124. (result['file'], result['name']): DataResult(
  125. *(result[f] for f in DataResult._fields))
  126. for result in r
  127. if all(result.get(f) not in {None, ''}
  128. for f in DataResult._fields)}
  129. # find previous results?
  130. if args.get('diff'):
  131. try:
  132. with openio(args['diff']) as f:
  133. r = csv.DictReader(f)
  134. prev_results = {
  135. (result['file'], result['name']): DataResult(
  136. *(result[f] for f in DataResult._fields))
  137. for result in r
  138. if all(result.get(f) not in {None, ''}
  139. for f in DataResult._fields)}
  140. except FileNotFoundError:
  141. prev_results = []
  142. # write results to CSV
  143. if args.get('output'):
  144. merged_results = co.defaultdict(lambda: {})
  145. other_fields = []
  146. # merge?
  147. if args.get('merge'):
  148. try:
  149. with openio(args['merge']) as f:
  150. r = csv.DictReader(f)
  151. for result in r:
  152. file = result.pop('file', '')
  153. func = result.pop('name', '')
  154. for f in DataResult._fields:
  155. result.pop(f, None)
  156. merged_results[(file, func)] = result
  157. other_fields = result.keys()
  158. except FileNotFoundError:
  159. pass
  160. for (file, func), result in results.items():
  161. merged_results[(file, func)] |= result._asdict()
  162. with openio(args['output'], 'w') as f:
  163. w = csv.DictWriter(f, ['file', 'name',
  164. *other_fields, *DataResult._fields])
  165. w.writeheader()
  166. for (file, func), result in sorted(merged_results.items()):
  167. w.writerow({'file': file, 'name': func, **result})
  168. # print results
  169. def print_header(by):
  170. if by == 'total':
  171. entry = lambda k: 'TOTAL'
  172. elif by == 'file':
  173. entry = lambda k: k[0]
  174. else:
  175. entry = lambda k: k[1]
  176. if not args.get('diff'):
  177. print('%-36s %s' % (by, DataResult._header))
  178. else:
  179. old = {entry(k) for k in results.keys()}
  180. new = {entry(k) for k in prev_results.keys()}
  181. print('%-36s %s' % (
  182. '%s (%d added, %d removed)' % (by,
  183. sum(1 for k in new if k not in old),
  184. sum(1 for k in old if k not in new))
  185. if by else '',
  186. DataDiff._header))
  187. def print_entries(by):
  188. if by == 'total':
  189. entry = lambda k: 'TOTAL'
  190. elif by == 'file':
  191. entry = lambda k: k[0]
  192. else:
  193. entry = lambda k: k[1]
  194. entries = co.defaultdict(lambda: DataResult())
  195. for k, result in results.items():
  196. entries[entry(k)] += result
  197. if not args.get('diff'):
  198. for name, result in sorted(entries.items(),
  199. key=lambda p: (p[1].key(**args), p)):
  200. print('%-36s %s' % (name, result))
  201. else:
  202. prev_entries = co.defaultdict(lambda: DataResult())
  203. for k, result in prev_results.items():
  204. prev_entries[entry(k)] += result
  205. diff_entries = {name: entries.get(name) - prev_entries.get(name)
  206. for name in (entries.keys() | prev_entries.keys())}
  207. for name, diff in sorted(diff_entries.items(),
  208. key=lambda p: (p[1].key(**args), p)):
  209. if diff or args.get('all'):
  210. print('%-36s %s' % (name, diff))
  211. if args.get('quiet'):
  212. pass
  213. elif args.get('summary'):
  214. print_header('')
  215. print_entries('total')
  216. elif args.get('files'):
  217. print_header('file')
  218. print_entries('file')
  219. print_entries('total')
  220. else:
  221. print_header('function')
  222. print_entries('function')
  223. print_entries('total')
# Command-line entry point: parse flags and dispatch to main(). Only
# explicitly supplied options are forwarded, so main()'s args.get() defaults
# apply for everything else.
if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Find data size at the function level.")
    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory \
            or a list of paths. Defaults to %r." % OBJ_PATHS)
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-u', '--use',
        help="Don't compile and find data sizes, instead use this CSV file.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff data size against.")
    parser.add_argument('-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all functions, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument('-s', '--size-sort', action='store_true',
        help="Sort by size.")
    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
        help="Sort by size, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level data sizes. Note this does not include padding! "
            "So sizes may differ from other tools.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the total data size.")
    parser.add_argument('--type', default='dDbB',
        help="Type of symbols to report, this uses the same single-character "
            "type-names emitted by nm. Defaults to %(default)r.")
    # nm-tool is split on whitespace so extra arguments can be embedded
    parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
        help="Path to the nm tool to use.")
    parser.add_argument('--build-dir',
        help="Specify the relative build directory. Used to map object files \
            to the correct source files.")
    # drop unset (None) options so main() sees only what the user provided
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_args()).items()
        if v is not None}))