summary.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. #!/usr/bin/env python3
  2. #
  3. # Script to summarize the outputs of other scripts. Operates on CSV files.
  4. #
  5. import collections as co
  6. import csv
  7. import functools as ft
  8. import math as m
  9. import os
  10. import re
  11. # each result is a type generated by another script
  12. RESULTS = []
  13. FIELDS = 'code,data,stack,structs'
  14. def result(cls):
  15. RESULTS.append(cls)
  16. return cls
  17. @result
  18. class CodeResult(co.namedtuple('CodeResult', 'code_size')):
  19. __slots__ = ()
  20. def __new__(cls, code_size=0):
  21. return super().__new__(cls, int(code_size))
  22. def __add__(self, other):
  23. return self.__class__(self.code_size + other.code_size)
  24. def __sub__(self, other):
  25. old = other.code_size if other is not None else 0
  26. new = self.code_size if self is not None else 0
  27. return (new-old) / old if old else 1.0
  28. def __rsub__(self, other):
  29. return self.__class__.__sub__(other, self)
  30. def key(self):
  31. return -self.code_size
  32. _header = '%7s' % 'code'
  33. _nil = '%7s' % '-'
  34. def __str__(self):
  35. return '%7s' % self.code_size
  36. @result
  37. class DataResult(co.namedtuple('DataResult', 'data_size')):
  38. __slots__ = ()
  39. def __new__(cls, data_size=0):
  40. return super().__new__(cls, int(data_size))
  41. def __add__(self, other):
  42. return self.__class__(self.data_size + other.data_size)
  43. def __sub__(self, other):
  44. old = other.data_size if other is not None else 0
  45. new = self.data_size if self is not None else 0
  46. return (new-old) / old if old else 1.0
  47. def __rsub__(self, other):
  48. return self.__class__.__sub__(other, self)
  49. def key(self):
  50. return -self.data_size
  51. _header = '%7s' % 'data'
  52. _nil = '%7s' % '-'
  53. def __str__(self):
  54. return '%7s' % self.data_size
  55. @result
  56. class StackResult(co.namedtuple('StackResult', 'stack_limit')):
  57. __slots__ = ()
  58. def __new__(cls, stack_limit=0):
  59. return super().__new__(cls, float(stack_limit))
  60. def __add__(self, other):
  61. return self.__class__(max(self.stack_limit, other.stack_limit))
  62. def __sub__(self, other):
  63. old_limit = other.stack_limit if other is not None else 0
  64. new_limit = self.stack_limit if self is not None else 0
  65. return (0.0 if m.isinf(new_limit) and m.isinf(old_limit)
  66. else +float('inf') if m.isinf(new_limit)
  67. else -float('inf') if m.isinf(old_limit)
  68. else 0.0 if not old_limit and not new_limit
  69. else 1.0 if not old_limit
  70. else (new_limit-old_limit) / old_limit)
  71. def __rsub__(self, other):
  72. return self.__class__.__sub__(other, self)
  73. def key(self):
  74. return -self.stack_limit
  75. _header = '%7s' % 'stack'
  76. _nil = '%7s' % '-'
  77. def __str__(self):
  78. return '%7s' % (
  79. '∞' if m.isinf(self.stack_limit)
  80. else int(self.stack_limit))
  81. @result
  82. class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
  83. __slots__ = ()
  84. def __new__(cls, struct_size=0):
  85. return super().__new__(cls, int(struct_size))
  86. def __add__(self, other):
  87. return self.__class__(self.struct_size + other.struct_size)
  88. def __sub__(self, other):
  89. old = other.struct_size if other is not None else 0
  90. new = self.struct_size if self is not None else 0
  91. return (new-old) / old if old else 1.0
  92. def __rsub__(self, other):
  93. return self.__class__.__sub__(other, self)
  94. def key(self):
  95. return -self.struct_size
  96. _header = '%7s' % 'structs'
  97. _nil = '%7s' % '-'
  98. def __str__(self):
  99. return '%7s' % self.struct_size
  100. @result
  101. class CoverageLineResult(co.namedtuple('CoverageResult',
  102. 'coverage_line_hits,coverage_line_count')):
  103. __slots__ = ()
  104. def __new__(cls, coverage_line_hits=0, coverage_line_count=0):
  105. return super().__new__(cls,
  106. int(coverage_line_hits),
  107. int(coverage_line_count))
  108. def __add__(self, other):
  109. return self.__class__(
  110. self.coverage_line_hits + other.coverage_line_hits,
  111. self.coverage_line_count + other.coverage_line_count)
  112. def __sub__(self, other):
  113. old_hits = other.coverage_line_hits if other is not None else 0
  114. old_count = other.coverage_line_count if other is not None else 0
  115. new_hits = self.coverage_line_hits if self is not None else 0
  116. new_count = self.coverage_line_count if self is not None else 0
  117. return ((new_hits/new_count if new_count else 1.0)
  118. - (old_hits/old_count if old_count else 1.0))
  119. def __rsub__(self, other):
  120. return self.__class__.__sub__(other, self)
  121. def key(self):
  122. return -(self.coverage_line_hits/self.coverage_line_count
  123. if self.coverage_line_count else -1)
  124. _header = '%19s' % 'coverage/line'
  125. _nil = '%11s %7s' % ('-', '-')
  126. def __str__(self):
  127. return '%11s %7s' % (
  128. '%d/%d' % (self.coverage_line_hits, self.coverage_line_count)
  129. if self.coverage_line_count else '-',
  130. '%.1f%%' % (100*self.coverage_line_hits/self.coverage_line_count)
  131. if self.coverage_line_count else '-')
  132. @result
  133. class CoverageBranchResult(co.namedtuple('CoverageResult',
  134. 'coverage_branch_hits,coverage_branch_count')):
  135. __slots__ = ()
  136. def __new__(cls, coverage_branch_hits=0, coverage_branch_count=0):
  137. return super().__new__(cls,
  138. int(coverage_branch_hits),
  139. int(coverage_branch_count))
  140. def __add__(self, other):
  141. return self.__class__(
  142. self.coverage_branch_hits + other.coverage_branch_hits,
  143. self.coverage_branch_count + other.coverage_branch_count)
  144. def __sub__(self, other):
  145. old_hits = other.coverage_branch_hits if other is not None else 0
  146. old_count = other.coverage_branch_count if other is not None else 0
  147. new_hits = self.coverage_branch_hits if self is not None else 0
  148. new_count = self.coverage_branch_count if self is not None else 0
  149. return ((new_hits/new_count if new_count else 1.0)
  150. - (old_hits/old_count if old_count else 1.0))
  151. def __rsub__(self, other):
  152. return self.__class__.__sub__(other, self)
  153. def key(self):
  154. return -(self.coverage_branch_hits/self.coverage_branch_count
  155. if self.coverage_branch_count else -1)
  156. _header = '%19s' % 'coverage/branch'
  157. _nil = '%11s %7s' % ('-', '-')
  158. def __str__(self):
  159. return '%11s %7s' % (
  160. '%d/%d' % (self.coverage_branch_hits, self.coverage_branch_count)
  161. if self.coverage_branch_count else '-',
  162. '%.1f%%' % (100*self.coverage_branch_hits/self.coverage_branch_count)
  163. if self.coverage_branch_count else '-')
  164. def openio(path, mode='r'):
  165. if path == '-':
  166. if 'r' in mode:
  167. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  168. else:
  169. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  170. else:
  171. return open(path, mode)
  172. def main(**args):
  173. # find results
  174. results = co.defaultdict(lambda: {})
  175. for path in args.get('csv_paths', '-'):
  176. try:
  177. with openio(path) as f:
  178. r = csv.DictReader(f)
  179. for result in r:
  180. file = result.pop('file', '')
  181. name = result.pop('name', '')
  182. for Result in RESULTS:
  183. if all(result.get(f) not in {None, ''}
  184. for f in Result._fields):
  185. results[(file, name)][Result.__name__] = (
  186. results[(file, name)].get(
  187. Result.__name__, Result())
  188. + Result(*(result[f]
  189. for f in Result._fields)))
  190. except FileNotFoundError:
  191. pass
  192. # find previous results?
  193. if args.get('diff'):
  194. prev_results = co.defaultdict(lambda: {})
  195. for path in args.get('csv_paths', '-'):
  196. try:
  197. with openio(args['diff']) as f:
  198. r = csv.DictReader(f)
  199. for result in r:
  200. file = result.pop('file', '')
  201. name = result.pop('name', '')
  202. for Result in RESULTS:
  203. if all(result.get(f) not in {None, ''}
  204. for f in Result._fields):
  205. prev_results[(file, name)][Result.__name__] = (
  206. prev_results[(file, name)].get(
  207. Result.__name__, Result())
  208. + Result(*(result[f]
  209. for f in Result._fields)))
  210. except FileNotFoundError:
  211. pass
  212. # filter our result types by results that are present
  213. if 'all' in args['fields']:
  214. filtered_results = RESULTS
  215. else:
  216. filtered_results = [
  217. Result for Result in RESULTS
  218. if (any(f.startswith(r)
  219. for r in args['fields']
  220. for f in Result._fields)
  221. or any(Result._header.strip().startswith(r)
  222. for r in args['fields']))]
  223. # figure out a sort key
  224. if args.get('sort'):
  225. key_Result = next(
  226. Result for Result in RESULTS
  227. if (any(f.startswith(args['sort'])
  228. for f in Result._fields)
  229. or Result._header.strip().startswith(args['sort'])))
  230. key = lambda result: result.get(key_Result.__name__, key_Result()).key()
  231. reverse = False
  232. elif args.get('reverse_sort'):
  233. key_Result = next(
  234. Result for Result in RESULTS
  235. if (any(f.startswith(args['reverse_sort'])
  236. for f in Result._fields)
  237. or Result._header.strip().startswith(args['reverse_sort'])))
  238. key = lambda result: result.get(key_Result.__name__, key_Result()).key()
  239. reverse = True
  240. else:
  241. key = lambda _: None
  242. reverse = False
  243. # write merged results to CSV
  244. if args.get('output'):
  245. with openio(args['output'], 'w') as f:
  246. w = csv.DictWriter(f, sum(
  247. (Result._fields for Result in filtered_results),
  248. ('file', 'name')))
  249. w.writeheader()
  250. for (file, name), result in sorted(results.items()):
  251. w.writerow(ft.reduce(dict.__or__,
  252. (r._asdict() for r in result.values()),
  253. {'file': file, 'name': name}))
  254. # print results
  255. def print_header(by):
  256. if by == 'total':
  257. entry = lambda k: 'TOTAL'
  258. elif by == 'file':
  259. entry = lambda k: k[0]
  260. else:
  261. entry = lambda k: k[1]
  262. if not args.get('diff'):
  263. print('%-36s %s' % (by,
  264. ' '.join(Result._header for Result in filtered_results)))
  265. else:
  266. old = {entry(k) for k in results.keys()}
  267. new = {entry(k) for k in prev_results.keys()}
  268. print('%-36s %s' % (
  269. '%s (%d added, %d removed)' % (by,
  270. sum(1 for k in new if k not in old),
  271. sum(1 for k in old if k not in new))
  272. if by else '',
  273. ' '.join('%s%-10s' % (Result._header, '')
  274. for Result in filtered_results)))
  275. def print_entries(by):
  276. if by == 'total':
  277. entry = lambda k: 'TOTAL'
  278. elif by == 'file':
  279. entry = lambda k: k[0]
  280. else:
  281. entry = lambda k: k[1]
  282. entries = co.defaultdict(lambda: {})
  283. for k, result in results.items():
  284. entries[entry(k)] |= {
  285. r.__class__.__name__: entries[entry(k)].get(
  286. r.__class__.__name__, r.__class__()) + r
  287. for r in result.values()}
  288. if not args.get('diff'):
  289. for name, result in sorted(entries.items(),
  290. key=lambda p: (key(p[1]), p),
  291. reverse=reverse):
  292. print('%-36s %s' % (name, ' '.join(
  293. str(result.get(Result.__name__, Result._nil))
  294. for Result in filtered_results)))
  295. else:
  296. prev_entries = co.defaultdict(lambda: {})
  297. for k, result in prev_results.items():
  298. prev_entries[entry(k)] |= {
  299. r.__class__.__name__: prev_entries[entry(k)].get(
  300. r.__class__.__name__, r.__class__()) + r
  301. for r in result.values()}
  302. diff_entries = {
  303. name: (prev_entries.get(name), entries.get(name))
  304. for name in (entries.keys() | prev_entries.keys())}
  305. for name, (old, new) in sorted(diff_entries.items(),
  306. key=lambda p: (key(p[1][1]), p)):
  307. fields = []
  308. changed = False
  309. for Result in filtered_results:
  310. o = old.get(Result.__name__) if old is not None else None
  311. n = new.get(Result.__name__) if new is not None else None
  312. ratio = n - o if n is not None or o is not None else 0
  313. changed = changed or ratio
  314. fields.append('%s%-10s' % (
  315. n if n is not None else Result._nil,
  316. '' if not ratio
  317. else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
  318. else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
  319. else ' (%+.1f%%)' % (100*ratio)))
  320. if changed or args.get('all'):
  321. print('%-36s %s' % (name, ' '.join(fields)))
  322. if args.get('quiet'):
  323. pass
  324. elif args.get('summary'):
  325. print_header('')
  326. print_entries('total')
  327. elif args.get('files'):
  328. print_header('file')
  329. print_entries('file')
  330. print_entries('total')
  331. else:
  332. print_header('name')
  333. print_entries('name')
  334. print_entries('total')
  335. if __name__ == "__main__":
  336. import argparse
  337. import sys
  338. parser = argparse.ArgumentParser(
  339. description="Summarize measurements")
  340. parser.add_argument('csv_paths', nargs='*', default='-',
  341. help="Description of where to find *.csv files. May be a directory \
  342. or list of paths.")
  343. parser.add_argument('-q', '--quiet', action='store_true',
  344. help="Don't show anything, useful with -o.")
  345. parser.add_argument('-o', '--output',
  346. help="Specify CSV file to store results.")
  347. parser.add_argument('-d', '--diff',
  348. help="Specify CSV file to diff against.")
  349. parser.add_argument('-a', '--all', action='store_true',
  350. help="Show all objects, not just the ones that changed.")
  351. parser.add_argument('-f', '--fields',
  352. type=lambda x: set(re.split('\s*,\s*', x)),
  353. default=FIELDS,
  354. help="Comma separated list of fields to print, by default all fields \
  355. that are found in the CSV files are printed. \"all\" prints all \
  356. fields this script knows. Defaults to %r." % FIELDS)
  357. parser.add_argument('-s', '--sort',
  358. help="Sort by this field.")
  359. parser.add_argument('-S', '--reverse-sort',
  360. help="Sort by this field, but backwards.")
  361. parser.add_argument('-F', '--files', action='store_true',
  362. help="Show file-level calls.")
  363. parser.add_argument('-Y', '--summary', action='store_true',
  364. help="Only show the totals.")
  365. sys.exit(main(**{k: v
  366. for k, v in vars(parser.parse_args()).items()
  367. if v is not None}))