summary.py 22 KB


  1. #!/usr/bin/env python3
  2. #
  3. # Script to summarize the outputs of other scripts. Operates on CSV files.
  4. #
  5. # Example:
  6. # ./scripts/code.py lfs.o lfs_util.o -q -o lfs.code.csv
  7. # ./scripts/data.py lfs.o lfs_util.o -q -o lfs.data.csv
  8. # ./scripts/summary.py lfs.code.csv lfs.data.csv -q -o lfs.csv
  9. # ./scripts/summary.py -Y lfs.csv -f code=code_size,data=data_size
  10. #
  11. # Copyright (c) 2022, The littlefs authors.
  12. # SPDX-License-Identifier: BSD-3-Clause
  13. #
import collections as co
import csv
import functools as ft
import glob
import math as m
import os
import re
import sys
  21. CSV_PATHS = ['*.csv']
  22. # Defaults are common fields generated by other littlefs scripts
  23. MERGES = {
  24. 'add': (
  25. ['code_size', 'data_size', 'stack_frame', 'struct_size',
  26. 'coverage_lines', 'coverage_branches',
  27. 'test_passed',
  28. 'bench_read', 'bench_prog', 'bench_erased'],
  29. lambda xs: sum(xs[1:], start=xs[0])
  30. ),
  31. 'mul': (
  32. [],
  33. lambda xs: m.prod(xs[1:], start=xs[0])
  34. ),
  35. 'min': (
  36. [],
  37. min
  38. ),
  39. 'max': (
  40. ['stack_limit', 'coverage_hits'],
  41. max
  42. ),
  43. 'avg': (
  44. [],
  45. lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
  46. ),
  47. }
  48. def openio(path, mode='r'):
  49. if path == '-':
  50. if mode == 'r':
  51. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  52. else:
  53. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  54. else:
  55. return open(path, mode)
  56. # integer fields
  57. class IntField(co.namedtuple('IntField', 'x')):
  58. __slots__ = ()
  59. def __new__(cls, x):
  60. if isinstance(x, IntField):
  61. return x
  62. if isinstance(x, str):
  63. try:
  64. x = int(x, 0)
  65. except ValueError:
  66. # also accept +-∞ and +-inf
  67. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  68. x = float('inf')
  69. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  70. x = float('-inf')
  71. else:
  72. raise
  73. return super().__new__(cls, x)
  74. def __int__(self):
  75. assert not m.isinf(self.x)
  76. return self.x
  77. def __float__(self):
  78. return float(self.x)
  79. def __str__(self):
  80. if self.x == float('inf'):
  81. return '∞'
  82. elif self.x == float('-inf'):
  83. return '-∞'
  84. else:
  85. return str(self.x)
  86. none = '%7s' % '-'
  87. def table(self):
  88. return '%7s' % (self,)
  89. diff_none = '%7s' % '-'
  90. diff_table = table
  91. def diff_diff(self, other):
  92. new = self.x if self else 0
  93. old = other.x if other else 0
  94. diff = new - old
  95. if diff == float('+inf'):
  96. return '%7s' % '+∞'
  97. elif diff == float('-inf'):
  98. return '%7s' % '-∞'
  99. else:
  100. return '%+7d' % diff
  101. def ratio(self, other):
  102. new = self.x if self else 0
  103. old = other.x if other else 0
  104. if m.isinf(new) and m.isinf(old):
  105. return 0.0
  106. elif m.isinf(new):
  107. return float('+inf')
  108. elif m.isinf(old):
  109. return float('-inf')
  110. elif not old and not new:
  111. return 0.0
  112. elif not old:
  113. return 1.0
  114. else:
  115. return (new-old) / old
  116. def __add__(self, other):
  117. return IntField(self.x + other.x)
  118. def __mul__(self, other):
  119. return IntField(self.x * other.x)
  120. def __lt__(self, other):
  121. return self.x < other.x
  122. def __gt__(self, other):
  123. return self.__class__.__lt__(other, self)
  124. def __le__(self, other):
  125. return not self.__gt__(other)
  126. def __ge__(self, other):
  127. return not self.__lt__(other)
  128. def __truediv__(self, n):
  129. if m.isinf(self.x):
  130. return self
  131. else:
  132. return IntField(round(self.x / n))
  133. # float fields
  134. class FloatField(co.namedtuple('FloatField', 'x')):
  135. __slots__ = ()
  136. def __new__(cls, x):
  137. if isinstance(x, FloatField):
  138. return x
  139. if isinstance(x, str):
  140. try:
  141. x = float(x)
  142. except ValueError:
  143. # also accept +-∞ and +-inf
  144. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  145. x = float('inf')
  146. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  147. x = float('-inf')
  148. else:
  149. raise
  150. return super().__new__(cls, x)
  151. def __float__(self):
  152. return float(self.x)
  153. def __str__(self):
  154. if self.x == float('inf'):
  155. return '∞'
  156. elif self.x == float('-inf'):
  157. return '-∞'
  158. else:
  159. return '%.1f' % self.x
  160. none = IntField.none
  161. table = IntField.table
  162. diff_none = IntField.diff_none
  163. diff_table = IntField.diff_table
  164. diff_diff = IntField.diff_diff
  165. ratio = IntField.ratio
  166. __add__ = IntField.__add__
  167. __mul__ = IntField.__mul__
  168. __lt__ = IntField.__lt__
  169. __gt__ = IntField.__gt__
  170. __le__ = IntField.__le__
  171. __ge__ = IntField.__ge__
  172. def __truediv__(self, n):
  173. if m.isinf(self.x):
  174. return self
  175. else:
  176. return FloatField(self.x / n)
  177. # fractional fields, a/b
  178. class FracField(co.namedtuple('FracField', 'a,b')):
  179. __slots__ = ()
  180. def __new__(cls, a, b=None):
  181. if isinstance(a, FracField) and b is None:
  182. return a
  183. if isinstance(a, str) and b is None:
  184. a, b = a.split('/', 1)
  185. if b is None:
  186. b = a
  187. return super().__new__(cls, IntField(a), IntField(b))
  188. def __str__(self):
  189. return '%s/%s' % (self.a, self.b)
  190. none = '%11s %7s' % ('-', '-')
  191. def table(self):
  192. if not self.b.x:
  193. return self.none
  194. t = self.a.x/self.b.x
  195. return '%11s %7s' % (
  196. self,
  197. '∞%' if t == float('+inf')
  198. else '-∞%' if t == float('-inf')
  199. else '%.1f%%' % (100*t))
  200. diff_none = '%11s' % '-'
  201. def diff_table(self):
  202. if not self.b.x:
  203. return self.diff_none
  204. return '%11s' % (self,)
  205. def diff_diff(self, other):
  206. new_a, new_b = self if self else (IntField(0), IntField(0))
  207. old_a, old_b = other if other else (IntField(0), IntField(0))
  208. return '%11s' % ('%s/%s' % (
  209. new_a.diff_diff(old_a).strip(),
  210. new_b.diff_diff(old_b).strip()))
  211. def ratio(self, other):
  212. new_a, new_b = self if self else (IntField(0), IntField(0))
  213. old_a, old_b = other if other else (IntField(0), IntField(0))
  214. new = new_a.x/new_b.x if new_b.x else 1.0
  215. old = old_a.x/old_b.x if old_b.x else 1.0
  216. return new - old
  217. def __add__(self, other):
  218. return FracField(self.a + other.a, self.b + other.b)
  219. def __mul__(self, other):
  220. return FracField(self.a * other.a, self.b + other.b)
  221. def __lt__(self, other):
  222. self_r = self.a.x/self.b.x if self.b.x else float('-inf')
  223. other_r = other.a.x/other.b.x if other.b.x else float('-inf')
  224. return self_r < other_r
  225. def __gt__(self, other):
  226. return self.__class__.__lt__(other, self)
  227. def __le__(self, other):
  228. return not self.__gt__(other)
  229. def __ge__(self, other):
  230. return not self.__lt__(other)
  231. def __truediv__(self, n):
  232. return FracField(self.a / n, self.b / n)
  233. def homogenize(results, *,
  234. fields=None,
  235. merges=None,
  236. renames=None,
  237. types=None,
  238. **_):
  239. # rename fields?
  240. if renames is not None:
  241. results_ = []
  242. for r in results:
  243. results_.append({renames.get(k, k): v for k, v in r.items()})
  244. results = results_
  245. # find all fields
  246. if not fields:
  247. fields = co.OrderedDict()
  248. for r in results:
  249. # also remove None fields, these can get introduced by
  250. # csv.DictReader when header and rows mismatch
  251. fields.update((k, v) for k, v in r.items() if k is not None)
  252. fields = list(fields.keys())
  253. # go ahead and clean up none values, these can have a few forms
  254. results_ = []
  255. for r in results:
  256. results_.append({
  257. k: r[k] for k in fields
  258. if r.get(k) is not None and not(
  259. isinstance(r[k], str)
  260. and re.match('^\s*[+-]?\s*$', r[k]))})
  261. # find best type for all fields
  262. def try_(x, type):
  263. try:
  264. type(x)
  265. return True
  266. except ValueError:
  267. return False
  268. if types is None:
  269. types = {}
  270. for k in fields:
  271. if merges is not None and merges.get(k):
  272. for type in [IntField, FloatField, FracField]:
  273. if all(k not in r or try_(r[k], type) for r in results_):
  274. types[k] = type
  275. break
  276. else:
  277. print("no type matches field %r?" % k)
  278. sys.exit(-1)
  279. # homogenize types
  280. for k in fields:
  281. if k in types:
  282. for r in results_:
  283. if k in r:
  284. r[k] = types[k](r[k])
  285. return fields, types, results_
  286. def fold(results, *,
  287. fields=None,
  288. merges=None,
  289. by=None,
  290. **_):
  291. folding = co.OrderedDict()
  292. if by is None:
  293. by = [k for k in fields if k not in merges]
  294. for r in results:
  295. name = tuple(r.get(k) for k in by)
  296. if name not in folding:
  297. folding[name] = {k: [] for k in fields if k in merges}
  298. for k in fields:
  299. # drop all fields fields without a type
  300. if k in merges and k in r:
  301. folding[name][k].append(r[k])
  302. # merge fields, we need the count at this point for averages
  303. folded = []
  304. types = {}
  305. for name, r in folding.items():
  306. r_ = {}
  307. for k, vs in r.items():
  308. if vs:
  309. _, merge = MERGES[merges[k]]
  310. r_[k] = merge(vs)
  311. # drop all rows without any fields
  312. # and drop all empty keys
  313. if r_:
  314. folded.append(dict(
  315. {k: n for k, n in zip(by, name) if n},
  316. **r_))
  317. fields_ = by + [k for k in fields if k in merges]
  318. return fields_, folded
  319. def table(results, diff_results=None, *,
  320. fields=None,
  321. types=None,
  322. merges=None,
  323. by=None,
  324. sort=None,
  325. reverse_sort=None,
  326. summary=False,
  327. all=False,
  328. percent=False,
  329. **_):
  330. all_, all = all, __builtins__.all
  331. # fold
  332. if by is not None:
  333. fields, results = fold(results, fields=fields, merges=merges, by=by)
  334. if diff_results is not None:
  335. _, diff_results = fold(diff_results,
  336. fields=fields, merges=merges, by=by)
  337. table = {
  338. tuple(r.get(k,'') for k in fields if k not in merges): r
  339. for r in results}
  340. diff_table = {
  341. tuple(r.get(k,'') for k in fields if k not in merges): r
  342. for r in diff_results or []}
  343. # sort, note that python's sort is stable
  344. names = list(table.keys() | diff_table.keys())
  345. names.sort()
  346. if diff_results is not None:
  347. names.sort(key=lambda n: [
  348. -types[k].ratio(
  349. table.get(n,{}).get(k),
  350. diff_table.get(n,{}).get(k))
  351. for k in fields if k in merges])
  352. if sort:
  353. names.sort(key=lambda n: tuple(
  354. (table[n][k],) if k in table.get(n,{}) else ()
  355. for k in sort),
  356. reverse=True)
  357. elif reverse_sort:
  358. names.sort(key=lambda n: tuple(
  359. (table[n][k],) if k in table.get(n,{}) else ()
  360. for k in reverse_sort),
  361. reverse=False)
  362. # print header
  363. print('%-36s' % ('%s%s' % (
  364. ','.join(k for k in fields if k not in merges),
  365. ' (%d added, %d removed)' % (
  366. sum(1 for n in table if n not in diff_table),
  367. sum(1 for n in diff_table if n not in table))
  368. if diff_results is not None and not percent else '')
  369. if not summary else ''),
  370. end='')
  371. if diff_results is None:
  372. print(' %s' % (
  373. ' '.join(k.rjust(len(types[k].none))
  374. for k in fields if k in merges)))
  375. elif percent:
  376. print(' %s' % (
  377. ' '.join(k.rjust(len(types[k].diff_none))
  378. for k in fields if k in merges)))
  379. else:
  380. print(' %s %s %s' % (
  381. ' '.join(('o'+k).rjust(len(types[k].diff_none))
  382. for k in fields if k in merges),
  383. ' '.join(('n'+k).rjust(len(types[k].diff_none))
  384. for k in fields if k in merges),
  385. ' '.join(('d'+k).rjust(len(types[k].diff_none))
  386. for k in fields if k in merges)))
  387. # print entries
  388. if not summary:
  389. for name in names:
  390. r = table.get(name, {})
  391. if diff_results is not None:
  392. diff_r = diff_table.get(name, {})
  393. ratios = [types[k].ratio(r.get(k), diff_r.get(k))
  394. for k in fields if k in merges]
  395. if not any(ratios) and not all_:
  396. continue
  397. print('%-36s' % ','.join(name), end='')
  398. if diff_results is None:
  399. print(' %s' % (
  400. ' '.join(r[k].table()
  401. if k in r else types[k].none
  402. for k in fields if k in merges)))
  403. elif percent:
  404. print(' %s%s' % (
  405. ' '.join(r[k].diff_table()
  406. if k in r else types[k].diff_none
  407. for k in fields if k in merges),
  408. ' (%s)' % ', '.join(
  409. '+∞%' if t == float('+inf')
  410. else '-∞%' if t == float('-inf')
  411. else '%+.1f%%' % (100*t)
  412. for t in ratios)))
  413. else:
  414. print(' %s %s %s%s' % (
  415. ' '.join(diff_r[k].diff_table()
  416. if k in diff_r else types[k].diff_none
  417. for k in fields if k in merges),
  418. ' '.join(r[k].diff_table()
  419. if k in r else types[k].diff_none
  420. for k in fields if k in merges),
  421. ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
  422. if k in r or k in diff_r else types[k].diff_none
  423. for k in fields if k in merges),
  424. ' (%s)' % ', '.join(
  425. '+∞%' if t == float('+inf')
  426. else '-∞%' if t == float('-inf')
  427. else '%+.1f%%' % (100*t)
  428. for t in ratios
  429. if t)
  430. if any(ratios) else ''))
  431. # print total
  432. _, total = fold(results, fields=fields, merges=merges, by=[])
  433. r = total[0] if total else {}
  434. if diff_results is not None:
  435. _, diff_total = fold(diff_results,
  436. fields=fields, merges=merges, by=[])
  437. diff_r = diff_total[0] if diff_total else {}
  438. ratios = [types[k].ratio(r.get(k), diff_r.get(k))
  439. for k in fields if k in merges]
  440. print('%-36s' % 'TOTAL', end='')
  441. if diff_results is None:
  442. print(' %s' % (
  443. ' '.join(r[k].table()
  444. if k in r else types[k].none
  445. for k in fields if k in merges)))
  446. elif percent:
  447. print(' %s%s' % (
  448. ' '.join(r[k].diff_table()
  449. if k in r else types[k].diff_none
  450. for k in fields if k in merges),
  451. ' (%s)' % ', '.join(
  452. '+∞%' if t == float('+inf')
  453. else '-∞%' if t == float('-inf')
  454. else '%+.1f%%' % (100*t)
  455. for t in ratios)))
  456. else:
  457. print(' %s %s %s%s' % (
  458. ' '.join(diff_r[k].diff_table()
  459. if k in diff_r else types[k].diff_none
  460. for k in fields if k in merges),
  461. ' '.join(r[k].diff_table()
  462. if k in r else types[k].diff_none
  463. for k in fields if k in merges),
  464. ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
  465. if k in r or k in diff_r else types[k].diff_none
  466. for k in fields if k in merges),
  467. ' (%s)' % ', '.join(
  468. '+∞%' if t == float('+inf')
  469. else '-∞%' if t == float('-inf')
  470. else '%+.1f%%' % (100*t)
  471. for t in ratios
  472. if t)
  473. if any(ratios) else ''))
  474. def main(csv_paths, *, fields=None, by=None, **args):
  475. # figure out what fields to use
  476. renames = {}
  477. if fields is not None:
  478. fields_ = []
  479. for name in fields:
  480. if '=' in name:
  481. a, b = name.split('=', 1)
  482. renames[b] = a
  483. name = a
  484. fields_.append(name)
  485. fields = fields_
  486. if by is not None:
  487. by_ = []
  488. for name in by:
  489. if '=' in name:
  490. a, b = name.split('=', 1)
  491. renames[b] = a
  492. name = a
  493. by_.append(name)
  494. by = by_
  495. # include 'by' fields in fields, it doesn't make sense to not
  496. if fields is not None and by is not None:
  497. fields[:0] = [k for k in by if k not in fields]
  498. # use preconfigured merge operations unless any merge operation is
  499. # explictly specified
  500. merge_args = (args
  501. if any(args.get(m) for m in MERGES.keys())
  502. else {m: k for m, (k, _) in MERGES.items()})
  503. merges = {}
  504. for m in MERGES.keys():
  505. for k in merge_args.get(m, []):
  506. if k in merges:
  507. print("conflicting merge type for field %r?" % k)
  508. sys.exit(-1)
  509. merges[k] = m
  510. # allow renames to apply to merges
  511. for m in MERGES.keys():
  512. for k in merge_args.get(m, []):
  513. if renames.get(k, k) not in merges:
  514. merges[renames.get(k, k)] = m
  515. # ignore merges that conflict with 'by' fields
  516. if by is not None:
  517. for k in by:
  518. if k in merges:
  519. del merges[k]
  520. # find CSV files
  521. paths = []
  522. for path in csv_paths:
  523. if os.path.isdir(path):
  524. path = path + '/*.csv'
  525. for path in glob.glob(path):
  526. paths.append(path)
  527. if not paths:
  528. print('no .csv files found in %r?' % csv_paths)
  529. sys.exit(-1)
  530. results = []
  531. for path in paths:
  532. try:
  533. with openio(path) as f:
  534. reader = csv.DictReader(f, restval='')
  535. for r in reader:
  536. results.append(r)
  537. except FileNotFoundError:
  538. pass
  539. # homogenize
  540. fields, types, results = homogenize(results,
  541. fields=fields, merges=merges, renames=renames)
  542. # fold to remove duplicates
  543. fields, results = fold(results,
  544. fields=fields, merges=merges)
  545. # write results to CSV
  546. if args.get('output'):
  547. with openio(args['output'], 'w') as f:
  548. writer = csv.DictWriter(f, fields)
  549. writer.writeheader()
  550. for r in results:
  551. writer.writerow(r)
  552. # find previous results?
  553. if args.get('diff'):
  554. diff_results = []
  555. try:
  556. with openio(args['diff']) as f:
  557. reader = csv.DictReader(f, restval='')
  558. for r in reader:
  559. diff_results.append(r)
  560. except FileNotFoundError:
  561. pass
  562. # homogenize
  563. _, _, diff_results = homogenize(diff_results,
  564. fields=fields, merges=merges, renames=renames, types=types)
  565. # fold to remove duplicates
  566. _, diff_results = fold(diff_results,
  567. fields=fields, merges=merges)
  568. # print table
  569. if not args.get('quiet'):
  570. table(
  571. results,
  572. diff_results if args.get('diff') else None,
  573. fields=fields,
  574. types=types,
  575. merges=merges,
  576. by=by,
  577. **args)
  578. if __name__ == "__main__":
  579. import argparse
  580. import sys
  581. parser = argparse.ArgumentParser(
  582. description="Summarize measurements in CSV files.")
  583. parser.add_argument(
  584. 'csv_paths',
  585. nargs='*',
  586. default=CSV_PATHS,
  587. help="Description of where to find *.csv files. May be a directory "
  588. "or list of paths. Defaults to %r." % CSV_PATHS)
  589. parser.add_argument(
  590. '-q', '--quiet',
  591. action='store_true',
  592. help="Don't show anything, useful with -o.")
  593. parser.add_argument(
  594. '-o', '--output',
  595. help="Specify CSV file to store results.")
  596. parser.add_argument(
  597. '-d', '--diff',
  598. help="Specify CSV file to diff against.")
  599. parser.add_argument(
  600. '-a', '--all',
  601. action='store_true',
  602. help="Show all, not just the ones that changed.")
  603. parser.add_argument(
  604. '-p', '--percent',
  605. action='store_true',
  606. help="Only show percentage change, not a full diff.")
  607. parser.add_argument(
  608. '-f', '--fields',
  609. type=lambda x: [x.strip() for x in x.split(',')],
  610. help="Only show these fields. Can rename fields "
  611. "with new_name=old_name.")
  612. parser.add_argument(
  613. '-b', '--by',
  614. type=lambda x: [x.strip() for x in x.split(',')],
  615. help="Group by these fields. Can rename fields "
  616. "with new_name=old_name.")
  617. parser.add_argument(
  618. '--add',
  619. type=lambda x: [x.strip() for x in x.split(',')],
  620. help="Add these fields when merging.")
  621. parser.add_argument(
  622. '--mul',
  623. type=lambda x: [x.strip() for x in x.split(',')],
  624. help="Multiply these fields when merging.")
  625. parser.add_argument(
  626. '--min',
  627. type=lambda x: [x.strip() for x in x.split(',')],
  628. help="Take the minimum of these fields when merging.")
  629. parser.add_argument(
  630. '--max',
  631. type=lambda x: [x.strip() for x in x.split(',')],
  632. help="Take the maximum of these fields when merging.")
  633. parser.add_argument(
  634. '--avg',
  635. type=lambda x: [x.strip() for x in x.split(',')],
  636. help="Average these fields when merging.")
  637. parser.add_argument(
  638. '-s', '--sort',
  639. type=lambda x: [x.strip() for x in x.split(',')],
  640. help="Sort by these fields.")
  641. parser.add_argument(
  642. '-S', '--reverse-sort',
  643. type=lambda x: [x.strip() for x in x.split(',')],
  644. help="Sort by these fields, but backwards.")
  645. parser.add_argument(
  646. '-Y', '--summary',
  647. action='store_true',
  648. help="Only show the totals.")
  649. sys.exit(main(**{k: v
  650. for k, v in vars(parser.parse_intermixed_args()).items()
  651. if v is not None}))