summary.py 22 KB


  1. #!/usr/bin/env python3
  2. #
  3. # Script to summarize the outputs of other scripts. Operates on CSV files.
  4. #
  5. # Example:
  6. # ./scripts/code.py lfs.o lfs_util.o -q -o lfs.code.csv
  7. # ./scripts/data.py lfs.o lfs_util.o -q -o lfs.data.csv
  8. # ./scripts/summary.py lfs.code.csv lfs.data.csv -q -o lfs.csv
  9. # ./scripts/summary.py -Y lfs.csv -f code=code_size,data=data_size
  10. #
  11. # Copyright (c) 2022, The littlefs authors.
  12. # SPDX-License-Identifier: BSD-3-Clause
  13. #
import collections as co
import csv
import functools as ft
import glob
import math as m
import os
import re
import sys
  21. CSV_PATHS = ['*.csv']
  22. # Defaults are common fields generated by other littlefs scripts
  23. MERGES = {
  24. 'add': (
  25. ['code_size', 'data_size', 'stack_frame', 'struct_size',
  26. 'coverage_lines', 'coverage_branches',
  27. 'test_passed'],
  28. lambda xs: sum(xs[1:], start=xs[0])
  29. ),
  30. 'mul': (
  31. [],
  32. lambda xs: m.prod(xs[1:], start=xs[0])
  33. ),
  34. 'min': (
  35. [],
  36. min
  37. ),
  38. 'max': (
  39. ['stack_limit', 'coverage_hits'],
  40. max
  41. ),
  42. 'avg': (
  43. [],
  44. lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
  45. ),
  46. }
  47. def openio(path, mode='r'):
  48. if path == '-':
  49. if mode == 'r':
  50. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  51. else:
  52. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  53. else:
  54. return open(path, mode)
  55. # integer fields
  56. class IntField(co.namedtuple('IntField', 'x')):
  57. __slots__ = ()
  58. def __new__(cls, x):
  59. if isinstance(x, IntField):
  60. return x
  61. if isinstance(x, str):
  62. try:
  63. x = int(x, 0)
  64. except ValueError:
  65. # also accept +-∞ and +-inf
  66. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  67. x = float('inf')
  68. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  69. x = float('-inf')
  70. else:
  71. raise
  72. return super().__new__(cls, x)
  73. def __int__(self):
  74. assert not m.isinf(self.x)
  75. return self.x
  76. def __float__(self):
  77. return float(self.x)
  78. def __str__(self):
  79. if self.x == float('inf'):
  80. return '∞'
  81. elif self.x == float('-inf'):
  82. return '-∞'
  83. else:
  84. return str(self.x)
  85. none = '%7s' % '-'
  86. def table(self):
  87. return '%7s' % (self,)
  88. diff_none = '%7s' % '-'
  89. diff_table = table
  90. def diff_diff(self, other):
  91. new = self.x if self else 0
  92. old = other.x if other else 0
  93. diff = new - old
  94. if diff == float('+inf'):
  95. return '%7s' % '+∞'
  96. elif diff == float('-inf'):
  97. return '%7s' % '-∞'
  98. else:
  99. return '%+7d' % diff
  100. def ratio(self, other):
  101. new = self.x if self else 0
  102. old = other.x if other else 0
  103. if m.isinf(new) and m.isinf(old):
  104. return 0.0
  105. elif m.isinf(new):
  106. return float('+inf')
  107. elif m.isinf(old):
  108. return float('-inf')
  109. elif not old and not new:
  110. return 0.0
  111. elif not old:
  112. return 1.0
  113. else:
  114. return (new-old) / old
  115. def __add__(self, other):
  116. return IntField(self.x + other.x)
  117. def __mul__(self, other):
  118. return IntField(self.x * other.x)
  119. def __lt__(self, other):
  120. return self.x < other.x
  121. def __gt__(self, other):
  122. return self.__class__.__lt__(other, self)
  123. def __le__(self, other):
  124. return not self.__gt__(other)
  125. def __ge__(self, other):
  126. return not self.__lt__(other)
  127. def __truediv__(self, n):
  128. if m.isinf(self.x):
  129. return self
  130. else:
  131. return IntField(round(self.x / n))
  132. # float fields
  133. class FloatField(co.namedtuple('FloatField', 'x')):
  134. __slots__ = ()
  135. def __new__(cls, x):
  136. if isinstance(x, FloatField):
  137. return x
  138. if isinstance(x, str):
  139. try:
  140. x = float(x)
  141. except ValueError:
  142. # also accept +-∞ and +-inf
  143. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  144. x = float('inf')
  145. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  146. x = float('-inf')
  147. else:
  148. raise
  149. return super().__new__(cls, x)
  150. def __float__(self):
  151. return float(self.x)
  152. def __str__(self):
  153. if self.x == float('inf'):
  154. return '∞'
  155. elif self.x == float('-inf'):
  156. return '-∞'
  157. else:
  158. return '%.1f' % self.x
  159. none = IntField.none
  160. table = IntField.table
  161. diff_none = IntField.diff_none
  162. diff_table = IntField.diff_table
  163. diff_diff = IntField.diff_diff
  164. ratio = IntField.ratio
  165. __add__ = IntField.__add__
  166. __mul__ = IntField.__mul__
  167. __lt__ = IntField.__lt__
  168. __gt__ = IntField.__gt__
  169. __le__ = IntField.__le__
  170. __ge__ = IntField.__ge__
  171. def __truediv__(self, n):
  172. if m.isinf(self.x):
  173. return self
  174. else:
  175. return FloatField(self.x / n)
  176. # fractional fields, a/b
  177. class FracField(co.namedtuple('FracField', 'a,b')):
  178. __slots__ = ()
  179. def __new__(cls, a, b=None):
  180. if isinstance(a, FracField) and b is None:
  181. return a
  182. if isinstance(a, str) and b is None:
  183. a, b = a.split('/', 1)
  184. if b is None:
  185. b = a
  186. return super().__new__(cls, IntField(a), IntField(b))
  187. def __str__(self):
  188. return '%s/%s' % (self.a, self.b)
  189. none = '%11s %7s' % ('-', '-')
  190. def table(self):
  191. if not self.b.x:
  192. return self.none
  193. t = self.a.x/self.b.x
  194. return '%11s %7s' % (
  195. self,
  196. '∞%' if t == float('+inf')
  197. else '-∞%' if t == float('-inf')
  198. else '%.1f%%' % (100*t))
  199. diff_none = '%11s' % '-'
  200. def diff_table(self):
  201. if not self.b.x:
  202. return self.diff_none
  203. return '%11s' % (self,)
  204. def diff_diff(self, other):
  205. new_a, new_b = self if self else (IntField(0), IntField(0))
  206. old_a, old_b = other if other else (IntField(0), IntField(0))
  207. return '%11s' % ('%s/%s' % (
  208. new_a.diff_diff(old_a).strip(),
  209. new_b.diff_diff(old_b).strip()))
  210. def ratio(self, other):
  211. new_a, new_b = self if self else (IntField(0), IntField(0))
  212. old_a, old_b = other if other else (IntField(0), IntField(0))
  213. new = new_a.x/new_b.x if new_b.x else 1.0
  214. old = old_a.x/old_b.x if old_b.x else 1.0
  215. return new - old
  216. def __add__(self, other):
  217. return FracField(self.a + other.a, self.b + other.b)
  218. def __mul__(self, other):
  219. return FracField(self.a * other.a, self.b + other.b)
  220. def __lt__(self, other):
  221. self_r = self.a.x/self.b.x if self.b.x else float('-inf')
  222. other_r = other.a.x/other.b.x if other.b.x else float('-inf')
  223. return self_r < other_r
  224. def __gt__(self, other):
  225. return self.__class__.__lt__(other, self)
  226. def __le__(self, other):
  227. return not self.__gt__(other)
  228. def __ge__(self, other):
  229. return not self.__lt__(other)
  230. def __truediv__(self, n):
  231. return FracField(self.a / n, self.b / n)
  232. def homogenize(results, *,
  233. fields=None,
  234. merges=None,
  235. renames=None,
  236. types=None,
  237. **_):
  238. # rename fields?
  239. if renames is not None:
  240. results_ = []
  241. for r in results:
  242. results_.append({renames.get(k, k): v for k, v in r.items()})
  243. results = results_
  244. # find all fields
  245. if not fields:
  246. fields = co.OrderedDict()
  247. for r in results:
  248. # also remove None fields, these can get introduced by
  249. # csv.DictReader when header and rows mismatch
  250. fields.update((k, v) for k, v in r.items() if k is not None)
  251. fields = list(fields.keys())
  252. # go ahead and clean up none values, these can have a few forms
  253. results_ = []
  254. for r in results:
  255. results_.append({
  256. k: r[k] for k in fields
  257. if r.get(k) is not None and not(
  258. isinstance(r[k], str)
  259. and re.match('^\s*[+-]?\s*$', r[k]))})
  260. # find best type for all fields
  261. def try_(x, type):
  262. try:
  263. type(x)
  264. return True
  265. except ValueError:
  266. return False
  267. if types is None:
  268. types = {}
  269. for k in fields:
  270. if merges is not None and merges.get(k):
  271. for type in [IntField, FloatField, FracField]:
  272. if all(k not in r or try_(r[k], type) for r in results_):
  273. types[k] = type
  274. break
  275. else:
  276. print("no type matches field %r?" % k)
  277. sys.exit(-1)
  278. # homogenize types
  279. for k in fields:
  280. if k in types:
  281. for r in results_:
  282. if k in r:
  283. r[k] = types[k](r[k])
  284. return fields, types, results_
  285. def fold(results, *,
  286. fields=None,
  287. merges=None,
  288. by=None,
  289. **_):
  290. folding = co.OrderedDict()
  291. if by is None:
  292. by = [k for k in fields if k not in merges]
  293. for r in results:
  294. name = tuple(r.get(k) for k in by)
  295. if name not in folding:
  296. folding[name] = {k: [] for k in fields if k in merges}
  297. for k in fields:
  298. # drop all fields fields without a type
  299. if k in merges and k in r:
  300. folding[name][k].append(r[k])
  301. # merge fields, we need the count at this point for averages
  302. folded = []
  303. types = {}
  304. for name, r in folding.items():
  305. r_ = {}
  306. for k, vs in r.items():
  307. if vs:
  308. _, merge = MERGES[merges[k]]
  309. r_[k] = merge(vs)
  310. # drop all rows without any fields
  311. # and drop all empty keys
  312. if r_:
  313. folded.append(dict(
  314. {k: n for k, n in zip(by, name) if n},
  315. **r_))
  316. fields_ = by + [k for k in fields if k in merges]
  317. return fields_, folded
  318. def table(results, diff_results=None, *,
  319. fields=None,
  320. types=None,
  321. merges=None,
  322. by=None,
  323. sort=None,
  324. reverse_sort=None,
  325. summary=False,
  326. all=False,
  327. percent=False,
  328. **_):
  329. all_, all = all, __builtins__.all
  330. # fold
  331. if by is not None:
  332. fields, results = fold(results, fields=fields, merges=merges, by=by)
  333. if diff_results is not None:
  334. _, diff_results = fold(diff_results,
  335. fields=fields, merges=merges, by=by)
  336. table = {
  337. tuple(r.get(k,'') for k in fields if k not in merges): r
  338. for r in results}
  339. diff_table = {
  340. tuple(r.get(k,'') for k in fields if k not in merges): r
  341. for r in diff_results or []}
  342. # sort, note that python's sort is stable
  343. names = list(table.keys() | diff_table.keys())
  344. names.sort()
  345. if diff_results is not None:
  346. names.sort(key=lambda n: [
  347. -types[k].ratio(
  348. table.get(n,{}).get(k),
  349. diff_table.get(n,{}).get(k))
  350. for k in fields if k in merges])
  351. if sort:
  352. names.sort(key=lambda n: tuple(
  353. (table[n][k],) if k in table.get(n,{}) else ()
  354. for k in sort),
  355. reverse=True)
  356. elif reverse_sort:
  357. names.sort(key=lambda n: tuple(
  358. (table[n][k],) if k in table.get(n,{}) else ()
  359. for k in reverse_sort),
  360. reverse=False)
  361. # print header
  362. print('%-36s' % ('%s%s' % (
  363. ','.join(k for k in fields if k not in merges),
  364. ' (%d added, %d removed)' % (
  365. sum(1 for n in table if n not in diff_table),
  366. sum(1 for n in diff_table if n not in table))
  367. if diff_results is not None and not percent else '')
  368. if not summary else ''),
  369. end='')
  370. if diff_results is None:
  371. print(' %s' % (
  372. ' '.join(k.rjust(len(types[k].none))
  373. for k in fields if k in merges)))
  374. elif percent:
  375. print(' %s' % (
  376. ' '.join(k.rjust(len(types[k].diff_none))
  377. for k in fields if k in merges)))
  378. else:
  379. print(' %s %s %s' % (
  380. ' '.join(('o'+k).rjust(len(types[k].diff_none))
  381. for k in fields if k in merges),
  382. ' '.join(('n'+k).rjust(len(types[k].diff_none))
  383. for k in fields if k in merges),
  384. ' '.join(('d'+k).rjust(len(types[k].diff_none))
  385. for k in fields if k in merges)))
  386. # print entries
  387. if not summary:
  388. for name in names:
  389. r = table.get(name, {})
  390. if diff_results is not None:
  391. diff_r = diff_table.get(name, {})
  392. ratios = [types[k].ratio(r.get(k), diff_r.get(k))
  393. for k in fields if k in merges]
  394. if not any(ratios) and not all_:
  395. continue
  396. print('%-36s' % ','.join(name), end='')
  397. if diff_results is None:
  398. print(' %s' % (
  399. ' '.join(r[k].table()
  400. if k in r else types[k].none
  401. for k in fields if k in merges)))
  402. elif percent:
  403. print(' %s%s' % (
  404. ' '.join(r[k].diff_table()
  405. if k in r else types[k].diff_none
  406. for k in fields if k in merges),
  407. ' (%s)' % ', '.join(
  408. '+∞%' if t == float('+inf')
  409. else '-∞%' if t == float('-inf')
  410. else '%+.1f%%' % (100*t)
  411. for t in ratios)))
  412. else:
  413. print(' %s %s %s%s' % (
  414. ' '.join(diff_r[k].diff_table()
  415. if k in diff_r else types[k].diff_none
  416. for k in fields if k in merges),
  417. ' '.join(r[k].diff_table()
  418. if k in r else types[k].diff_none
  419. for k in fields if k in merges),
  420. ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
  421. if k in r or k in diff_r else types[k].diff_none
  422. for k in fields if k in merges),
  423. ' (%s)' % ', '.join(
  424. '+∞%' if t == float('+inf')
  425. else '-∞%' if t == float('-inf')
  426. else '%+.1f%%' % (100*t)
  427. for t in ratios
  428. if t)
  429. if any(ratios) else ''))
  430. # print total
  431. _, total = fold(results, fields=fields, merges=merges, by=[])
  432. r = total[0] if total else {}
  433. if diff_results is not None:
  434. _, diff_total = fold(diff_results,
  435. fields=fields, merges=merges, by=[])
  436. diff_r = diff_total[0] if diff_total else {}
  437. ratios = [types[k].ratio(r.get(k), diff_r.get(k))
  438. for k in fields if k in merges]
  439. print('%-36s' % 'TOTAL', end='')
  440. if diff_results is None:
  441. print(' %s' % (
  442. ' '.join(r[k].table()
  443. if k in r else types[k].none
  444. for k in fields if k in merges)))
  445. elif percent:
  446. print(' %s%s' % (
  447. ' '.join(r[k].diff_table()
  448. if k in r else types[k].diff_none
  449. for k in fields if k in merges),
  450. ' (%s)' % ', '.join(
  451. '+∞%' if t == float('+inf')
  452. else '-∞%' if t == float('-inf')
  453. else '%+.1f%%' % (100*t)
  454. for t in ratios)))
  455. else:
  456. print(' %s %s %s%s' % (
  457. ' '.join(diff_r[k].diff_table()
  458. if k in diff_r else types[k].diff_none
  459. for k in fields if k in merges),
  460. ' '.join(r[k].diff_table()
  461. if k in r else types[k].diff_none
  462. for k in fields if k in merges),
  463. ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
  464. if k in r or k in diff_r else types[k].diff_none
  465. for k in fields if k in merges),
  466. ' (%s)' % ', '.join(
  467. '+∞%' if t == float('+inf')
  468. else '-∞%' if t == float('-inf')
  469. else '%+.1f%%' % (100*t)
  470. for t in ratios
  471. if t)
  472. if any(ratios) else ''))
  473. def main(csv_paths, *, fields=None, by=None, **args):
  474. # figure out what fields to use
  475. renames = {}
  476. if fields is not None:
  477. fields_ = []
  478. for name in fields:
  479. if '=' in name:
  480. a, b = name.split('=', 1)
  481. renames[b] = a
  482. name = a
  483. fields_.append(name)
  484. fields = fields_
  485. if by is not None:
  486. by_ = []
  487. for name in by:
  488. if '=' in name:
  489. a, b = name.split('=', 1)
  490. renames[b] = a
  491. name = a
  492. by_.append(name)
  493. by = by_
  494. # include 'by' fields in fields, it doesn't make sense to not
  495. if fields is not None and by is not None:
  496. fields[:0] = [k for k in by if k not in fields]
  497. # use preconfigured merge operations unless any merge operation is
  498. # explictly specified
  499. merge_args = (args
  500. if any(args.get(m) for m in MERGES.keys())
  501. else {m: k for m, (k, _) in MERGES.items()})
  502. merges = {}
  503. for m in MERGES.keys():
  504. for k in merge_args.get(m, []):
  505. if k in merges:
  506. print("conflicting merge type for field %r?" % k)
  507. sys.exit(-1)
  508. merges[k] = m
  509. # allow renames to apply to merges
  510. for m in MERGES.keys():
  511. for k in merge_args.get(m, []):
  512. if renames.get(k, k) not in merges:
  513. merges[renames.get(k, k)] = m
  514. # ignore merges that conflict with 'by' fields
  515. if by is not None:
  516. for k in by:
  517. if k in merges:
  518. del merges[k]
  519. # find CSV files
  520. paths = []
  521. for path in csv_paths:
  522. if os.path.isdir(path):
  523. path = path + '/*.csv'
  524. for path in glob.glob(path):
  525. paths.append(path)
  526. if not paths:
  527. print('no .csv files found in %r?' % csv_paths)
  528. sys.exit(-1)
  529. results = []
  530. for path in paths:
  531. try:
  532. with openio(path) as f:
  533. reader = csv.DictReader(f)
  534. for r in reader:
  535. results.append(r)
  536. except FileNotFoundError:
  537. pass
  538. # homogenize
  539. fields, types, results = homogenize(results,
  540. fields=fields, merges=merges, renames=renames)
  541. # fold to remove duplicates
  542. fields, results = fold(results,
  543. fields=fields, merges=merges)
  544. # write results to CSV
  545. if args.get('output'):
  546. with openio(args['output'], 'w') as f:
  547. writer = csv.DictWriter(f, fields)
  548. writer.writeheader()
  549. for r in results:
  550. writer.writerow(r)
  551. # find previous results?
  552. if args.get('diff'):
  553. diff_results = []
  554. try:
  555. with openio(args['diff']) as f:
  556. reader = csv.DictReader(f)
  557. for r in reader:
  558. diff_results.append(r)
  559. except FileNotFoundError:
  560. pass
  561. # homogenize
  562. _, _, diff_results = homogenize(diff_results,
  563. fields=fields, merges=merges, renames=renames, types=types)
  564. # fold to remove duplicates
  565. _, diff_results = fold(diff_results,
  566. fields=fields, merges=merges)
  567. # print table
  568. if not args.get('quiet'):
  569. table(
  570. results,
  571. diff_results if args.get('diff') else None,
  572. fields=fields,
  573. types=types,
  574. merges=merges,
  575. by=by,
  576. **args)
  577. if __name__ == "__main__":
  578. import argparse
  579. import sys
  580. parser = argparse.ArgumentParser(
  581. description="Summarize measurements in CSV files.")
  582. parser.add_argument(
  583. 'csv_paths',
  584. nargs='*',
  585. default=CSV_PATHS,
  586. help="Description of where to find *.csv files. May be a directory "
  587. "or list of paths. Defaults to %r." % CSV_PATHS)
  588. parser.add_argument(
  589. '-q', '--quiet',
  590. action='store_true',
  591. help="Don't show anything, useful with -o.")
  592. parser.add_argument(
  593. '-o', '--output',
  594. help="Specify CSV file to store results.")
  595. parser.add_argument(
  596. '-d', '--diff',
  597. help="Specify CSV file to diff against.")
  598. parser.add_argument(
  599. '-a', '--all',
  600. action='store_true',
  601. help="Show all, not just the ones that changed.")
  602. parser.add_argument(
  603. '-p', '--percent',
  604. action='store_true',
  605. help="Only show percentage change, not a full diff.")
  606. parser.add_argument(
  607. '-f', '--fields',
  608. type=lambda x: [x.strip() for x in x.split(',')],
  609. help="Only show these fields. Can rename fields "
  610. "with old_name=new_name.")
  611. parser.add_argument(
  612. '-b', '--by',
  613. type=lambda x: [x.strip() for x in x.split(',')],
  614. help="Group by these fields. Can rename fields "
  615. "with old_name=new_name.")
  616. parser.add_argument(
  617. '--add',
  618. type=lambda x: [x.strip() for x in x.split(',')],
  619. help="Add these fields when merging.")
  620. parser.add_argument(
  621. '--mul',
  622. type=lambda x: [x.strip() for x in x.split(',')],
  623. help="Multiply these fields when merging.")
  624. parser.add_argument(
  625. '--min',
  626. type=lambda x: [x.strip() for x in x.split(',')],
  627. help="Take the minimum of these fields when merging.")
  628. parser.add_argument(
  629. '--max',
  630. type=lambda x: [x.strip() for x in x.split(',')],
  631. help="Take the maximum of these fields when merging.")
  632. parser.add_argument(
  633. '--avg',
  634. type=lambda x: [x.strip() for x in x.split(',')],
  635. help="Average these fields when merging.")
  636. parser.add_argument(
  637. '-s', '--sort',
  638. type=lambda x: [x.strip() for x in x.split(',')],
  639. help="Sort by these fields.")
  640. parser.add_argument(
  641. '-S', '--reverse-sort',
  642. type=lambda x: [x.strip() for x in x.split(',')],
  643. help="Sort by these fields, but backwards.")
  644. parser.add_argument(
  645. '-Y', '--summary',
  646. action='store_true',
  647. help="Only show the totals.")
  648. sys.exit(main(**{k: v
  649. for k, v in vars(parser.parse_intermixed_args()).items()
  650. if v is not None}))