||
- #!/usr/bin/env python3
- #
- # Script to summarize the outputs of other scripts. Operates on CSV files.
- #
- # Example:
- # ./scripts/code.py lfs.o lfs_util.o -q -o lfs.code.csv
- # ./scripts/data.py lfs.o lfs_util.o -q -o lfs.data.csv
- # ./scripts/summary.py lfs.code.csv lfs.data.csv -q -o lfs.csv
- # ./scripts/summary.py -Y lfs.csv -f code=code_size,data=data_size
- #
- # Copyright (c) 2022, The littlefs authors.
- # SPDX-License-Identifier: BSD-3-Clause
- #
import builtins
import collections as co
import csv
import functools as ft
import glob
import math as m
import os
import re
import sys
# Default locations searched for CSV input when none are given
CSV_PATHS = ['*.csv']

# Merge operations, keyed by name. Each entry is a pair of
# (fields that use this merge by default, function folding a non-empty
# list of values into one value).
#
# Defaults are common fields generated by other littlefs scripts
MERGES = {
    'add': (
        ['code_size', 'data_size', 'stack_frame', 'struct_size',
            'coverage_lines', 'coverage_branches',
            'test_passed',
            'bench_read', 'bench_prog', 'bench_erased'],
        # seed with the first value so field types only need __add__
        lambda xs: sum(xs[1:], start=xs[0])
    ),
    'mul': (
        [],
        # seed with the first value so field types only need __mul__
        lambda xs: m.prod(xs[1:], start=xs[0])
    ),
    'min': (
        [],
        min
    ),
    'max': (
        ['stack_limit', 'coverage_hits'],
        max
    ),
    'avg': (
        [],
        # sum as in 'add', then divide by the number of values
        lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
    ),
}
def openio(path, mode='r'):
    """Open path, treating '-' as stdin/stdout.

    For '-', a duplicate of the stdin file descriptor is returned when
    mode is exactly 'r', otherwise a duplicate of the stdout file
    descriptor. Duplicating means closing the returned file does not close
    the process's own stdin/stdout. Any other path is passed to open()
    with the given mode.
    """
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
    else:
        return open(path, mode)
# integer fields
class IntField(co.namedtuple('IntField', 'x')):
    """An integer measurement, which may also be +-infinity.

    Strings are parsed with int(x, 0), so base prefixes such as 0x are
    accepted, and '∞'/'inf' with an optional sign map to float infinities.
    Any other unparsable string raises ValueError.
    """
    __slots__ = ()

    def __new__(cls, x):
        if isinstance(x, IntField):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = float('inf')
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = float('-inf')
                else:
                    raise
        return super().__new__(cls, x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    def __str__(self):
        if self.x == float('inf'):
            return '∞'
        elif self.x == float('-inf'):
            return '-∞'
        else:
            return str(self.x)

    # placeholder shown in a table column when a row has no value
    none = '%7s' % '-'

    def table(self):
        """Render this value as a 7-wide table cell."""
        return '%7s' % (self,)

    # placeholder/cell rendering used by the diff views
    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        """Render the signed change from other to self as a 7-wide cell.

        Called unbound, so either side may be None, which counts as 0.
        """
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        # subtracting equal infinities gives nan (the only value unequal
        # to itself), which '%d' can't format; nothing changed, so show 0
        if diff != diff:
            diff = 0
        if diff == float('+inf'):
            return '%7s' % '+∞'
        elif diff == float('-inf'):
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        """Return the relative change from other to self as a float.

        Called unbound, so either side may be None, which counts as 0.
        Going from 0 to a non-zero value is reported as 1.0 (+100%).
        """
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return float('+inf')
        elif m.isinf(old):
            return float('-inf')
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return IntField(self.x + other.x)

    def __mul__(self, other):
        return IntField(self.x * other.x)

    def __lt__(self, other):
        return self.x < other.x

    def __gt__(self, other):
        return self.__class__.__lt__(other, self)

    def __le__(self, other):
        return not self.__gt__(other)

    def __ge__(self, other):
        return not self.__lt__(other)

    def __truediv__(self, n):
        # infinities stay as they are, anything else rounds to an integer
        if m.isinf(self.x):
            return self
        else:
            return IntField(round(self.x / n))
# float fields
class FloatField(co.namedtuple('FloatField', 'x')):
    """A floating-point measurement, which may also be +-infinity.

    Strings are parsed with float(), and '∞'/'inf' with an optional sign
    map to infinities. Any other unparsable string raises ValueError.
    Values are rendered with one decimal place.
    """
    __slots__ = ()

    def __new__(cls, x):
        if isinstance(x, FloatField):
            return x
        if isinstance(x, str):
            try:
                x = float(x)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = float('inf')
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = float('-inf')
                else:
                    raise
        return super().__new__(cls, x)

    def __float__(self):
        return float(self.x)

    def __str__(self):
        if self.x == float('inf'):
            return '∞'
        elif self.x == float('-inf'):
            return '-∞'
        else:
            return '%.1f' % self.x

    # these only depend on self.x/str(self), so the IntField versions work
    none = IntField.none
    table = IntField.table
    diff_none = IntField.diff_none
    diff_table = IntField.diff_table
    ratio = IntField.ratio
    __lt__ = IntField.__lt__
    __gt__ = IntField.__gt__
    __le__ = IntField.__le__
    __ge__ = IntField.__ge__

    def diff_diff(self, other):
        """Render the signed change from other to self as a 7-wide cell.

        Called unbound, so either side may be None, which counts as 0.
        Uses one decimal place, matching __str__, instead of truncating
        the change to an integer.
        """
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        # subtracting equal infinities gives nan (the only value unequal
        # to itself); nothing changed, so show 0
        if diff != diff:
            diff = 0
        if diff == float('+inf'):
            return '%7s' % '+∞'
        elif diff == float('-inf'):
            return '%7s' % '-∞'
        else:
            return '%+7.1f' % diff

    # arithmetic must return FloatField, reusing IntField's operators would
    # turn results into IntField and make averages round to integers
    def __add__(self, other):
        return FloatField(self.x + other.x)

    def __mul__(self, other):
        return FloatField(self.x * other.x)

    def __truediv__(self, n):
        if m.isinf(self.x):
            return self
        else:
            return FloatField(self.x / n)
# fractional fields, a/b
class FracField(co.namedtuple('FracField', 'a,b')):
    """A fraction a/b whose numerator and denominator are IntFields.

    Accepts an existing FracField, a string of the form 'a/b', or separate
    a and b values. A single non-string value is used for both a and b. A
    string without a '/' raises ValueError.
    """
    __slots__ = ()

    def __new__(cls, a, b=None):
        if isinstance(a, FracField) and b is None:
            return a
        if isinstance(a, str) and b is None:
            a, b = a.split('/', 1)
        if b is None:
            b = a
        return super().__new__(cls, IntField(a), IntField(b))

    def __str__(self):
        return '%s/%s' % (self.a, self.b)

    # placeholder shown in a table column when a row has no value
    none = '%11s %7s' % ('-', '-')

    def table(self):
        """Render as 'a/b' plus a percentage, or the placeholder if b is 0."""
        if not self.b.x:
            return self.none

        t = self.a.x/self.b.x
        return '%11s %7s' % (
            self,
            '∞%' if t == float('+inf')
            else '-∞%' if t == float('-inf')
            else '%.1f%%' % (100*t))

    # placeholder used by the diff views
    diff_none = '%11s' % '-'

    def diff_table(self):
        """Render as 'a/b' for the diff views, or the placeholder if b is 0."""
        if not self.b.x:
            return self.diff_none

        return '%11s' % (self,)

    def diff_diff(self, other):
        """Render the signed change of a and of b as 'da/db'.

        Called unbound, so either side may be None, which counts as 0/0.
        """
        new_a, new_b = self if self else (IntField(0), IntField(0))
        old_a, old_b = other if other else (IntField(0), IntField(0))
        return '%11s' % ('%s/%s' % (
            new_a.diff_diff(old_a).strip(),
            new_b.diff_diff(old_b).strip()))

    def ratio(self, other):
        """Return the change in a/b from other to self.

        Called unbound, so either side may be None, which counts as 0/0.
        A fraction with a zero denominator counts as 1.0.
        """
        new_a, new_b = self if self else (IntField(0), IntField(0))
        old_a, old_b = other if other else (IntField(0), IntField(0))
        new = new_a.x/new_b.x if new_b.x else 1.0
        old = old_a.x/old_b.x if old_b.x else 1.0
        return new - old

    def __add__(self, other):
        # componentwise, so 1/2 + 3/4 = 4/6
        return FracField(self.a + other.a, self.b + other.b)

    def __mul__(self, other):
        # multiply both components, the denominators must not be added
        return FracField(self.a * other.a, self.b * other.b)

    def __lt__(self, other):
        # order by the value of a/b, zero denominators sort first
        self_r = self.a.x/self.b.x if self.b.x else float('-inf')
        other_r = other.a.x/other.b.x if other.b.x else float('-inf')
        return self_r < other_r

    def __gt__(self, other):
        return self.__class__.__lt__(other, self)

    def __le__(self, other):
        return not self.__gt__(other)

    def __ge__(self, other):
        return not self.__lt__(other)

    def __truediv__(self, n):
        return FracField(self.a / n, self.b / n)
def homogenize(results, *,
        fields=None,
        merges=None,
        renames=None,
        types=None,
        **_):
    """Normalize raw CSV rows into a consistent set of typed fields.

    results is a list of dicts, such as rows from csv.DictReader. renames
    maps old field names to new ones. If fields is empty, every non-None
    key seen in the rows is used, in first-seen order. Values that are
    None, or strings holding only whitespace and an optional sign, are
    dropped from their row.

    If types is None, each field with an entry in merges is given the
    first of IntField, FloatField, FracField that parses all of its
    values. If none does, a message is printed and the process exits with
    status -1. If types is given it is used, and returned, as is.

    Returns (fields, types, results), where results is a new list of dicts
    with typed fields converted by their type.
    """
    # rename fields?
    if renames is not None:
        results = [
            {renames.get(k, k): v for k, v in r.items()}
            for r in results]

    # find all fields
    if not fields:
        fields = co.OrderedDict()
        for r in results:
            # also remove None fields, these can get introduced by
            # csv.DictReader when header and rows mismatch
            fields.update((k, v) for k, v in r.items() if k is not None)
        fields = list(fields.keys())

    # go ahead and clean up none values, these can have a few forms
    results_ = []
    for r in results:
        results_.append({
            k: r[k] for k in fields
            if r.get(k) is not None and not (
                isinstance(r[k], str)
                and re.match(r'^\s*[+-]?\s*$', r[k]))})

    # find best type for all fields
    def try_(x, type_):
        try:
            type_(x)
            return True
        except ValueError:
            return False

    if types is None:
        types = {}
        for k in fields:
            if merges is not None and merges.get(k):
                for type_ in [IntField, FloatField, FracField]:
                    if all(k not in r or try_(r[k], type_)
                            for r in results_):
                        types[k] = type_
                        break
                else:
                    print("no type matches field %r?" % k)
                    sys.exit(-1)

    # homogenize types
    for k in fields:
        if k in types:
            for r in results_:
                if k in r:
                    r[k] = types[k](r[k])

    return fields, types, results_
def fold(results, *,
        fields=None,
        merges=None,
        by=None,
        **_):
    """Group rows by their 'by' fields and merge the remaining fields.

    merges maps a field name to the name of an operation in MERGES. If by
    is None, rows are grouped by every field not in merges. Within a
    group, each merged field's values are combined by its operation.
    Fields that are neither in by nor in merges are dropped, as are groups
    with no merged values and 'by' values that are empty.

    Returns (fields, folded), where fields is by followed by the merged
    fields and folded is a list of dicts in first-seen group order.
    """
    folding = co.OrderedDict()
    if by is None:
        by = [k for k in fields if k not in merges]

    for r in results:
        name = tuple(r.get(k) for k in by)
        if name not in folding:
            folding[name] = {k: [] for k in fields if k in merges}
        for k in fields:
            # drop all fields without a type
            if k in merges and k in r:
                folding[name][k].append(r[k])

    # merge fields, we need the count at this point for averages
    folded = []
    for name, r in folding.items():
        r_ = {}
        for k, vs in r.items():
            if vs:
                _, merge = MERGES[merges[k]]
                r_[k] = merge(vs)

        # drop all rows without any fields
        # and drop all empty keys
        if r_:
            folded.append(dict(
                {k: n for k, n in zip(by, name) if n},
                **r_))

    fields_ = by + [k for k in fields if k in merges]
    return fields_, folded
def table(results, diff_results=None, *,
        fields=None,
        types=None,
        merges=None,
        by=None,
        sort=None,
        reverse_sort=None,
        summary=False,
        all=False,
        percent=False,
        **_):
    """Print results, or a diff against diff_results, as a text table.

    results and diff_results are lists of dicts with typed fields. fields
    lists the columns, types maps merged fields to their field class, and
    merges maps merged fields to a merge operation. Fields not in merges
    name each row.

    by, if given, folds both result sets by these fields first. sort and
    reverse_sort list fields to order rows by, largest first and smallest
    first respectively; sort wins if both are given. summary prints only
    the TOTAL row. all also shows rows that did not change in a diff.
    percent shows the new values and percentage change instead of the full
    old/new/delta diff.
    """
    # the 'all' flag shadows the builtin, keep the flag under another name
    # and restore the builtin. Note __builtins__ can't be used for this, it
    # is a dict rather than a module when this file is imported
    all_, all = all, builtins.all

    # fold
    if by is not None:
        fields, results = fold(results, fields=fields, merges=merges, by=by)
        if diff_results is not None:
            _, diff_results = fold(diff_results,
                fields=fields, merges=merges, by=by)

    # fields that name a row vs fields that hold measurements
    keys = [k for k in fields if k not in merges]
    measures = [k for k in fields if k in merges]

    rows = {
        tuple(r.get(k, '') for k in keys): r
        for r in results}
    diff_rows = {
        tuple(r.get(k, '') for k in keys): r
        for r in diff_results or []}

    # sort, note that python's sort is stable
    names = list(rows.keys() | diff_rows.keys())
    names.sort()
    if diff_results is not None:
        # biggest relative change first
        names.sort(key=lambda n: [
            -types[k].ratio(
                rows.get(n, {}).get(k),
                diff_rows.get(n, {}).get(k))
            for k in measures])
    if sort:
        names.sort(key=lambda n: tuple(
                (rows[n][k],) if k in rows.get(n, {}) else ()
                for k in sort),
            reverse=True)
    elif reverse_sort:
        names.sort(key=lambda n: tuple(
                (rows[n][k],) if k in rows.get(n, {}) else ()
                for k in reverse_sort),
            reverse=False)

    def fmt_ratio(t):
        # render a ratio as a signed percentage
        if t == float('+inf'):
            return '+∞%'
        elif t == float('-inf'):
            return '-∞%'
        else:
            return '%+.1f%%' % (100*t)

    def print_row(label, r, diff_r, ratios):
        # print one row, diff_r and ratios are only used when diffing
        print('%-36s' % label, end='')
        if diff_results is None:
            print(' %s' % (
                ' '.join(r[k].table()
                    if k in r else types[k].none
                    for k in measures)))
        elif percent:
            print(' %s%s' % (
                ' '.join(r[k].diff_table()
                    if k in r else types[k].diff_none
                    for k in measures),
                ' (%s)' % ', '.join(fmt_ratio(t) for t in ratios)))
        else:
            print(' %s %s %s%s' % (
                ' '.join(diff_r[k].diff_table()
                    if k in diff_r else types[k].diff_none
                    for k in measures),
                ' '.join(r[k].diff_table()
                    if k in r else types[k].diff_none
                    for k in measures),
                ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                    if k in r or k in diff_r else types[k].diff_none
                    for k in measures),
                # only list the fields that actually changed
                ' (%s)' % ', '.join(fmt_ratio(t) for t in ratios if t)
                    if any(ratios) else ''))

    # print header
    if summary:
        title = ''
    else:
        title = ','.join(keys)
        if diff_results is not None and not percent:
            title += ' (%d added, %d removed)' % (
                sum(1 for n in rows if n not in diff_rows),
                sum(1 for n in diff_rows if n not in rows))
    print('%-36s' % title, end='')

    if diff_results is None:
        print(' %s' % (
            ' '.join(k.rjust(len(types[k].none))
                for k in measures)))
    elif percent:
        print(' %s' % (
            ' '.join(k.rjust(len(types[k].diff_none))
                for k in measures)))
    else:
        # old, new, and delta columns
        print(' %s %s %s' % (
            ' '.join(('o'+k).rjust(len(types[k].diff_none))
                for k in measures),
            ' '.join(('n'+k).rjust(len(types[k].diff_none))
                for k in measures),
            ' '.join(('d'+k).rjust(len(types[k].diff_none))
                for k in measures)))

    # print entries
    if not summary:
        for name in names:
            r = rows.get(name, {})
            diff_r = None
            ratios = None
            if diff_results is not None:
                diff_r = diff_rows.get(name, {})
                ratios = [types[k].ratio(r.get(k), diff_r.get(k))
                    for k in measures]
                # hide unchanged rows unless asked for everything
                if not any(ratios) and not all_:
                    continue
            print_row(','.join(name), r, diff_r, ratios)

    # print total
    _, total = fold(results, fields=fields, merges=merges, by=[])
    r = total[0] if total else {}
    diff_r = None
    ratios = None
    if diff_results is not None:
        _, diff_total = fold(diff_results,
            fields=fields, merges=merges, by=[])
        diff_r = diff_total[0] if diff_total else {}
        ratios = [types[k].ratio(r.get(k), diff_r.get(k))
            for k in measures]
    print_row('TOTAL', r, diff_r, ratios)
def _parse_renames(names, renames):
    """Strip new_name=old_name entries down to new_name, noting the rename.

    Each rename is recorded in renames as renames[old_name] = new_name.
    Returns the list of resulting names.
    """
    names_ = []
    for name in names:
        if '=' in name:
            a, b = name.split('=', 1)
            renames[b] = a
            name = a
        names_.append(name)
    return names_


def _read_csv(path):
    """Return the rows of the CSV file at path as a list of dicts.

    A file that does not exist is treated as empty.
    """
    rows = []
    try:
        with openio(path) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                rows.append(r)
    except FileNotFoundError:
        pass
    return rows


def main(csv_paths, *, fields=None, by=None, **args):
    """Merge the given CSV files, then optionally write and print them.

    csv_paths lists files, directories (searched for *.csv), or glob
    patterns. fields and by list field names, each optionally written as
    new_name=old_name to rename a field. The remaining keyword arguments
    are the parsed command line options: a merge operation name from
    MERGES with a list of fields, 'output', 'diff', 'quiet', and the
    options forwarded to table().

    Prints a message and exits with status -1 if a field is given two
    merge operations or if no CSV files are found.
    """
    # figure out what fields to use
    renames = {}

    if fields is not None:
        fields = _parse_renames(fields, renames)

    if by is not None:
        by = _parse_renames(by, renames)

    # include 'by' fields in fields, it doesn't make sense to not
    if fields is not None and by is not None:
        fields[:0] = [k for k in by if k not in fields]

    # use preconfigured merge operations unless any merge operation is
    # explicitly specified
    merge_args = (args
        if any(args.get(op) for op in MERGES.keys())
        else {op: k for op, (k, _) in MERGES.items()})
    merges = {}
    for op in MERGES.keys():
        for k in merge_args.get(op, []):
            if k in merges:
                print("conflicting merge type for field %r?" % k)
                sys.exit(-1)
            merges[k] = op
    # allow renames to apply to merges
    for op in MERGES.keys():
        for k in merge_args.get(op, []):
            if renames.get(k, k) not in merges:
                merges[renames.get(k, k)] = op
    # ignore merges that conflict with 'by' fields
    if by is not None:
        for k in by:
            if k in merges:
                del merges[k]

    # find CSV files
    paths = []
    for path in csv_paths:
        if os.path.isdir(path):
            path = path + '/*.csv'

        for path in glob.glob(path):
            paths.append(path)

    if not paths:
        print('no .csv files found in %r?' % csv_paths)
        sys.exit(-1)

    results = []
    for path in paths:
        results.extend(_read_csv(path))

    # homogenize
    fields, types, results = homogenize(results,
        fields=fields, merges=merges, renames=renames)

    # fold to remove duplicates
    fields, results = fold(results,
        fields=fields, merges=merges)

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f, fields)
            writer.writeheader()
            for r in results:
                writer.writerow(r)

    # find previous results?
    if args.get('diff'):
        diff_results = _read_csv(args['diff'])

        # homogenize, reusing the types found for the new results
        _, _, diff_results = homogenize(diff_results,
            fields=fields, merges=merges, renames=renames, types=types)

        # fold to remove duplicates
        _, diff_results = fold(diff_results,
            fields=fields, merges=merges)

    # print table
    if not args.get('quiet'):
        table(
            results,
            diff_results if args.get('diff') else None,
            fields=fields,
            types=types,
            merges=merges,
            by=by,
            **args)
if __name__ == "__main__":
    import argparse
    import sys

    def _comma_list(x):
        # split a comma-separated option value into stripped names
        return [s.strip() for s in x.split(',')]

    parser = argparse.ArgumentParser(
        description="Summarize measurements in CSV files.")
    parser.add_argument(
        'csv_paths',
        nargs='*',
        default=CSV_PATHS,
        help="Description of where to find *.csv files. May be a directory "
            "or list of paths. Defaults to %r." % CSV_PATHS)
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-f', '--fields',
        type=_comma_list,
        help="Only show these fields. Can rename fields "
            "with new_name=old_name.")
    parser.add_argument(
        '-b', '--by',
        type=_comma_list,
        help="Group by these fields. Can rename fields "
            "with new_name=old_name.")
    parser.add_argument(
        '--add',
        type=_comma_list,
        help="Add these fields when merging.")
    parser.add_argument(
        '--mul',
        type=_comma_list,
        help="Multiply these fields when merging.")
    parser.add_argument(
        '--min',
        type=_comma_list,
        help="Take the minimum of these fields when merging.")
    parser.add_argument(
        '--max',
        type=_comma_list,
        help="Take the maximum of these fields when merging.")
    parser.add_argument(
        '--avg',
        type=_comma_list,
        help="Average these fields when merging.")
    parser.add_argument(
        '-s', '--sort',
        type=_comma_list,
        help="Sort by these fields.")
    parser.add_argument(
        '-S', '--reverse-sort',
        type=_comma_list,
        help="Sort by these fields, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the totals.")
    # drop unset options so main()'s keyword defaults apply
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))
|