Переглянути джерело

Consistent handling of by/field arguments for plot.py and summary.py

Now both scripts also fall back to guessing what fields to use based on
what fields can be converted to integers. This is more fallible, and
doesn't work for tests/benchmarks, but in those cases explicit fields
can be used (which is what would be needed without guessing anyway).
Christopher Haster 3 роки тому
батько
коміт
fb58148df2
3 змінених файлів з 208 додано та 205 видалено
  1. 2 1
      Makefile
  2. 37 20
      scripts/plot.py
  3. 169 184
      scripts/summary.py

+ 2 - 1
Makefile

@@ -170,10 +170,11 @@ coverage: $(GCDA)
 .PHONY: summary sizes
 summary sizes: $(BUILDDIR)lfs.csv
 	$(strip ./scripts/summary.py -Y $^ \
-		-f code=code_size,$\
+		-fcode=code_size,$\
 			data=data_size,$\
 			stack=stack_limit,$\
 			struct=struct_size \
+		--max=stack \
 		$(SUMMARYFLAGS))
 
 

+ 37 - 20
scripts/plot.py

@@ -330,12 +330,13 @@ def collect(csv_paths, renames=[]):
 
     return results
 
-def dataset(results, x=None, y=None, defines={}):
+def dataset(results, x=None, y=None, define=[]):
     # organize by 'by', x, and y
     dataset = {}
-    for i, r in enumerate(results):
+    i = 0
+    for r in results:
         # filter results by matching defines
-        if not all(k in r and r[k] in vs for k, vs in defines.items()):
+        if not all(k in r and r[k] in vs for k, vs in define):
             continue
 
         # find xs
@@ -348,6 +349,7 @@ def dataset(results, x=None, y=None, defines={}):
                 continue
         else:
             x_ = i
+            i += 1
 
         # find ys
         if y is not None:
@@ -368,14 +370,29 @@ def dataset(results, x=None, y=None, defines={}):
 
     return dataset
 
-def datasets(results, by=None, x=None, y=None, defines={}):
+def datasets(results, by=None, x=None, y=None, define=[]):
     # filter results by matching defines
     results_ = []
     for r in results:
-        if all(k in r and r[k] in vs for k, vs in defines.items()):
+        if all(k in r and r[k] in vs for k, vs in define):
             results_.append(r)
     results = results_
 
+    # if y not specified, try to guess from data
+    if y is None:
+        y = co.OrderedDict()
+        for r in results:
+            for k, v in r.items():
+                if by is not None and k in by:
+                    continue
+                if y.get(k, True):
+                    try:
+                        dat(v)
+                        y[k] = True
+                    except ValueError:
+                        y[k] = False
+        y = list(k for k,v in y.items() if v)
+
     if by is not None:
         # find all 'by' values
         ks = set()
@@ -387,13 +404,17 @@ def datasets(results, by=None, x=None, y=None, defines={}):
     datasets = co.OrderedDict()
     for ks_ in (ks if by is not None else [()]):
         for x_ in (x if x is not None else [None]):
-            for y_ in (y if y is not None else [None]):
-                datasets[ks_ + (x_, y_)] = dataset(
+            for y_ in y:
+                # hide x/y if there is only one field
+                k_x = x_ if len(x or []) > 1 else ''
+                k_y = y_ if len(y or []) > 1 else ''
+
+                datasets[ks_ + (k_x, k_y)] = dataset(
                     results,
                     x_,
                     y_,
-                    {by_: {k_} for by_, k_ in zip(by, ks_)}
-                        if by is not None else {})
+                    [(by_, k_) for by_, k_ in zip(by, ks_)]
+                        if by is not None else [])
 
     return datasets
     
@@ -431,7 +452,7 @@ def main(csv_paths, *,
     if ylim is not None and len(ylim) == 1:
         ylim = (0, ylim[0])
 
-    # seperate out renames
+    # separate out renames
     renames = [k.split('=', 1)
         for k in it.chain(by or [], x or [], y or [])
         if '=' in k]
@@ -452,7 +473,7 @@ def main(csv_paths, *,
         results = collect(csv_paths, renames)
 
         # then extract the requested datasets
-        datasets_ = datasets(results, by, x, y, dict(define))
+        datasets_ = datasets(results, by, x, y, define)
 
         # what colors to use?
         if colors is not None:
@@ -483,10 +504,7 @@ def main(csv_paths, *,
                         else '%s ' % line_chars_[i % len(line_chars_)]
                         if line_chars is not None
                         else '',
-                    ','.join(k_ for i, k_ in enumerate(k)
-                        if k_
-                        if not (i == len(k)-2 and len(x) == 1)
-                        if not (i == len(k)-1 and len(y) == 1)))
+                    ','.join(k_ for k_ in k if k_))
 
                 if label:
                     legend_.append(label)
@@ -685,7 +703,7 @@ if __name__ == "__main__":
         '-b', '--by',
         type=lambda x: [x.strip() for x in x.split(',')],
         help="Fields to render as separate plots. All other fields will be "
-            "summed. Can rename fields with new_name=old_name.")
+            "summed as needed. Can rename fields with new_name=old_name.")
     parser.add_argument(
         '-x',
         type=lambda x: [x.strip() for x in x.split(',')],
@@ -694,15 +712,14 @@ if __name__ == "__main__":
     parser.add_argument(
         '-y',
         type=lambda x: [x.strip() for x in x.split(',')],
-        required=True,
         help="Fields to use for the y-axis. Can rename fields with "
             "new_name=old_name.")
     parser.add_argument(
         '-D', '--define',
-        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
         action='append',
-        help="Only include rows where this field is this value (field=value). "
-            "May include comma-separated options.")
+        help="Only include rows where this field is this value. May include "
+            "comma-separated options.")
     parser.add_argument(
         '--color',
         choices=['never', 'always', 'auto'],

+ 169 - 184
scripts/summary.py

@@ -16,6 +16,7 @@ import collections as co
 import csv
 import functools as ft
 import glob
+import itertools as it
 import math as m
 import os
 import re
@@ -23,31 +24,13 @@ import re
 
 CSV_PATHS = ['*.csv']
 
-# Defaults are common fields generated by other littlefs scripts
-MERGES = {
-    'add': (
-        ['code_size', 'data_size', 'stack_frame', 'struct_size',
-            'coverage_lines', 'coverage_branches',
-            'test_passed',
-            'bench_read', 'bench_prog', 'bench_erased'],
-        lambda xs: sum(xs[1:], start=xs[0])
-    ),
-    'mul': (
-        [],
-        lambda xs: m.prod(xs[1:], start=xs[0])
-    ),
-    'min': (
-        [],
-        min
-    ),
-    'max': (
-        ['stack_limit', 'coverage_hits'],
-        max
-    ),
-    'avg': (
-        [],
-        lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
-    ),
+# supported merge operations
+OPS = {
+    'add': lambda xs: sum(xs[1:], start=xs[0]),
+    'mul': lambda xs: m.prod(xs[1:], start=xs[0]),
+    'min': min,
+    'max': max,
+    'avg': lambda xs: sum(xs[1:], start=xs[0]) / len(xs),
 }
 
 
@@ -273,112 +256,142 @@ class FracField(co.namedtuple('FracField', 'a,b')):
     def __truediv__(self, n):
         return FracField(self.a / n, self.b / n)
 
+# available types
+TYPES = [IntField, FloatField, FracField]
+
 
 def homogenize(results, *,
+        by=None,
         fields=None,
-        merges=None,
-        renames=None,
+        renames=[],
+        define={},
         types=None,
         **_):
+    results = results.copy()
+
     # rename fields?
-    if renames is not None:
+    if renames:
+        for r in results:
+            # make a copy so renames can overlap
+            r_ = {}
+            for new_k, old_k in renames:
+                if old_k in r:
+                    r_[new_k] = r[old_k]
+            r.update(r_)
+
+    # filter by matching defines
+    if define:
         results_ = []
         for r in results:
-            results_.append({renames.get(k, k): v for k, v in r.items()})
+            if all(k in r and r[k] in vs for k, vs in define):
+                results_.append(r)
         results = results_
 
-    # find all fields
-    if not fields:
+    # if fields not specified, try to guess from data
+    if fields is None:
         fields = co.OrderedDict()
         for r in results:
-            # also remove None fields, these can get introduced by
-            # csv.DictReader when header and rows mismatch
-            fields.update((k, v) for k, v in r.items() if k is not None)
-        fields = list(fields.keys())
+            for k, v in r.items():
+                if by is not None and k in by:
+                    continue
+                types_ = []
+                for type in fields.get(k, TYPES):
+                    try:
+                        type(v)
+                        types_.append(type)
+                    except ValueError:
+                        pass
+                fields[k] = types_
+        fields = list(k for k,v in fields.items() if v)
+
+    # infer 'by' fields?
+    if by is None:
+        by = co.OrderedDict()
+        for r in results:
+            # also ignore None keys, these are introduced by csv.DictReader
+            # when header + row mismatch
+            by.update((k, True) for k in r.keys()
+                if k is not None
+                    and k not in fields
+                    and not any(k == old_k for _, old_k in renames))
+        by = list(by.keys()) 
 
     # go ahead and clean up none values, these can have a few forms
     results_ = []
     for r in results:
         results_.append({
-            k: r[k] for k in fields
-            if r.get(k) is not None and not(
+            k: r[k] for k in it.chain(by, fields)
+            if r.get(k) is not None and not (
                 isinstance(r[k], str)
                 and re.match('^\s*[+-]?\s*$', r[k]))})
+    results = results_
 
     # find best type for all fields
-    def try_(x, type):
-        try:
-            type(x)
-            return True
-        except ValueError:
-            return False
-
     if types is None:
+        def is_type(x, type):
+            try:
+                type(x)
+                return True
+            except ValueError:
+                return False
+
         types = {}
         for k in fields:
-            if merges is not None and merges.get(k):
-                for type in [IntField, FloatField, FracField]:
-                    if all(k not in r or try_(r[k], type) for r in results_):
-                        types[k] = type
-                        break
-                else:
-                    print("no type matches field %r?" % k)
-                    sys.exit(-1)
+            for type in TYPES:
+                if all(k not in r or is_type(r[k], type) for r in results_):
+                    types[k] = type
+                    break
+            else:
+                print("no type matches field %r?" % k)
+                sys.exit(-1)
 
     # homogenize types
-    for k in fields:
-        if k in types:
-            for r in results_:
-                if k in r:
-                    r[k] = types[k](r[k])
+    for r in results:
+        for k in fields:
+            if k in r:
+                r[k] = types[k](r[k])
 
-    return fields, types, results_
+    return by, fields, types, results
 
 
 def fold(results, *,
-        fields=None,
-        merges=None,
-        by=None,
+        by=[],
+        fields=[],
+        ops={},
         **_):
     folding = co.OrderedDict()
-    if by is None:
-        by = [k for k in fields if k not in merges]
-
     for r in results:
-        name = tuple(r.get(k) for k in by)
+        name = tuple(r.get(k, '') for k in by)
         if name not in folding:
-            folding[name] = {k: [] for k in fields if k in merges}
+            folding[name] = {k: [] for k in fields}
         for k in fields:
-            # drop all fields fields without a type
-            if k in merges and k in r:
+            if k in r:
                 folding[name][k].append(r[k])
 
     # merge fields, we need the count at this point for averages
     folded = []
-    types = {}
     for name, r in folding.items():
         r_ = {}
         for k, vs in r.items():
             if vs:
-                _, merge = MERGES[merges[k]]
-                r_[k] = merge(vs)
+                # sum fields by default
+                op = OPS[ops.get(k, 'add')]
+                r_[k] = op(vs)
 
-        # drop all rows without any fields
-        # and drop all empty keys
+        # drop any rows without fields and any empty keys
         if r_:
             folded.append(dict(
-                {k: n for k, n in zip(by, name) if n},
+                {k: v for k, v in zip(by, name) if v},
                 **r_))
 
-    fields_ = by + [k for k in fields if k in merges]
-    return fields_, folded
+    return folded
 
 
 def table(results, diff_results=None, *,
+        by=None,
         fields=None,
         types=None,
-        merges=None,
-        by=None,
+        ops=None,
         sort=None,
         reverse_sort=None,
         summary=False,
@@ -387,29 +400,18 @@ def table(results, diff_results=None, *,
         **_):
     all_, all = all, __builtins__.all
 
-    # fold
-    if by is not None:
-        fields, results = fold(results, fields=fields, merges=merges, by=by)
-        if diff_results is not None:
-            _, diff_results = fold(diff_results,
-                fields=fields, merges=merges, by=by)
-
-    table = {
-        tuple(r.get(k,'') for k in fields if k not in merges): r
-        for r in results}
-    diff_table = {
-        tuple(r.get(k,'') for k in fields if k not in merges): r
-        for r in diff_results or []}
+    table = {tuple(r.get(k,'') for k in by): r for r in results}
+    diff_table = {tuple(r.get(k,'') for k in by): r for r in diff_results or []}
 
     # sort, note that python's sort is stable
     names = list(table.keys() | diff_table.keys())
     names.sort()
     if diff_results is not None:
-        names.sort(key=lambda n: [
+        names.sort(key=lambda n: tuple(
             -types[k].ratio(
                 table.get(n,{}).get(k),
                 diff_table.get(n,{}).get(k))
-                for k in fields if k in merges])
+            for k in fields))
     if sort:
         names.sort(key=lambda n: tuple(
             (table[n][k],) if k in table.get(n,{}) else ()
@@ -423,7 +425,7 @@ def table(results, diff_results=None, *,
 
     # print header
     print('%-36s' % ('%s%s' % (
-        ','.join(k for k in fields if k not in merges),
+        ','.join(k for k in by),
         ' (%d added, %d removed)' % (
             sum(1 for n in table if n not in diff_table),
             sum(1 for n in diff_table if n not in table))
@@ -433,19 +435,19 @@ def table(results, diff_results=None, *,
     if diff_results is None:
         print(' %s' % (
             ' '.join(k.rjust(len(types[k].none))
-                for k in fields if k in merges)))
+                for k in fields)))
     elif percent:
         print(' %s' % (
             ' '.join(k.rjust(len(types[k].diff_none))
-                for k in fields if k in merges)))
+                for k in fields)))
     else:
         print(' %s %s %s' % (
             ' '.join(('o'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(('n'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(('d'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges)))
+                for k in fields)))
 
     # print entries
     if not summary:
@@ -454,7 +456,7 @@ def table(results, diff_results=None, *,
             if diff_results is not None:
                 diff_r = diff_table.get(name, {})
                 ratios = [types[k].ratio(r.get(k), diff_r.get(k))
-                    for k in fields if k in merges]
+                    for k in fields]
                 if not any(ratios) and not all_:
                     continue
 
@@ -463,12 +465,12 @@ def table(results, diff_results=None, *,
                 print(' %s' % (
                     ' '.join(r[k].table()
                         if k in r else types[k].none
-                        for k in fields if k in merges)))
+                        for k in fields)))
             elif percent:
                 print(' %s%s' % (
                     ' '.join(r[k].diff_table()
                         if k in r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' (%s)' % ', '.join(
                             '+∞%' if t == float('+inf')
                             else '-∞%' if t == float('-inf')
@@ -478,13 +480,13 @@ def table(results, diff_results=None, *,
                 print(' %s %s %s%s' % (
                     ' '.join(diff_r[k].diff_table()
                         if k in diff_r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' '.join(r[k].diff_table()
                         if k in r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                         if k in r or k in diff_r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' (%s)' % ', '.join(
                             '+∞%' if t == float('+inf')
                             else '-∞%' if t == float('-inf')
@@ -494,26 +496,25 @@ def table(results, diff_results=None, *,
                         if any(ratios) else ''))
 
     # print total
-    _, total = fold(results, fields=fields, merges=merges, by=[])
+    total = fold(results, by=[], fields=fields, ops=ops)
     r = total[0] if total else {}
     if diff_results is not None:
-        _, diff_total = fold(diff_results,
-            fields=fields, merges=merges, by=[])
+        diff_total = fold(diff_results, by=[], fields=fields, ops=ops)
         diff_r = diff_total[0] if diff_total else {}
         ratios = [types[k].ratio(r.get(k), diff_r.get(k))
-            for k in fields if k in merges]
+            for k in fields]
 
     print('%-36s' % 'TOTAL', end='')
     if diff_results is None:
         print(' %s' % (
             ' '.join(r[k].table()
                 if k in r else types[k].none
-                for k in fields if k in merges)))
+                for k in fields)))
     elif percent:
         print(' %s%s' % (
             ' '.join(r[k].diff_table()
                 if k in r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' (%s)' % ', '.join(
                     '+∞%' if t == float('+inf')
                     else '-∞%' if t == float('-inf')
@@ -523,13 +524,13 @@ def table(results, diff_results=None, *,
         print(' %s %s %s%s' % (
             ' '.join(diff_r[k].diff_table()
                 if k in diff_r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(r[k].diff_table()
                 if k in r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                 if k in r or k in diff_r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' (%s)' % ', '.join(
                     '+∞%' if t == float('+inf')
                     else '-∞%' if t == float('-inf')
@@ -539,56 +540,35 @@ def table(results, diff_results=None, *,
                 if any(ratios) else ''))
 
 
-def main(csv_paths, *, fields=None, by=None, **args):
-    # figure out what fields to use
-    renames = {}
-
-    if fields is not None:
-        fields_ = []
-        for name in fields:
-            if '=' in name:
-                a, b = name.split('=', 1)
-                renames[b] = a
-                name = a
-            fields_.append(name)
-        fields = fields_
-
+def main(csv_paths, *,
+        by=None,
+        fields=None,
+        define=[],
+        **args):
+    # separate out renames
+    renames = [k.split('=', 1)
+        for k in it.chain(by or [], fields or [])
+        if '=' in k]
     if by is not None:
-        by_ = []
-        for name in by:
-            if '=' in name:
-                a, b = name.split('=', 1)
-                renames[b] = a
-                name = a
-            by_.append(name)
-        by = by_
-
-    # include 'by' fields in fields, it doesn't make sense to not
-    if fields is not None and by is not None:
-        fields[:0] = [k for k in by if k not in fields]
-
-    # use preconfigured merge operations unless any merge operation is
-    # explictly specified
-    merge_args = (args
-        if any(args.get(m) for m in MERGES.keys())
-        else {m: k for m, (k, _) in MERGES.items()})
-    merges = {}
-    for m in MERGES.keys():
-        for k in merge_args.get(m, []):
-            if k in merges:
-                print("conflicting merge type for field %r?" % k)
+        by = [k.split('=', 1)[0] for k in by]
+    if fields is not None:
+        fields = [k.split('=', 1)[0] for k in fields]
+
+    # figure out merge operations
+    ops = {}
+    for m in OPS.keys():
+        for k in args.get(m, []):
+            if k in ops:
+                print("conflicting op for field %r?" % k)
                 sys.exit(-1)
-            merges[k] = m
-    # allow renames to apply to merges
-    for m in MERGES.keys():
-        for k in merge_args.get(m, []):
-            if renames.get(k, k) not in merges:
-                merges[renames.get(k, k)] = m
-    # ignore merges that conflict with 'by' fields
-    if by is not None:
-        for k in by:
-            if k in merges:
-                del merges[k]
+            ops[k] = m
+    # rename ops?
+    if renames:
+        ops_ = {}
+        for new_k, old_k in renames:
+            if old_k in ops:
+                ops_[new_k] = ops[old_k]
+        ops.update(ops_)
 
     # find CSV files
     paths = []
@@ -614,17 +594,17 @@ def main(csv_paths, *, fields=None, by=None, **args):
             pass
 
     # homogenize
-    fields, types, results = homogenize(results,
-        fields=fields, merges=merges, renames=renames)
+    by, fields, types, results = homogenize(results,
+        by=by, fields=fields, renames=renames, define=define)
 
     # fold to remove duplicates
-    fields, results = fold(results,
-        fields=fields, merges=merges)
+    results = fold(results,
+        by=by, fields=fields, ops=ops)
 
     # write results to CSV
     if args.get('output'):
         with openio(args['output'], 'w') as f:
-            writer = csv.DictWriter(f, fields)
+            writer = csv.DictWriter(f, by + fields)
             writer.writeheader()
             for r in results:
                 writer.writerow(r)
@@ -641,22 +621,22 @@ def main(csv_paths, *, fields=None, by=None, **args):
             pass
 
         # homogenize
-        _, _, diff_results = homogenize(diff_results,
-            fields=fields, merges=merges, renames=renames, types=types)
+        _, _, _, diff_results = homogenize(diff_results,
+            by=by, fields=fields, renames=renames, define=define, types=types)
 
         # fold to remove duplicates
-        _, diff_results = fold(diff_results,
-            fields=fields, merges=merges)
+        diff_results = fold(diff_results,
+            by=by, fields=fields, ops=ops)
 
     # print table
     if not args.get('quiet'):
         table(
             results,
             diff_results if args.get('diff') else None,
+            by=by,
             fields=fields,
+            ops=ops,
             types=types,
-            merges=merges,
-            by=by,
             **args)
 
 
@@ -690,35 +670,40 @@ if __name__ == "__main__":
         action='store_true',
         help="Only show percentage change, not a full diff.")
     parser.add_argument(
-        '-f', '--fields',
+        '-b', '--by',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Only show these fields. Can rename fields "
-            "with new_name=old_name.")
+        help="Group by these fields. All other fields will be merged as "
+            "needed. Can rename fields with new_name=old_name.")
     parser.add_argument(
-        '-b', '--by',
+        '-f', '--fields',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Group by these fields. Can rename fields "
-            "with new_name=old_name.")
+        help="Use these fields. Can rename fields with new_name=old_name.")
+    parser.add_argument(
+        '-D', '--define',
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
+        action='append',
+        help="Only include rows where this field is this value. May include "
+            "comma-separated options.")
     parser.add_argument(
         '--add',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Add these fields when merging.")
+        help="Add these fields (the default).")
     parser.add_argument(
         '--mul',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Multiply these fields when merging.")
+        help="Multiply these fields.")
     parser.add_argument(
         '--min',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Take the minimum of these fields when merging.")
+        help="Take the minimum of these fields.")
     parser.add_argument(
         '--max',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Take the maximum of these fields when merging.")
+        help="Take the maximum of these fields.")
     parser.add_argument(
         '--avg',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Average these fields when merging.")
+        help="Average these fields.")
     parser.add_argument(
         '-s', '--sort',
         type=lambda x: [x.strip() for x in x.split(',')],