Browse Source

Consistent handling of by/field arguments for plot.py and summary.py

Now both scripts also fall back to guessing what fields to use based on
what fields can be converted to integers. This is more fallible, and
doesn't work for tests/benchmarks, but in those cases explicit fields
can be used (which is what would be needed without guessing anyway).
Christopher Haster 3 years ago
parent
commit
fb58148df2
3 changed files with 208 additions and 205 deletions
  1. 2 1
      Makefile
  2. 37 20
      scripts/plot.py
  3. 169 184
      scripts/summary.py

+ 2 - 1
Makefile

@@ -170,10 +170,11 @@ coverage: $(GCDA)
 .PHONY: summary sizes
 .PHONY: summary sizes
 summary sizes: $(BUILDDIR)lfs.csv
 summary sizes: $(BUILDDIR)lfs.csv
 	$(strip ./scripts/summary.py -Y $^ \
 	$(strip ./scripts/summary.py -Y $^ \
-		-f code=code_size,$\
+		-fcode=code_size,$\
 			data=data_size,$\
 			data=data_size,$\
 			stack=stack_limit,$\
 			stack=stack_limit,$\
 			struct=struct_size \
 			struct=struct_size \
+		--max=stack \
 		$(SUMMARYFLAGS))
 		$(SUMMARYFLAGS))
 
 
 
 

+ 37 - 20
scripts/plot.py

@@ -330,12 +330,13 @@ def collect(csv_paths, renames=[]):
 
 
     return results
     return results
 
 
-def dataset(results, x=None, y=None, defines={}):
+def dataset(results, x=None, y=None, define=[]):
     # organize by 'by', x, and y
     # organize by 'by', x, and y
     dataset = {}
     dataset = {}
-    for i, r in enumerate(results):
+    i = 0
+    for r in results:
         # filter results by matching defines
         # filter results by matching defines
-        if not all(k in r and r[k] in vs for k, vs in defines.items()):
+        if not all(k in r and r[k] in vs for k, vs in define):
             continue
             continue
 
 
         # find xs
         # find xs
@@ -348,6 +349,7 @@ def dataset(results, x=None, y=None, defines={}):
                 continue
                 continue
         else:
         else:
             x_ = i
             x_ = i
+            i += 1
 
 
         # find ys
         # find ys
         if y is not None:
         if y is not None:
@@ -368,14 +370,29 @@ def dataset(results, x=None, y=None, defines={}):
 
 
     return dataset
     return dataset
 
 
-def datasets(results, by=None, x=None, y=None, defines={}):
+def datasets(results, by=None, x=None, y=None, define=[]):
     # filter results by matching defines
     # filter results by matching defines
     results_ = []
     results_ = []
     for r in results:
     for r in results:
-        if all(k in r and r[k] in vs for k, vs in defines.items()):
+        if all(k in r and r[k] in vs for k, vs in define):
             results_.append(r)
             results_.append(r)
     results = results_
     results = results_
 
 
+    # if y not specified, try to guess from data
+    if y is None:
+        y = co.OrderedDict()
+        for r in results:
+            for k, v in r.items():
+                if by is not None and k in by:
+                    continue
+                if y.get(k, True):
+                    try:
+                        dat(v)
+                        y[k] = True
+                    except ValueError:
+                        y[k] = False
+        y = list(k for k,v in y.items() if v)
+
     if by is not None:
     if by is not None:
         # find all 'by' values
         # find all 'by' values
         ks = set()
         ks = set()
@@ -387,13 +404,17 @@ def datasets(results, by=None, x=None, y=None, defines={}):
     datasets = co.OrderedDict()
     datasets = co.OrderedDict()
     for ks_ in (ks if by is not None else [()]):
     for ks_ in (ks if by is not None else [()]):
         for x_ in (x if x is not None else [None]):
         for x_ in (x if x is not None else [None]):
-            for y_ in (y if y is not None else [None]):
-                datasets[ks_ + (x_, y_)] = dataset(
+            for y_ in y:
+                # hide x/y if there is only one field
+                k_x = x_ if len(x or []) > 1 else ''
+                k_y = y_ if len(y or []) > 1 else ''
+
+                datasets[ks_ + (k_x, k_y)] = dataset(
                     results,
                     results,
                     x_,
                     x_,
                     y_,
                     y_,
-                    {by_: {k_} for by_, k_ in zip(by, ks_)}
-                        if by is not None else {})
+                    [(by_, k_) for by_, k_ in zip(by, ks_)]
+                        if by is not None else [])
 
 
     return datasets
     return datasets
     
     
@@ -431,7 +452,7 @@ def main(csv_paths, *,
     if ylim is not None and len(ylim) == 1:
     if ylim is not None and len(ylim) == 1:
         ylim = (0, ylim[0])
         ylim = (0, ylim[0])
 
 
-    # seperate out renames
+    # separate out renames
     renames = [k.split('=', 1)
     renames = [k.split('=', 1)
         for k in it.chain(by or [], x or [], y or [])
         for k in it.chain(by or [], x or [], y or [])
         if '=' in k]
         if '=' in k]
@@ -452,7 +473,7 @@ def main(csv_paths, *,
         results = collect(csv_paths, renames)
         results = collect(csv_paths, renames)
 
 
         # then extract the requested datasets
         # then extract the requested datasets
-        datasets_ = datasets(results, by, x, y, dict(define))
+        datasets_ = datasets(results, by, x, y, define)
 
 
         # what colors to use?
         # what colors to use?
         if colors is not None:
         if colors is not None:
@@ -483,10 +504,7 @@ def main(csv_paths, *,
                         else '%s ' % line_chars_[i % len(line_chars_)]
                         else '%s ' % line_chars_[i % len(line_chars_)]
                         if line_chars is not None
                         if line_chars is not None
                         else '',
                         else '',
-                    ','.join(k_ for i, k_ in enumerate(k)
-                        if k_
-                        if not (i == len(k)-2 and len(x) == 1)
-                        if not (i == len(k)-1 and len(y) == 1)))
+                    ','.join(k_ for k_ in k if k_))
 
 
                 if label:
                 if label:
                     legend_.append(label)
                     legend_.append(label)
@@ -685,7 +703,7 @@ if __name__ == "__main__":
         '-b', '--by',
         '-b', '--by',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
         help="Fields to render as separate plots. All other fields will be "
         help="Fields to render as separate plots. All other fields will be "
-            "summed. Can rename fields with new_name=old_name.")
+            "summed as needed. Can rename fields with new_name=old_name.")
     parser.add_argument(
     parser.add_argument(
         '-x',
         '-x',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
@@ -694,15 +712,14 @@ if __name__ == "__main__":
     parser.add_argument(
     parser.add_argument(
         '-y',
         '-y',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        required=True,
         help="Fields to use for the y-axis. Can rename fields with "
         help="Fields to use for the y-axis. Can rename fields with "
             "new_name=old_name.")
             "new_name=old_name.")
     parser.add_argument(
     parser.add_argument(
         '-D', '--define',
         '-D', '--define',
-        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
         action='append',
         action='append',
-        help="Only include rows where this field is this value (field=value). "
-            "May include comma-separated options.")
+        help="Only include rows where this field is this value. May include "
+            "comma-separated options.")
     parser.add_argument(
     parser.add_argument(
         '--color',
         '--color',
         choices=['never', 'always', 'auto'],
         choices=['never', 'always', 'auto'],

+ 169 - 184
scripts/summary.py

@@ -16,6 +16,7 @@ import collections as co
 import csv
 import csv
 import functools as ft
 import functools as ft
 import glob
 import glob
+import itertools as it
 import math as m
 import math as m
 import os
 import os
 import re
 import re
@@ -23,31 +24,13 @@ import re
 
 
 CSV_PATHS = ['*.csv']
 CSV_PATHS = ['*.csv']
 
 
-# Defaults are common fields generated by other littlefs scripts
-MERGES = {
-    'add': (
-        ['code_size', 'data_size', 'stack_frame', 'struct_size',
-            'coverage_lines', 'coverage_branches',
-            'test_passed',
-            'bench_read', 'bench_prog', 'bench_erased'],
-        lambda xs: sum(xs[1:], start=xs[0])
-    ),
-    'mul': (
-        [],
-        lambda xs: m.prod(xs[1:], start=xs[0])
-    ),
-    'min': (
-        [],
-        min
-    ),
-    'max': (
-        ['stack_limit', 'coverage_hits'],
-        max
-    ),
-    'avg': (
-        [],
-        lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
-    ),
+# supported merge operations
+OPS = {
+    'add': lambda xs: sum(xs[1:], start=xs[0]),
+    'mul': lambda xs: m.prod(xs[1:], start=xs[0]),
+    'min': min,
+    'max': max,
+    'avg': lambda xs: sum(xs[1:], start=xs[0]) / len(xs),
 }
 }
 
 
 
 
@@ -273,112 +256,142 @@ class FracField(co.namedtuple('FracField', 'a,b')):
     def __truediv__(self, n):
     def __truediv__(self, n):
         return FracField(self.a / n, self.b / n)
         return FracField(self.a / n, self.b / n)
 
 
+# available types
+TYPES = [IntField, FloatField, FracField]
+
 
 
 def homogenize(results, *,
 def homogenize(results, *,
+        by=None,
         fields=None,
         fields=None,
-        merges=None,
-        renames=None,
+        renames=[],
+        define={},
         types=None,
         types=None,
         **_):
         **_):
+    results = results.copy()
+
     # rename fields?
     # rename fields?
-    if renames is not None:
+    if renames:
+        for r in results:
+            # make a copy so renames can overlap
+            r_ = {}
+            for new_k, old_k in renames:
+                if old_k in r:
+                    r_[new_k] = r[old_k]
+            r.update(r_)
+
+    # filter by matching defines
+    if define:
         results_ = []
         results_ = []
         for r in results:
         for r in results:
-            results_.append({renames.get(k, k): v for k, v in r.items()})
+            if all(k in r and r[k] in vs for k, vs in define):
+                results_.append(r)
         results = results_
         results = results_
 
 
-    # find all fields
-    if not fields:
+    # if fields not specified, try to guess from data
+    if fields is None:
         fields = co.OrderedDict()
         fields = co.OrderedDict()
         for r in results:
         for r in results:
-            # also remove None fields, these can get introduced by
-            # csv.DictReader when header and rows mismatch
-            fields.update((k, v) for k, v in r.items() if k is not None)
-        fields = list(fields.keys())
+            for k, v in r.items():
+                if by is not None and k in by:
+                    continue
+                types_ = []
+                for type in fields.get(k, TYPES):
+                    try:
+                        type(v)
+                        types_.append(type)
+                    except ValueError:
+                        pass
+                fields[k] = types_
+        fields = list(k for k,v in fields.items() if v)
+
+    # infer 'by' fields?
+    if by is None:
+        by = co.OrderedDict()
+        for r in results:
+            # also ignore None keys, these are introduced by csv.DictReader
+            # when header + row mismatch
+            by.update((k, True) for k in r.keys()
+                if k is not None
+                    and k not in fields
+                    and not any(k == old_k for _, old_k in renames))
+        by = list(by.keys()) 
 
 
     # go ahead and clean up none values, these can have a few forms
     # go ahead and clean up none values, these can have a few forms
     results_ = []
     results_ = []
     for r in results:
     for r in results:
         results_.append({
         results_.append({
-            k: r[k] for k in fields
-            if r.get(k) is not None and not(
+            k: r[k] for k in it.chain(by, fields)
+            if r.get(k) is not None and not (
                 isinstance(r[k], str)
                 isinstance(r[k], str)
                 and re.match('^\s*[+-]?\s*$', r[k]))})
                 and re.match('^\s*[+-]?\s*$', r[k]))})
+    results = results_
 
 
     # find best type for all fields
     # find best type for all fields
-    def try_(x, type):
-        try:
-            type(x)
-            return True
-        except ValueError:
-            return False
-
     if types is None:
     if types is None:
+        def is_type(x, type):
+            try:
+                type(x)
+                return True
+            except ValueError:
+                return False
+
         types = {}
         types = {}
         for k in fields:
         for k in fields:
-            if merges is not None and merges.get(k):
-                for type in [IntField, FloatField, FracField]:
-                    if all(k not in r or try_(r[k], type) for r in results_):
-                        types[k] = type
-                        break
-                else:
-                    print("no type matches field %r?" % k)
-                    sys.exit(-1)
+            for type in TYPES:
+                if all(k not in r or is_type(r[k], type) for r in results_):
+                    types[k] = type
+                    break
+            else:
+                print("no type matches field %r?" % k)
+                sys.exit(-1)
 
 
     # homogenize types
     # homogenize types
-    for k in fields:
-        if k in types:
-            for r in results_:
-                if k in r:
-                    r[k] = types[k](r[k])
+    for r in results:
+        for k in fields:
+            if k in r:
+                r[k] = types[k](r[k])
 
 
-    return fields, types, results_
+    return by, fields, types, results
 
 
 
 
 def fold(results, *,
 def fold(results, *,
-        fields=None,
-        merges=None,
-        by=None,
+        by=[],
+        fields=[],
+        ops={},
         **_):
         **_):
     folding = co.OrderedDict()
     folding = co.OrderedDict()
-    if by is None:
-        by = [k for k in fields if k not in merges]
-
     for r in results:
     for r in results:
-        name = tuple(r.get(k) for k in by)
+        name = tuple(r.get(k, '') for k in by)
         if name not in folding:
         if name not in folding:
-            folding[name] = {k: [] for k in fields if k in merges}
+            folding[name] = {k: [] for k in fields}
         for k in fields:
         for k in fields:
-            # drop all fields fields without a type
-            if k in merges and k in r:
+            if k in r:
                 folding[name][k].append(r[k])
                 folding[name][k].append(r[k])
 
 
     # merge fields, we need the count at this point for averages
     # merge fields, we need the count at this point for averages
     folded = []
     folded = []
-    types = {}
     for name, r in folding.items():
     for name, r in folding.items():
         r_ = {}
         r_ = {}
         for k, vs in r.items():
         for k, vs in r.items():
             if vs:
             if vs:
-                _, merge = MERGES[merges[k]]
-                r_[k] = merge(vs)
+                # sum fields by default
+                op = OPS[ops.get(k, 'add')]
+                r_[k] = op(vs)
 
 
-        # drop all rows without any fields
-        # and drop all empty keys
+        # drop any rows without fields and any empty keys
         if r_:
         if r_:
             folded.append(dict(
             folded.append(dict(
-                {k: n for k, n in zip(by, name) if n},
+                {k: v for k, v in zip(by, name) if v},
                 **r_))
                 **r_))
 
 
-    fields_ = by + [k for k in fields if k in merges]
-    return fields_, folded
+    return folded
 
 
 
 
 def table(results, diff_results=None, *,
 def table(results, diff_results=None, *,
+        by=None,
         fields=None,
         fields=None,
         types=None,
         types=None,
-        merges=None,
-        by=None,
+        ops=None,
         sort=None,
         sort=None,
         reverse_sort=None,
         reverse_sort=None,
         summary=False,
         summary=False,
@@ -387,29 +400,18 @@ def table(results, diff_results=None, *,
         **_):
         **_):
     all_, all = all, __builtins__.all
     all_, all = all, __builtins__.all
 
 
-    # fold
-    if by is not None:
-        fields, results = fold(results, fields=fields, merges=merges, by=by)
-        if diff_results is not None:
-            _, diff_results = fold(diff_results,
-                fields=fields, merges=merges, by=by)
-
-    table = {
-        tuple(r.get(k,'') for k in fields if k not in merges): r
-        for r in results}
-    diff_table = {
-        tuple(r.get(k,'') for k in fields if k not in merges): r
-        for r in diff_results or []}
+    table = {tuple(r.get(k,'') for k in by): r for r in results}
+    diff_table = {tuple(r.get(k,'') for k in by): r for r in diff_results or []}
 
 
     # sort, note that python's sort is stable
     # sort, note that python's sort is stable
     names = list(table.keys() | diff_table.keys())
     names = list(table.keys() | diff_table.keys())
     names.sort()
     names.sort()
     if diff_results is not None:
     if diff_results is not None:
-        names.sort(key=lambda n: [
+        names.sort(key=lambda n: tuple(
             -types[k].ratio(
             -types[k].ratio(
                 table.get(n,{}).get(k),
                 table.get(n,{}).get(k),
                 diff_table.get(n,{}).get(k))
                 diff_table.get(n,{}).get(k))
-                for k in fields if k in merges])
+            for k in fields))
     if sort:
     if sort:
         names.sort(key=lambda n: tuple(
         names.sort(key=lambda n: tuple(
             (table[n][k],) if k in table.get(n,{}) else ()
             (table[n][k],) if k in table.get(n,{}) else ()
@@ -423,7 +425,7 @@ def table(results, diff_results=None, *,
 
 
     # print header
     # print header
     print('%-36s' % ('%s%s' % (
     print('%-36s' % ('%s%s' % (
-        ','.join(k for k in fields if k not in merges),
+        ','.join(k for k in by),
         ' (%d added, %d removed)' % (
         ' (%d added, %d removed)' % (
             sum(1 for n in table if n not in diff_table),
             sum(1 for n in table if n not in diff_table),
             sum(1 for n in diff_table if n not in table))
             sum(1 for n in diff_table if n not in table))
@@ -433,19 +435,19 @@ def table(results, diff_results=None, *,
     if diff_results is None:
     if diff_results is None:
         print(' %s' % (
         print(' %s' % (
             ' '.join(k.rjust(len(types[k].none))
             ' '.join(k.rjust(len(types[k].none))
-                for k in fields if k in merges)))
+                for k in fields)))
     elif percent:
     elif percent:
         print(' %s' % (
         print(' %s' % (
             ' '.join(k.rjust(len(types[k].diff_none))
             ' '.join(k.rjust(len(types[k].diff_none))
-                for k in fields if k in merges)))
+                for k in fields)))
     else:
     else:
         print(' %s %s %s' % (
         print(' %s %s %s' % (
             ' '.join(('o'+k).rjust(len(types[k].diff_none))
             ' '.join(('o'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(('n'+k).rjust(len(types[k].diff_none))
             ' '.join(('n'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(('d'+k).rjust(len(types[k].diff_none))
             ' '.join(('d'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges)))
+                for k in fields)))
 
 
     # print entries
     # print entries
     if not summary:
     if not summary:
@@ -454,7 +456,7 @@ def table(results, diff_results=None, *,
             if diff_results is not None:
             if diff_results is not None:
                 diff_r = diff_table.get(name, {})
                 diff_r = diff_table.get(name, {})
                 ratios = [types[k].ratio(r.get(k), diff_r.get(k))
                 ratios = [types[k].ratio(r.get(k), diff_r.get(k))
-                    for k in fields if k in merges]
+                    for k in fields]
                 if not any(ratios) and not all_:
                 if not any(ratios) and not all_:
                     continue
                     continue
 
 
@@ -463,12 +465,12 @@ def table(results, diff_results=None, *,
                 print(' %s' % (
                 print(' %s' % (
                     ' '.join(r[k].table()
                     ' '.join(r[k].table()
                         if k in r else types[k].none
                         if k in r else types[k].none
-                        for k in fields if k in merges)))
+                        for k in fields)))
             elif percent:
             elif percent:
                 print(' %s%s' % (
                 print(' %s%s' % (
                     ' '.join(r[k].diff_table()
                     ' '.join(r[k].diff_table()
                         if k in r else types[k].diff_none
                         if k in r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' (%s)' % ', '.join(
                     ' (%s)' % ', '.join(
                             '+∞%' if t == float('+inf')
                             '+∞%' if t == float('+inf')
                             else '-∞%' if t == float('-inf')
                             else '-∞%' if t == float('-inf')
@@ -478,13 +480,13 @@ def table(results, diff_results=None, *,
                 print(' %s %s %s%s' % (
                 print(' %s %s %s%s' % (
                     ' '.join(diff_r[k].diff_table()
                     ' '.join(diff_r[k].diff_table()
                         if k in diff_r else types[k].diff_none
                         if k in diff_r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' '.join(r[k].diff_table()
                     ' '.join(r[k].diff_table()
                         if k in r else types[k].diff_none
                         if k in r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                     ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                         if k in r or k in diff_r else types[k].diff_none
                         if k in r or k in diff_r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' (%s)' % ', '.join(
                     ' (%s)' % ', '.join(
                             '+∞%' if t == float('+inf')
                             '+∞%' if t == float('+inf')
                             else '-∞%' if t == float('-inf')
                             else '-∞%' if t == float('-inf')
@@ -494,26 +496,25 @@ def table(results, diff_results=None, *,
                         if any(ratios) else ''))
                         if any(ratios) else ''))
 
 
     # print total
     # print total
-    _, total = fold(results, fields=fields, merges=merges, by=[])
+    total = fold(results, by=[], fields=fields, ops=ops)
     r = total[0] if total else {}
     r = total[0] if total else {}
     if diff_results is not None:
     if diff_results is not None:
-        _, diff_total = fold(diff_results,
-            fields=fields, merges=merges, by=[])
+        diff_total = fold(diff_results, by=[], fields=fields, ops=ops)
         diff_r = diff_total[0] if diff_total else {}
         diff_r = diff_total[0] if diff_total else {}
         ratios = [types[k].ratio(r.get(k), diff_r.get(k))
         ratios = [types[k].ratio(r.get(k), diff_r.get(k))
-            for k in fields if k in merges]
+            for k in fields]
 
 
     print('%-36s' % 'TOTAL', end='')
     print('%-36s' % 'TOTAL', end='')
     if diff_results is None:
     if diff_results is None:
         print(' %s' % (
         print(' %s' % (
             ' '.join(r[k].table()
             ' '.join(r[k].table()
                 if k in r else types[k].none
                 if k in r else types[k].none
-                for k in fields if k in merges)))
+                for k in fields)))
     elif percent:
     elif percent:
         print(' %s%s' % (
         print(' %s%s' % (
             ' '.join(r[k].diff_table()
             ' '.join(r[k].diff_table()
                 if k in r else types[k].diff_none
                 if k in r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' (%s)' % ', '.join(
             ' (%s)' % ', '.join(
                     '+∞%' if t == float('+inf')
                     '+∞%' if t == float('+inf')
                     else '-∞%' if t == float('-inf')
                     else '-∞%' if t == float('-inf')
@@ -523,13 +524,13 @@ def table(results, diff_results=None, *,
         print(' %s %s %s%s' % (
         print(' %s %s %s%s' % (
             ' '.join(diff_r[k].diff_table()
             ' '.join(diff_r[k].diff_table()
                 if k in diff_r else types[k].diff_none
                 if k in diff_r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(r[k].diff_table()
             ' '.join(r[k].diff_table()
                 if k in r else types[k].diff_none
                 if k in r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
             ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                 if k in r or k in diff_r else types[k].diff_none
                 if k in r or k in diff_r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' (%s)' % ', '.join(
             ' (%s)' % ', '.join(
                     '+∞%' if t == float('+inf')
                     '+∞%' if t == float('+inf')
                     else '-∞%' if t == float('-inf')
                     else '-∞%' if t == float('-inf')
@@ -539,56 +540,35 @@ def table(results, diff_results=None, *,
                 if any(ratios) else ''))
                 if any(ratios) else ''))
 
 
 
 
-def main(csv_paths, *, fields=None, by=None, **args):
-    # figure out what fields to use
-    renames = {}
-
-    if fields is not None:
-        fields_ = []
-        for name in fields:
-            if '=' in name:
-                a, b = name.split('=', 1)
-                renames[b] = a
-                name = a
-            fields_.append(name)
-        fields = fields_
-
+def main(csv_paths, *,
+        by=None,
+        fields=None,
+        define=[],
+        **args):
+    # separate out renames
+    renames = [k.split('=', 1)
+        for k in it.chain(by or [], fields or [])
+        if '=' in k]
     if by is not None:
     if by is not None:
-        by_ = []
-        for name in by:
-            if '=' in name:
-                a, b = name.split('=', 1)
-                renames[b] = a
-                name = a
-            by_.append(name)
-        by = by_
-
-    # include 'by' fields in fields, it doesn't make sense to not
-    if fields is not None and by is not None:
-        fields[:0] = [k for k in by if k not in fields]
-
-    # use preconfigured merge operations unless any merge operation is
-    # explictly specified
-    merge_args = (args
-        if any(args.get(m) for m in MERGES.keys())
-        else {m: k for m, (k, _) in MERGES.items()})
-    merges = {}
-    for m in MERGES.keys():
-        for k in merge_args.get(m, []):
-            if k in merges:
-                print("conflicting merge type for field %r?" % k)
+        by = [k.split('=', 1)[0] for k in by]
+    if fields is not None:
+        fields = [k.split('=', 1)[0] for k in fields]
+
+    # figure out merge operations
+    ops = {}
+    for m in OPS.keys():
+        for k in args.get(m, []):
+            if k in ops:
+                print("conflicting op for field %r?" % k)
                 sys.exit(-1)
                 sys.exit(-1)
-            merges[k] = m
-    # allow renames to apply to merges
-    for m in MERGES.keys():
-        for k in merge_args.get(m, []):
-            if renames.get(k, k) not in merges:
-                merges[renames.get(k, k)] = m
-    # ignore merges that conflict with 'by' fields
-    if by is not None:
-        for k in by:
-            if k in merges:
-                del merges[k]
+            ops[k] = m
+    # rename ops?
+    if renames:
+        ops_ = {}
+        for new_k, old_k in renames:
+            if old_k in ops:
+                ops_[new_k] = ops[old_k]
+        ops.update(ops_)
 
 
     # find CSV files
     # find CSV files
     paths = []
     paths = []
@@ -614,17 +594,17 @@ def main(csv_paths, *, fields=None, by=None, **args):
             pass
             pass
 
 
     # homogenize
     # homogenize
-    fields, types, results = homogenize(results,
-        fields=fields, merges=merges, renames=renames)
+    by, fields, types, results = homogenize(results,
+        by=by, fields=fields, renames=renames, define=define)
 
 
     # fold to remove duplicates
     # fold to remove duplicates
-    fields, results = fold(results,
-        fields=fields, merges=merges)
+    results = fold(results,
+        by=by, fields=fields, ops=ops)
 
 
     # write results to CSV
     # write results to CSV
     if args.get('output'):
     if args.get('output'):
         with openio(args['output'], 'w') as f:
         with openio(args['output'], 'w') as f:
-            writer = csv.DictWriter(f, fields)
+            writer = csv.DictWriter(f, by + fields)
             writer.writeheader()
             writer.writeheader()
             for r in results:
             for r in results:
                 writer.writerow(r)
                 writer.writerow(r)
@@ -641,22 +621,22 @@ def main(csv_paths, *, fields=None, by=None, **args):
             pass
             pass
 
 
         # homogenize
         # homogenize
-        _, _, diff_results = homogenize(diff_results,
-            fields=fields, merges=merges, renames=renames, types=types)
+        _, _, _, diff_results = homogenize(diff_results,
+            by=by, fields=fields, renames=renames, define=define, types=types)
 
 
         # fold to remove duplicates
         # fold to remove duplicates
-        _, diff_results = fold(diff_results,
-            fields=fields, merges=merges)
+        diff_results = fold(diff_results,
+            by=by, fields=fields, ops=ops)
 
 
     # print table
     # print table
     if not args.get('quiet'):
     if not args.get('quiet'):
         table(
         table(
             results,
             results,
             diff_results if args.get('diff') else None,
             diff_results if args.get('diff') else None,
+            by=by,
             fields=fields,
             fields=fields,
+            ops=ops,
             types=types,
             types=types,
-            merges=merges,
-            by=by,
             **args)
             **args)
 
 
 
 
@@ -690,35 +670,40 @@ if __name__ == "__main__":
         action='store_true',
         action='store_true',
         help="Only show percentage change, not a full diff.")
         help="Only show percentage change, not a full diff.")
     parser.add_argument(
     parser.add_argument(
-        '-f', '--fields',
+        '-b', '--by',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Only show these fields. Can rename fields "
-            "with new_name=old_name.")
+        help="Group by these fields. All other fields will be merged as "
+            "needed. Can rename fields with new_name=old_name.")
     parser.add_argument(
     parser.add_argument(
-        '-b', '--by',
+        '-f', '--fields',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Group by these fields. Can rename fields "
-            "with new_name=old_name.")
+        help="Use these fields. Can rename fields with new_name=old_name.")
+    parser.add_argument(
+        '-D', '--define',
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
+        action='append',
+        help="Only include rows where this field is this value. May include "
+            "comma-separated options.")
     parser.add_argument(
     parser.add_argument(
         '--add',
         '--add',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Add these fields when merging.")
+        help="Add these fields (the default).")
     parser.add_argument(
     parser.add_argument(
         '--mul',
         '--mul',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Multiply these fields when merging.")
+        help="Multiply these fields.")
     parser.add_argument(
     parser.add_argument(
         '--min',
         '--min',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Take the minimum of these fields when merging.")
+        help="Take the minimum of these fields.")
     parser.add_argument(
     parser.add_argument(
         '--max',
         '--max',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Take the maximum of these fields when merging.")
+        help="Take the maximum of these fields.")
     parser.add_argument(
     parser.add_argument(
         '--avg',
         '--avg',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Average these fields when merging.")
+        help="Average these fields.")
     parser.add_argument(
     parser.add_argument(
         '-s', '--sort',
         '-s', '--sort',
         type=lambda x: [x.strip() for x in x.split(',')],
         type=lambda x: [x.strip() for x in x.split(',')],