
Made summary.py more powerful, dropped -m from size scripts

With more scripts generating CSV files, this moves most CSV manipulation
into summary.py, which can now handle more or less any CSV file with
arbitrary names and fields.

This also includes a bunch of additional, probably unnecessary, tweaks:

- summary.py/coverage.py use a custom fractional type for encoding
  fractions; this will also be used for test counts (a minimal sketch of
  the idea follows this list).

- Added a smaller diff output for size scripts with the --percent flag.

- Added line and hit info to coverage.py's CSV files.

- Added --tree flag to stack.py to show only the call tree without
  other noise.

- Renamed structs.py to struct.py.

- Changed a few flags around for consistency between size/summary scripts.

- Added `make sizes` alias.

- Added `make lfs.code.csv` rules.
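
The fractional type itself (IntField/FracField) appears in the coverage.py
diff below; as a quick illustration, here is a minimal sketch of the idea,
with illustrative names rather than the scripts' actual API: fractions are
kept as hit/total pairs so rows can be summed before being rendered as a
percentage.

    import collections as co

    class Frac(co.namedtuple('Frac', 'a,b')):
        __slots__ = ()
        def __new__(cls, a, b=None):
            # accept "3/5"-style strings straight out of a CSV cell
            if isinstance(a, str) and b is None:
                a, b = a.split('/', 1)
            if b is None:
                b = a
            return super().__new__(cls, int(a), int(b))

        def __add__(self, other):
            # component-wise sum, so folding rows keeps hits/totals intact
            return Frac(self.a + other.a, self.b + other.b)

        def __str__(self):
            return '%d/%d' % (self.a, self.b)

        def percent(self):
            return '%.1f%%' % (100*self.a/self.b) if self.b else '-'

    # two coverage rows for the same function fold into one fraction
    print(Frac('3/5') + Frac(1, 2))              # 4/7
    print((Frac('3/5') + Frac(1, 2)).percent())  # 57.1%
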
Christopher Haster, 3 years ago
Commit acdea1880e
8 files changed, 3075 lines added, 1727 lines removed
  1. Makefile             +42  -16
  2. scripts/code.py      +373 -194
  3. scripts/coverage.py  +612 -343
  4. scripts/data.py      +373 -194
  5. scripts/stack.py     +489 -271
  6. scripts/struct.py    +522 -0
  7. scripts/structs.py   +0   -348
  8. scripts/summary.py   +664 -361

Makefile  +42 -16

@@ -67,7 +67,7 @@ override TESTFLAGS     += -v
 override CODEFLAGS     += -v
 override DATAFLAGS     += -v
 override STACKFLAGS    += -v
-override STRUCTSFLAGS  += -v
+override STRUCTFLAGS   += -v
 override COVERAGEFLAGS += -v
 override TESTFLAGS     += -v
 override TESTCFLAGS    += -v
@@ -76,11 +76,10 @@ ifdef EXEC
 override TESTFLAGS 	   += --exec="$(EXEC)"
 endif
 ifdef BUILDDIR
-override TESTFLAGS     += --build-dir="$(BUILDDIR:/=)"
 override CODEFLAGS     += --build-dir="$(BUILDDIR:/=)"
 override DATAFLAGS     += --build-dir="$(BUILDDIR:/=)"
 override STACKFLAGS    += --build-dir="$(BUILDDIR:/=)"
-override STRUCTSFLAGS  += --build-dir="$(BUILDDIR:/=)"
+override STRUCTFLAGS   += --build-dir="$(BUILDDIR:/=)"
 override COVERAGEFLAGS += --build-dir="$(BUILDDIR:/=)"
 endif
 ifneq ($(NM),nm)
@@ -88,7 +87,7 @@ override CODEFLAGS += --nm-tool="$(NM)"
 override DATAFLAGS += --nm-tool="$(NM)"
 endif
 ifneq ($(OBJDUMP),objdump)
-override STRUCTSFLAGS += --objdump-tool="$(OBJDUMP)"
+override STRUCTFLAGS += --objdump-tool="$(OBJDUMP)"
 endif
 
 
@@ -132,17 +131,22 @@ data: $(OBJ)
 stack: $(CI)
 	./scripts/stack.py $^ -S $(STACKFLAGS)
 
-.PHONY: structs
-structs: $(OBJ)
-	./scripts/structs.py $^ -S $(STRUCTSFLAGS)
+.PHONY: struct
+struct: $(OBJ)
+	./scripts/struct.py $^ -S $(STRUCTFLAGS)
 
 .PHONY: coverage
 coverage: $(GCDA)
 	./scripts/coverage.py $^ -s $(COVERAGEFLAGS)
 
-.PHONY: summary
-summary: $(BUILDDIR)lfs.csv
-	./scripts/summary.py -Y $^ $(SUMMARYFLAGS)
+.PHONY: summary sizes
+summary sizes: $(BUILDDIR)lfs.csv
+	$(strip ./scripts/summary.py -Y $^ \
+		-f code=code_size,$\
+			data=data_size,$\
+			stack=stack_limit,$\
+			struct=struct_size \
+		$(SUMMARYFLAGS))
 
 
 # rules
@@ -157,11 +161,27 @@ $(BUILDDIR)lfs: $(OBJ)
 $(BUILDDIR)lfs.a: $(OBJ)
 	$(AR) rcs $@ $^
 
-$(BUILDDIR)lfs.csv: $(OBJ) $(CI)
-	./scripts/code.py $(OBJ) -q $(CODEFLAGS) -o $@
-	./scripts/data.py $(OBJ) -q -m $@ $(DATAFLAGS) -o $@
-	./scripts/stack.py $(CI) -q -m $@ $(STACKFLAGS) -o $@
-	./scripts/structs.py $(OBJ) -q -m $@ $(STRUCTSFLAGS) -o $@
+$(BUILDDIR)lfs.code.csv: $(OBJ)
+	./scripts/code.py $^ -q $(CODEFLAGS) -o $@
+
+$(BUILDDIR)lfs.data.csv: $(OBJ)
+	./scripts/data.py $^ -q $(CODEFLAGS) -o $@
+
+$(BUILDDIR)lfs.stack.csv: $(CI)
+	./scripts/stack.py $^ -q $(CODEFLAGS) -o $@
+
+$(BUILDDIR)lfs.struct.csv: $(OBJ)
+	./scripts/struct.py $^ -q $(CODEFLAGS) -o $@
+
+$(BUILDDIR)lfs.coverage.csv: $(GCDA)
+	./scripts/coverage.py $^ -q $(COVERAGEFLAGS) -o $@
+
+$(BUILDDIR)lfs.csv: \
+		$(BUILDDIR)lfs.code.csv \
+		$(BUILDDIR)lfs.data.csv \
+		$(BUILDDIR)lfs.stack.csv \
+		$(BUILDDIR)lfs.struct.csv
+	./scripts/summary.py $^ -q $(SUMMARYFLAGS) -o $@
 
 $(BUILDDIR)runners/test_runner: $(TEST_OBJ)
 	$(CC) $(CFLAGS) $^ $(LFLAGS) -o $@
@@ -191,7 +211,13 @@ $(BUILDDIR)%.t.c: %.c $(TESTS)
 clean:
 	rm -f $(BUILDDIR)lfs
 	rm -f $(BUILDDIR)lfs.a
-	rm -f $(BUILDDIR)lfs.csv
+	$(strip rm -f \
+		$(BUILDDIR)lfs.csv \
+		$(BUILDDIR)lfs.code.csv \
+		$(BUILDDIR)lfs.data.csv \
+		$(BUILDDIR)lfs.stack.csv \
+		$(BUILDDIR)lfs.struct.csv \
+		$(BUILDDIR)lfs.coverage.csv)
 	rm -f $(BUILDDIR)runners/test_runner
 	rm -f $(OBJ)
 	rm -f $(DEP)
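
A note on the new lfs.csv rule above: it now just asks summary.py to combine
the per-script CSV files into one table. The following is not summary.py
itself, only a rough sketch of the shape of that merge, assuming rows are
keyed on the file/function-style columns the size scripts emit:

    import collections as co
    import csv

    def merge_csvs(paths):
        # later columns extend earlier rows that share the same key
        merged = co.OrderedDict()
        fields = ['file', 'function']
        for path in paths:
            with open(path) as f:
                for row in csv.DictReader(f):
                    key = (row.get('file', ''), row.get('function', ''))
                    merged.setdefault(key, {}).update(row)
                    for k in row:
                        if k not in fields:
                            fields.append(k)
        return fields, merged

    # e.g.:
    # fields, merged = merge_csvs(
    #     ['lfs.code.csv', 'lfs.data.csv', 'lfs.stack.csv', 'lfs.struct.csv'])
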

scripts/code.py  +373 -194

@@ -5,71 +5,123 @@
 # by Linux's Bloat-O-Meter.
 #
 
-import os
+import collections as co
+import csv
 import glob
 import itertools as it
-import subprocess as sp
-import shlex
+import math as m
+import os
 import re
-import csv
-import collections as co
+import shlex
+import subprocess as sp
 
 
 OBJ_PATHS = ['*.o']
+NM_TOOL = ['nm']
+TYPE = 'tTrRdD'
+
 
-class CodeResult(co.namedtuple('CodeResult', 'code_size')):
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
     __slots__ = ()
-    def __new__(cls, code_size=0):
-        return super().__new__(cls, int(code_size))
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
 
-    def __add__(self, other):
-        return self.__class__(self.code_size + other.code_size)
+    def __str__(self):
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return str(self.x)
 
-    def __sub__(self, other):
-        return CodeDiff(other, self)
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
 
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
+    diff_none = '%7s' % '-'
+    diff_table = table
 
-    def key(self, **args):
-        if args.get('size_sort'):
-            return -self.code_size
-        elif args.get('reverse_size_sort'):
-            return +self.code_size
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
         else:
-            return None
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
 
-    _header = '%7s' % 'size'
-    def __str__(self):
-        return '%7d' % self.code_size
+    def __add__(self, other):
+        return IntField(self.x + other.x)
 
-class CodeDiff(co.namedtuple('CodeDiff',  'old,new')):
-    __slots__ = ()
+    def __mul__(self, other):
+        return IntField(self.x * other.x)
 
-    def ratio(self):
-        old = self.old.code_size if self.old is not None else 0
-        new = self.new.code_size if self.new is not None else 0
-        return (new-old) / old if old else 1.0
+    def __lt__(self, other):
+        return self.x < other.x
 
-    def key(self, **args):
-        return (
-            self.new.key(**args) if self.new is not None else 0,
-            -self.ratio())
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
 
-    def __bool__(self):
-        return bool(self.ratio())
+    def __le__(self, other):
+        return not self.__gt__(other)
 
-    _header = '%7s %7s %7s' % ('old', 'new', 'diff')
-    def __str__(self):
-        old = self.old.code_size if self.old is not None else 0
-        new = self.new.code_size if self.new is not None else 0
-        diff = new - old
-        ratio = self.ratio()
-        return '%7s %7s %+7d%s' % (
-            old or "-",
-            new or "-",
-            diff,
-            ' (%+.1f%%)' % (100*ratio) if ratio else '')
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
+        else:
+            return IntField(round(self.x / n))
+
+# code size results
+class CodeResult(co.namedtuple('CodeResult', 'file,function,code_size')):
+    __slots__ = ()
+    def __new__(cls, file, function, code_size):
+        return super().__new__(cls, file, function, IntField(code_size))
+
+    def __add__(self, other):
+        return CodeResult(self.file, self.function,
+            self.code_size + other.code_size)
 
 
 def openio(path, mode='r'):
@@ -81,20 +133,25 @@ def openio(path, mode='r'):
     else:
         return open(path, mode)
 
-def collect(paths, **args):
-    results = co.defaultdict(lambda: CodeResult())
+def collect(paths, *,
+        nm_tool=NM_TOOL,
+        type=TYPE,
+        build_dir=None,
+        everything=False,
+        **args):
+    results = []
     pattern = re.compile(
         '^(?P<size>[0-9a-fA-F]+)' +
-        ' (?P<type>[%s])' % re.escape(args['type']) +
+        ' (?P<type>[%s])' % re.escape(type) +
         ' (?P<func>.+?)$')
     for path in paths:
         # map to source file
         src_path = re.sub('\.o$', '.c', path)
-        if args.get('build_dir'):
-            src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
+        if build_dir:
+            src_path = re.sub('%s/*' % re.escape(build_dir), '',
                 src_path)
         # note nm-tool may contain extra args
-        cmd = args['nm_tool'] + ['--size-sort', path]
+        cmd = nm_tool + ['--size-sort', path]
         if args.get('verbose'):
             print(' '.join(shlex.quote(c) for c in cmd))
         proc = sp.Popen(cmd,
@@ -107,12 +164,15 @@ def collect(paths, **args):
             if m:
                 func = m.group('func')
                 # discard internal functions
-                if not args.get('everything') and func.startswith('__'):
+                if not everything and func.startswith('__'):
                     continue
                 # discard .8449 suffixes created by optimizer
                 func = re.sub('\.[0-9]+', '', func)
-                results[(src_path, func)] += CodeResult(
-                    int(m.group('size'), 16))
+
+                results.append(CodeResult(
+                    src_path, func,
+                    int(m.group('size'), 16)))
+
         proc.wait()
         if proc.returncode != 0:
             if not args.get('verbose'):
@@ -122,12 +182,167 @@ def collect(paths, **args):
 
     return results
 
-def main(**args):
+
+def fold(results, *,
+        by=['file', 'function'],
+        **_):
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    folded = []
+    for rs in folding.values():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    return folded
+
+
+def table(results, diff_results=None, *,
+        by_file=False,
+        size_sort=False,
+        reverse_size_sort=False,
+        summary=False,
+        all=False,
+        percent=False,
+        **_):
+    all_, all = all, __builtins__.all
+
+    # fold
+    results = fold(results, by=['file' if by_file else 'function'])
+    if diff_results is not None:
+        diff_results = fold(diff_results,
+            by=['file' if by_file else 'function'])
+
+    table = {
+        r.file if by_file else r.function: r
+        for r in results}
+    diff_table = {
+        r.file if by_file else r.function: r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: -IntField.ratio(
+            table[n].code_size if n in table else None,
+            diff_table[n].code_size if n in diff_table else None))
+    if size_sort:
+        names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
+            reverse=True)
+    elif reverse_size_sort:
+        names.sort(key=lambda n: (table[n].code_size,) if n in table else (),
+            reverse=False)
+
+    # print header
+    print('%-36s' % ('%s%s' % (
+        'file' if by_file else 'function',
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else ''),
+        end='')
+    if diff_results is None:
+        print(' %s' % ('size'.rjust(len(IntField.none))))
+    elif percent:
+        print(' %s' % ('size'.rjust(len(IntField.diff_none))))
+    else:
+        print(' %s %s %s' % (
+            'old'.rjust(len(IntField.diff_none)),
+            'new'.rjust(len(IntField.diff_none)),
+            'diff'.rjust(len(IntField.diff_none))))
+
+    # print entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is not None:
+                diff_r = diff_table.get(name)
+                ratio = IntField.ratio(
+                    r.code_size if r else None,
+                    diff_r.code_size if diff_r else None)
+                if not ratio and not all_:
+                    continue
+
+            print('%-36s' % name, end='')
+            if diff_results is None:
+                print(' %s' % (
+                    r.code_size.table()
+                        if r else IntField.none))
+            elif percent:
+                print(' %s%s' % (
+                    r.code_size.diff_table()
+                        if r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))))
+            else:
+                print(' %s %s %s%s' % (
+                    diff_r.code_size.diff_table()
+                        if diff_r else IntField.diff_none,
+                    r.code_size.diff_table()
+                        if r else IntField.diff_none,
+                    IntField.diff_diff(
+                        r.code_size if r else None,
+                        diff_r.code_size if diff_r else None)
+                        if r or diff_r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))
+                        if ratio else ''))
+
+    # print total
+    total = fold(results, by=[])
+    r = total[0] if total else None
+    if diff_results is not None:
+        diff_total = fold(diff_results, by=[])
+        diff_r = diff_total[0] if diff_total else None
+        ratio = IntField.ratio(
+            r.code_size if r else None,
+            diff_r.code_size if diff_r else None)
+
+    print('%-36s' % 'TOTAL', end='')
+    if diff_results is None:
+        print(' %s' % (
+            r.code_size.table()
+                if r else IntField.none))
+    elif percent:
+        print(' %s%s' % (
+            r.code_size.diff_table()
+                if r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))))
+    else:
+        print(' %s %s %s%s' % (
+            diff_r.code_size.diff_table()
+                if diff_r else IntField.diff_none,
+            r.code_size.diff_table()
+                if r else IntField.diff_none,
+            IntField.diff_diff(
+                r.code_size if r else None,
+                diff_r.code_size if diff_r else None)
+                if r or diff_r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))
+                if ratio else ''))
+
+
+def main(obj_paths, **args):
     # find sizes
     if not args.get('use', None):
         # find .o files
         paths = []
-        for path in args['obj_paths']:
+        for path in obj_paths:
             if os.path.isdir(path):
                 path = path + '/*.o'
 
@@ -135,127 +350,61 @@ def main(**args):
                 paths.append(path)
 
         if not paths:
-            print('no .obj files found in %r?' % args['obj_paths'])
+            print('no .obj files found in %r?' % obj_paths)
             sys.exit(-1)
 
         results = collect(paths, **args)
     else:
+        results = []
         with openio(args['use']) as f:
-            r = csv.DictReader(f)
-            results = {
-                (result['file'], result['name']): CodeResult(
-                    *(result[f] for f in CodeResult._fields))
-                for result in r
-                if all(result.get(f) not in {None, ''}
-                    for f in CodeResult._fields)}
+            reader = csv.DictReader(f)
+            for r in reader:
+                try:
+                    results.append(CodeResult(**{
+                        k: v for k, v in r.items()
+                        if k in CodeResult._fields}))
+                except TypeError:
+                    pass
 
-    # find previous results?
-    if args.get('diff'):
-        try:
-            with openio(args['diff']) as f:
-                r = csv.DictReader(f)
-                prev_results = {
-                    (result['file'], result['name']): CodeResult(
-                        *(result[f] for f in CodeResult._fields))
-                    for result in r
-                    if all(result.get(f) not in {None, ''}
-                        for f in CodeResult._fields)}
-        except FileNotFoundError:
-            prev_results = []
+    # fold to remove duplicates
+    results = fold(results)
+
+    # sort because why not
+    results.sort()
 
     # write results to CSV
     if args.get('output'):
-        merged_results = co.defaultdict(lambda: {})
-        other_fields = []
-
-        # merge?
-        if args.get('merge'):
-            try:
-                with openio(args['merge']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        func = result.pop('name', '')
-                        for f in CodeResult._fields:
-                            result.pop(f, None)
-                        merged_results[(file, func)] = result
-                        other_fields = result.keys()
-            except FileNotFoundError:
-                pass
-
-        for (file, func), result in results.items():
-            merged_results[(file, func)] |= result._asdict()
-
         with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name',
-                *other_fields, *CodeResult._fields])
-            w.writeheader()
-            for (file, func), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': func, **result})
-
-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+            writer = csv.DictWriter(f, CodeResult._fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r._asdict())
 
-        if not args.get('diff'):
-            print('%-36s %s' % (by, CodeResult._header))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                CodeDiff._header))
-
-    def print_entries(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    try:
+                        diff_results.append(CodeResult(**{
+                            k: v for k, v in r.items()
+                            if k in CodeResult._fields}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
 
-        entries = co.defaultdict(lambda: CodeResult())
-        for k, result in results.items():
-            entries[entry(k)] += result
+        # fold to remove duplicates
+        diff_results = fold(diff_results)
 
-        if not args.get('diff'):
-            for name, result in sorted(entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                print('%-36s %s' % (name, result))
-        else:
-            prev_entries = co.defaultdict(lambda: CodeResult())
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] += result
-
-            diff_entries = {name: entries.get(name) - prev_entries.get(name)
-                for name in (entries.keys() | prev_entries.keys())}
-
-            for name, diff in sorted(diff_entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                if diff or args.get('all'):
-                    print('%-36s %s' % (name, diff))
-
-    if args.get('quiet'):
-        pass
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('function')
-        print_entries('function')
-        print_entries('total')
+    # print table
+    if not args.get('quiet'):
+        table(
+            results,
+            diff_results if args.get('diff') else None,
+            **args)
 
 
 if __name__ == "__main__":
@@ -263,42 +412,72 @@ if __name__ == "__main__":
     import sys
     parser = argparse.ArgumentParser(
         description="Find code size at the function level.")
-    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory \
-            or a list of paths. Defaults to %r." % OBJ_PATHS)
-    parser.add_argument('-v', '--verbose', action='store_true',
+    parser.add_argument(
+        'obj_paths',
+        nargs='*',
+        default=OBJ_PATHS,
+        help="Description of where to find *.o files. May be a directory "
+            "or a list of paths. Defaults to %(default)r.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
         help="Output commands that run behind the scenes.")
-    parser.add_argument('-q', '--quiet', action='store_true',
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
         help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
+    parser.add_argument(
+        '-o', '--output',
         help="Specify CSV file to store results.")
-    parser.add_argument('-u', '--use',
-        help="Don't compile and find code sizes, instead use this CSV file.")
-    parser.add_argument('-d', '--diff',
-        help="Specify CSV file to diff code size against.")
-    parser.add_argument('-m', '--merge',
-        help="Merge with an existing CSV file when writing to output.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all functions, not just the ones that changed.")
-    parser.add_argument('-A', '--everything', action='store_true',
-        help="Include builtin and libc specific symbols.")
-    parser.add_argument('-s', '--size-sort', action='store_true',
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by-file',
+        action='store_true',
+        help="Group by file. Note this does not include padding "
+            "so sizes may differ from other tools.")
+    parser.add_argument(
+        '-s', '--size-sort',
+        action='store_true',
         help="Sort by size.")
-    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
+    parser.add_argument(
+        '-S', '--reverse-size-sort',
+        action='store_true',
         help="Sort by size, but backwards.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level code sizes. Note this does not include padding! "
-            "So sizes may differ from other tools.")
-    parser.add_argument('-Y', '--summary', action='store_true',
-        help="Only show the total code size.")
-    parser.add_argument('--type', default='tTrRdD',
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total size.")
+    parser.add_argument(
+        '-A', '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '--type',
+        default=TYPE,
         help="Type of symbols to report, this uses the same single-character "
             "type-names emitted by nm. Defaults to %(default)r.")
-    parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
-        help="Path to the nm tool to use.")
-    parser.add_argument('--build-dir',
-        help="Specify the relative build directory. Used to map object files \
-            to the correct source files.")
+    parser.add_argument(
+        '--nm-tool',
+        type=lambda x: x.split(),
+        default=NM_TOOL,
+        help="Path to the nm tool to use. Defaults to %(default)r")
+    parser.add_argument(
+        '--build-dir',
+        help="Specify the relative build directory. Used to map object files "
+            "to the correct source files.")
     sys.exit(main(**{k: v
         for k, v in vars(parser.parse_args()).items()
         if v is not None}))
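
A small worked example of the new fold() above, since the same
collect/fold/table pattern is repeated across the other scripts: duplicate
(file, function) rows, e.g. the same symbol reported from several object
files, are summed into a single row. The numbers here are made up, and plain
ints stand in for the IntField sizes to keep the sketch short.

    import collections as co

    CodeResult = co.namedtuple('CodeResult', 'file,function,code_size')

    def fold(results, by=['file', 'function']):
        folding = co.OrderedDict()
        for r in results:
            name = tuple(getattr(r, k) for k in by)
            folding.setdefault(name, []).append(r)
        # code.py sums whole CodeResults via __add__; summing the size field
        # directly is equivalent for this plain-int sketch
        return [
            CodeResult(rs[0].file, rs[0].function,
                sum(r.code_size for r in rs))
            for rs in folding.values()]

    results = [
        CodeResult('lfs.c', 'lfs_mount', 100),
        CodeResult('lfs.c', 'lfs_mount', 20),    # same symbol, another .o
        CodeResult('lfs_util.c', 'lfs_crc', 50),
    ]
    print(fold(results))
    # [CodeResult(file='lfs.c', function='lfs_mount', code_size=120),
    #  CodeResult(file='lfs_util.c', function='lfs_crc', code_size=50)]
    print(fold(results, by=['file']))
    # grouping by file (what -b/--by-file does via table()) sums per file,
    # keeping the first function name, just like CodeResult.__add__
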

scripts/coverage.py  +612 -343

@@ -10,6 +10,7 @@ import csv
 import glob
 import itertools as it
 import json
+import math as m
 import os
 import re
 import shlex
@@ -20,139 +21,189 @@ import subprocess as sp
 
 
 GCDA_PATHS = ['*.gcda']
+GCOV_TOOL = ['gcov']
 
-class CoverageResult(co.namedtuple('CoverageResult',
-        'coverage_line_hits,coverage_line_count,'
-        'coverage_branch_hits,coverage_branch_count')):
+
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
     __slots__ = ()
-    def __new__(cls,
-            coverage_line_hits=0, coverage_line_count=0,
-            coverage_branch_hits=0, coverage_branch_count=0):
-        return super().__new__(cls,
-            int(coverage_line_hits),
-            int(coverage_line_count),
-            int(coverage_branch_hits),
-            int(coverage_branch_count))
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
+
+    def __str__(self):
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return str(self.x)
+
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
 
     def __add__(self, other):
-        return self.__class__(
-            self.coverage_line_hits + other.coverage_line_hits,
-            self.coverage_line_count + other.coverage_line_count,
-            self.coverage_branch_hits + other.coverage_branch_hits,
-            self.coverage_branch_count + other.coverage_branch_count)
-
-    def __sub__(self, other):
-        return CoverageDiff(other, self)
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self, **args):
-        ratio_line = (self.coverage_line_hits/self.coverage_line_count
-            if self.coverage_line_count else -1)
-        ratio_branch = (self.coverage_branch_hits/self.coverage_branch_count
-            if self.coverage_branch_count else -1)
-
-        if args.get('line_sort'):
-            return (-ratio_line, -ratio_branch)
-        elif args.get('reverse_line_sort'):
-            return (+ratio_line, +ratio_branch)
-        elif args.get('branch_sort'):
-            return (-ratio_branch, -ratio_line)
-        elif args.get('reverse_branch_sort'):
-            return (+ratio_branch, +ratio_line)
+        return IntField(self.x + other.x)
+
+    def __mul__(self, other):
+        return IntField(self.x * other.x)
+
+    def __lt__(self, other):
+        return self.x < other.x
+
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
         else:
-            return None
+            return IntField(round(self.x / n))
 
-    _header = '%19s %19s' % ('hits/line', 'hits/branch')
-    def __str__(self):
-        line_hits = self.coverage_line_hits
-        line_count = self.coverage_line_count
-        branch_hits = self.coverage_branch_hits
-        branch_count = self.coverage_branch_count
-        return '%11s %7s %11s %7s' % (
-            '%d/%d' % (line_hits, line_count)
-                if line_count else '-',
-            '%.1f%%' % (100*line_hits/line_count)
-                if line_count else '-',
-            '%d/%d' % (branch_hits, branch_count)
-                if branch_count else '-',
-            '%.1f%%' % (100*branch_hits/branch_count)
-                if branch_count else '-')
-
-class CoverageDiff(co.namedtuple('CoverageDiff', 'old,new')):
+# fractional fields, a/b
+class FracField(co.namedtuple('FracField', 'a,b')):
     __slots__ = ()
+    def __new__(cls, a, b=None):
+        if isinstance(a, FracField) and b is None:
+            return a
+        if isinstance(a, str) and b is None:
+            a, b = a.split('/', 1)
+        if b is None:
+            b = a
+        return super().__new__(cls, IntField(a), IntField(b))
 
-    def ratio_line(self):
-        old_line_hits = (self.old.coverage_line_hits
-            if self.old is not None else 0)
-        old_line_count = (self.old.coverage_line_count
-            if self.old is not None else 0)
-        new_line_hits = (self.new.coverage_line_hits
-            if self.new is not None else 0)
-        new_line_count = (self.new.coverage_line_count
-            if self.new is not None else 0)
-        return ((new_line_hits/new_line_count if new_line_count else 1.0)
-            - (old_line_hits/old_line_count if old_line_count else 1.0))
-
-    def ratio_branch(self):
-        old_branch_hits = (self.old.coverage_branch_hits
-            if self.old is not None else 0)
-        old_branch_count = (self.old.coverage_branch_count
-            if self.old is not None else 0)
-        new_branch_hits = (self.new.coverage_branch_hits
-            if self.new is not None else 0)
-        new_branch_count = (self.new.coverage_branch_count
-            if self.new is not None else 0)
-        return ((new_branch_hits/new_branch_count if new_branch_count else 1.0)
-            - (old_branch_hits/old_branch_count if old_branch_count else 1.0))
-
-    def key(self, **args):
-        return (
-            self.new.key(**args) if self.new is not None else 0,
-            -self.ratio_line(),
-            -self.ratio_branch())
-
-    def __bool__(self):
-        return bool(self.ratio_line() or self.ratio_branch())
-
-    _header = '%23s %23s %23s' % ('old', 'new', 'diff')
     def __str__(self):
-        old_line_hits = (self.old.coverage_line_hits
-            if self.old is not None else 0)
-        old_line_count = (self.old.coverage_line_count
-            if self.old is not None else 0)
-        old_branch_hits = (self.old.coverage_branch_hits
-            if self.old is not None else 0)
-        old_branch_count = (self.old.coverage_branch_count
-            if self.old is not None else 0)
-        new_line_hits = (self.new.coverage_line_hits
-            if self.new is not None else 0)
-        new_line_count = (self.new.coverage_line_count
-            if self.new is not None else 0)
-        new_branch_hits = (self.new.coverage_branch_hits
-            if self.new is not None else 0)
-        new_branch_count = (self.new.coverage_branch_count
-            if self.new is not None else 0)
-        diff_line_hits = new_line_hits - old_line_hits
-        diff_line_count = new_line_count - old_line_count
-        diff_branch_hits = new_branch_hits - old_branch_hits
-        diff_branch_count = new_branch_count - old_branch_count
-        ratio_line = self.ratio_line()
-        ratio_branch = self.ratio_branch()
-        return '%11s %11s %11s %11s %11s %11s%-10s%s' % (
-            '%d/%d' % (old_line_hits, old_line_count)
-                if old_line_count else '-',
-            '%d/%d' % (old_branch_hits, old_branch_count)
-                if old_branch_count else '-',
-            '%d/%d' % (new_line_hits, new_line_count)
-                if new_line_count else '-',
-            '%d/%d' % (new_branch_hits, new_branch_count)
-                if new_branch_count else '-',
-            '%+d/%+d' % (diff_line_hits, diff_line_count),
-            '%+d/%+d' % (diff_branch_hits, diff_branch_count),
-            ' (%+.1f%%)' % (100*ratio_line) if ratio_line else '',
-            ' (%+.1f%%)' % (100*ratio_branch) if ratio_branch else '')
+        return '%s/%s' % (self.a, self.b)
+
+    none = '%11s %7s' % ('-', '-')
+    def table(self):
+        if not self.b.x:
+            return self.none
+
+        t = self.a.x/self.b.x
+        return '%11s %7s' % (
+            self,
+            '∞%' if t == float('+inf')
+            else '-∞%' if t == float('-inf')
+            else '%.1f%%' % (100*t))
+
+    diff_none = '%11s' % '-'
+    def diff_table(self):
+        if not self.b.x:
+            return self.diff_none
+
+        return '%11s' % (self,)
+
+    def diff_diff(self, other):
+        new_a, new_b = self if self else (IntField(0), IntField(0))
+        old_a, old_b = other if other else (IntField(0), IntField(0))
+        return '%11s' % ('%s/%s' % (
+            new_a.diff_diff(old_a).strip(),
+            new_b.diff_diff(old_b).strip()))
+
+    def ratio(self, other):
+        new_a, new_b = self if self else (IntField(0), IntField(0))
+        old_a, old_b = other if other else (IntField(0), IntField(0))
+        new = new_a.x/new_b.x if new_b.x else 1.0
+        old = old_a.x/old_b.x if old_b.x else 1.0
+        return new - old
+
+    def __add__(self, other):
+        return FracField(self.a + other.a, self.b + other.b)
+
+    def __mul__(self, other):
+        return FracField(self.a * other.a, self.b + other.b)
+
+    def __lt__(self, other):
+        self_r = self.a.x/self.b.x if self.b.x else float('-inf')
+        other_r = other.a.x/other.b.x if other.b.x else float('-inf')
+        return self_r < other_r
+
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        return FracField(self.a / n, self.b / n)
+
+# coverage results
+class CoverageResult(co.namedtuple('CoverageResult',
+        'file,function,line,'
+        'coverage_hits,coverage_lines,coverage_branches')):
+    __slots__ = ()
+    def __new__(cls, file, function, line,
+            coverage_hits, coverage_lines, coverage_branches):
+        return super().__new__(cls, file, function, int(IntField(line)),
+            IntField(coverage_hits),
+            FracField(coverage_lines),
+            FracField(coverage_branches))
+
+    def __add__(self, other):
+        return CoverageResult(self.file, self.function, self.line,
+            max(self.coverage_hits, other.coverage_hits),
+            self.coverage_lines + other.coverage_lines,
+            self.coverage_branches + other.coverage_branches)
 
 
 def openio(path, mode='r'):
@@ -164,27 +215,22 @@ def openio(path, mode='r'):
     else:
         return open(path, mode)
 
-def color(**args):
-    if args.get('color') == 'auto':
-        return sys.stdout.isatty()
-    elif args.get('color') == 'always':
-        return True
-    else:
-        return False
-
-def collect(paths, **args):
-    results = {}
+def collect(paths, *,
+        gcov_tool=GCOV_TOOL,
+        build_dir=None,
+        everything=False,
+        **args):
+    results = []
     for path in paths:
         # map to source file
         src_path = re.sub('\.t\.a\.gcda$', '.c', path)
-        # TODO test this
-        if args.get('build_dir'):
-            src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
+        if build_dir:
+            src_path = re.sub('%s/*' % re.escape(build_dir), '',
                 src_path)
 
         # get coverage info through gcov's json output
         # note, gcov-tool may contain extra args
-        cmd = args['gcov_tool'] + ['-b', '-t', '--json-format', path]
+        cmd = GCOV_TOOL + ['-b', '-t', '--json-format', path]
         if args.get('verbose'):
             print(' '.join(shlex.quote(c) for c in cmd))
         proc = sp.Popen(cmd,
@@ -208,49 +254,277 @@ def collect(paths, **args):
             for line in file['lines']:
                 func = line.get('function_name', '(inlined)')
                 # discard internal function (this includes injected test cases)
-                if not args.get('everything'):
+                if not everything:
                     if func.startswith('__'):
                         continue
 
-                results[(src_path, func, line['line_number'])] = (
+                results.append(CoverageResult(
+                    src_path, func, line['line_number'],
                     line['count'],
-                    CoverageResult(
-                        coverage_line_hits=1 if line['count'] > 0 else 0,
-                        coverage_line_count=1,
-                        coverage_branch_hits=sum(
-                            1 if branch['count'] > 0 else 0
+                    FracField(
+                        1 if line['count'] > 0 else 0,
+                        1),
+                    FracField(
+                        sum(1 if branch['count'] > 0 else 0
                             for branch in line['branches']),
-                        coverage_branch_count=len(line['branches'])))
-
-    # merge into functions, since this is what other scripts use
-    func_results = co.defaultdict(lambda: CoverageResult())
-    for (file, func, _), (_, result) in results.items():
-        func_results[(file, func)] += result
-
-    return func_results, results
+                        len(line['branches']))))
+
+    return results
+
+
+def fold(results, *,
+        by=['file', 'function', 'line'],
+        **_):
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    folded = []
+    for rs in folding.values():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    return folded
+
+
+def table(results, diff_results=None, *,
+        by_file=False,
+        by_line=False,
+        line_sort=False,
+        reverse_line_sort=False,
+        branch_sort=False,
+        reverse_branch_sort=False,
+        summary=False,
+        all=False,
+        percent=False,
+        **_):
+    all_, all = all, __builtins__.all
+
+    # fold
+    results = fold(results,
+        by=['file', 'line'] if by_line
+            else ['file'] if by_file
+            else ['function'])
+    if diff_results is not None:
+        diff_results = fold(diff_results,
+            by=['file', 'line'] if by_line
+                else ['file'] if by_file
+                else ['function'])
+
+    table = {
+        (r.file, r.line) if by_line
+            else r.file if by_file
+            else r.function: r
+        for r in results}
+    diff_table = {
+        (r.file, r.line) if by_line
+            else r.file if by_file
+            else r.function: r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: (
+            -FracField.ratio(
+                table[n].coverage_lines if n in table else None,
+                diff_table[n].coverage_lines if n in diff_table else None),
+            -FracField.ratio(
+                table[n].coverage_branches if n in table else None,
+                diff_table[n].coverage_branches if n in diff_table else None)))
+    if line_sort:
+        names.sort(key=lambda n: (table[n].coverage_lines,)
+            if n in table else (),
+            reverse=True)
+    elif reverse_line_sort:
+        names.sort(key=lambda n: (table[n].coverage_lines,)
+            if n in table else (),
+            reverse=False)
+    elif branch_sort:
+        names.sort(key=lambda n: (table[n].coverage_branches,)
+            if n in table else (),
+            reverse=True)
+    elif reverse_branch_sort:
+        names.sort(key=lambda n: (table[n].coverage_branches,)
+            if n in table else (),
+            reverse=False)
+
+    # print header
+    print('%-36s' % ('%s%s' % (
+        'line' if by_line
+            else 'file' if by_file
+            else 'function',
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else ''),
+        end='')
+    if diff_results is None:
+        print(' %s %s' % (
+            'hits/line'.rjust(len(FracField.none)),
+            'hits/branch'.rjust(len(FracField.none))))
+    elif percent:
+        print(' %s %s' % (
+            'hits/line'.rjust(len(FracField.diff_none)),
+            'hits/branch'.rjust(len(FracField.diff_none))))
+    else:
+        print(' %s %s %s %s %s %s' % (
+            'oh/line'.rjust(len(FracField.diff_none)),
+            'oh/branch'.rjust(len(FracField.diff_none)),
+            'nh/line'.rjust(len(FracField.diff_none)),
+            'nh/branch'.rjust(len(FracField.diff_none)),
+            'dh/line'.rjust(len(FracField.diff_none)),
+            'dh/branch'.rjust(len(FracField.diff_none))))
+
+    # print entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is not None:
+                diff_r = diff_table.get(name)
+                line_ratio = FracField.ratio(
+                    r.coverage_lines if r else None,
+                    diff_r.coverage_lines if diff_r else None)
+                branch_ratio = FracField.ratio(
+                    r.coverage_branches if r else None,
+                    diff_r.coverage_branches if diff_r else None)
+                if not line_ratio and not branch_ratio and not all_:
+                    continue
 
-def annotate(paths, results, **args):
+            print('%-36s' % (
+                ':'.join('%s' % n for n in name)
+                if by_line else name), end='')
+            if diff_results is None:
+                print(' %s %s' % (
+                    r.coverage_lines.table()
+                        if r else FracField.none,
+                    r.coverage_branches.table()
+                        if r else FracField.none))
+            elif percent:
+                print(' %s %s%s' % (
+                    r.coverage_lines.diff_table()
+                        if r else FracField.diff_none,
+                    r.coverage_branches.diff_table()
+                        if r else FracField.diff_none,
+                    ' (%s)' % ', '.join(
+                            '+∞%' if t == float('+inf')
+                            else '-∞%' if t == float('-inf')
+                            else '%+.1f%%' % (100*t)
+                            for t in [line_ratio, branch_ratio])))
+            else:
+                print(' %s %s %s %s %s %s%s' % (
+                    diff_r.coverage_lines.diff_table()
+                        if diff_r else FracField.diff_none,
+                    diff_r.coverage_branches.diff_table()
+                        if diff_r else FracField.diff_none,
+                    r.coverage_lines.diff_table()
+                        if r else FracField.diff_none,
+                    r.coverage_branches.diff_table()
+                        if r else FracField.diff_none,
+                    FracField.diff_diff(
+                        r.coverage_lines if r else None,
+                        diff_r.coverage_lines if diff_r else None)
+                        if r or diff_r else FracField.diff_none,
+                    FracField.diff_diff(
+                        r.coverage_branches if r else None,
+                        diff_r.coverage_branches if diff_r else None)
+                        if r or diff_r else FracField.diff_none,
+                    ' (%s)' % ', '.join(
+                            '+∞%' if t == float('+inf')
+                            else '-∞%' if t == float('-inf')
+                            else '%+.1f%%' % (100*t)
+                            for t in [line_ratio, branch_ratio]
+                            if t)
+                        if line_ratio or branch_ratio else ''))
+
+    # print total
+    total = fold(results, by=[])
+    r = total[0] if total else None
+    if diff_results is not None:
+        diff_total = fold(diff_results, by=[])
+        diff_r = diff_total[0] if diff_total else None
+        line_ratio = FracField.ratio(
+            r.coverage_lines if r else None,
+            diff_r.coverage_lines if diff_r else None)
+        branch_ratio = FracField.ratio(
+            r.coverage_branches if r else None,
+            diff_r.coverage_branches if diff_r else None)
+
+    print('%-36s' % 'TOTAL', end='')
+    if diff_results is None:
+        print(' %s %s' % (
+            r.coverage_lines.table()
+                if r else FracField.none,
+            r.coverage_branches.table()
+                if r else FracField.none))
+    elif percent:
+        print(' %s %s%s' % (
+            r.coverage_lines.diff_table()
+                if r else FracField.diff_none,
+            r.coverage_branches.diff_table()
+                if r else FracField.diff_none,
+            ' (%s)' % ', '.join(
+                    '+∞%' if t == float('+inf')
+                    else '-∞%' if t == float('-inf')
+                    else '%+.1f%%' % (100*t)
+                    for t in [line_ratio, branch_ratio])))
+    else:
+        print(' %s %s %s %s %s %s%s' % (
+            diff_r.coverage_lines.diff_table()
+                if diff_r else FracField.diff_none,
+            diff_r.coverage_branches.diff_table()
+                if diff_r else FracField.diff_none,
+            r.coverage_lines.diff_table()
+                if r else FracField.diff_none,
+            r.coverage_branches.diff_table()
+                if r else FracField.diff_none,
+            FracField.diff_diff(
+                r.coverage_lines if r else None,
+                diff_r.coverage_lines if diff_r else None)
+                if r or diff_r else FracField.diff_none,
+            FracField.diff_diff(
+                r.coverage_branches if r else None,
+                diff_r.coverage_branches if diff_r else None)
+                if r or diff_r else FracField.diff_none,
+            ' (%s)' % ', '.join(
+                    '+∞%' if t == float('+inf')
+                    else '-∞%' if t == float('-inf')
+                    else '%+.1f%%' % (100*t)
+                    for t in [line_ratio, branch_ratio]
+                    if t)
+                if line_ratio or branch_ratio else ''))
+
+
+def annotate(paths, results, *,
+        annotate=False,
+        lines=False,
+        branches=False,
+        build_dir=None,
+        **args):
     for path in paths:
         # map to source file
         src_path = re.sub('\.t\.a\.gcda$', '.c', path)
-        # TODO test this
-        if args.get('build_dir'):
-            src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
+        if build_dir:
+            src_path = re.sub('%s/*' % re.escape(build_dir), '',
                 src_path)
 
         # flatten to line info
-        line_results = {line: (hits, result)
-            for (_, _, line), (hits, result) in results.items()}
+        results = fold(results, by=['file', 'line'])
+        table = {r.line: r for r in results if r.file == src_path}
 
         # calculate spans to show
-        if not args.get('annotate'):
+        if not annotate:
             spans = []
             last = None
-            for line, (hits, result) in sorted(line_results.items()):
-                if ((args.get('lines') and hits == 0)
-                        or (args.get('branches')
-                            and result.coverage_branch_hits
-                                < result.coverage_branch_count)):
+            for line, r in sorted(table.items()):
+                if ((lines and int(r.coverage_hits) == 0)
+                        or (branches
+                            and r.coverage_branches.a
+                                < r.coverage_branches.b)):
                     if last is not None and line - last.stop <= args['context']:
                         last = range(
                             last.start,
@@ -268,48 +542,55 @@ def annotate(paths, results, **args):
             skipped = False
             for i, line in enumerate(f):
                 # skip lines not in spans?
-                if (not args.get('annotate')
-                        and not any(i+1 in s for s in spans)):
+                if not annotate and not any(i+1 in s for s in spans):
                     skipped = True
                     continue
 
                 if skipped:
                     skipped = False
                     print('%s@@ %s:%d @@%s' % (
-                        '\x1b[36m' if color(**args) else '',
+                        '\x1b[36m' if args['color'] else '',
                         src_path,
                         i+1,
-                        '\x1b[m' if color(**args) else ''))
+                        '\x1b[m' if args['color'] else ''))
 
                 # build line
                 if line.endswith('\n'):
                     line = line[:-1]
 
-                if i+1 in line_results:
-                    hits, result = line_results[i+1]
-                    line = '%-*s // %d hits, %d/%d branches' % (
+                if i+1 in table:
+                    r = table[i+1]
+                    line = '%-*s // %s hits, %s branches' % (
                         args['width'],
                         line,
-                        hits,
-                        result.coverage_branch_hits,
-                        result.coverage_branch_count)
+                        r.coverage_hits,
+                        r.coverage_branches)
 
-                    if color(**args):
-                        if args.get('lines') and hits == 0:
+                    if args['color']:
+                        if lines and int(r.coverage_hits) == 0:
                             line = '\x1b[1;31m%s\x1b[m' % line
-                        elif (args.get('branches') and
-                                result.coverage_branch_hits
-                                < result.coverage_branch_count):
+                        elif (branches
+                                and r.coverage_branches.a
+                                    < r.coverage_branches.b):
                             line = '\x1b[35m%s\x1b[m' % line
 
                 print(line)
 
-def main(**args):
+
+def main(gcda_paths, **args):
+    # figure out what color should be
+    if args.get('color') == 'auto':
+        args['color'] = sys.stdout.isatty()
+    elif args.get('color') == 'always':
+        args['color'] = True
+    else:
+        args['color'] = False
+
     # find sizes
     if not args.get('use', None):
         # find .gcda files
         paths = []
-        for path in args['gcda_paths']:
+        for path in gcda_paths:
             if os.path.isdir(path):
                 path = path + '/*.gcda'
 
@@ -317,143 +598,77 @@ def main(**args):
                 paths.append(path)
 
         if not paths:
-            print('no .gcda files found in %r?' % args['gcda_paths'])
+            print('no .gcda files found in %r?' % gcda_paths)
             sys.exit(-1)
 
-        results, line_results = collect(paths, **args)
+        results = collect(paths, **args)
     else:
+        results = []
         with openio(args['use']) as f:
-            r = csv.DictReader(f)
-            results = {
-                (result['file'], result['name']): CoverageResult(
-                    *(result[f] for f in CoverageResult._fields))
-                for result in r
-                if all(result.get(f) not in {None, ''}
-
-                    for f in CoverageResult._fields)}
-        paths = []
-        line_results = {}
+            reader = csv.DictReader(f)
+            for r in reader:
+                try:
+                    results.append(CoverageResult(**{
+                        k: v for k, v in r.items()
+                        if k in CoverageResult._fields}))
+                except TypeError:
+                    pass
 
-    # find previous results?
-    if args.get('diff'):
-        try:
-            with openio(args['diff']) as f:
-                r = csv.DictReader(f)
-                prev_results = {
-                    (result['file'], result['name']): CoverageResult(
-                        *(result[f] for f in CoverageResult._fields))
-                    for result in r
-                    if all(result.get(f) not in {None, ''}
-                        for f in CoverageResult._fields)}
-        except FileNotFoundError:
-            prev_results = []
+    # fold to remove duplicates
+    results = fold(results)
+
+    # sort because why not
+    results.sort()
 
     # write results to CSV
     if args.get('output'):
-        merged_results = co.defaultdict(lambda: {})
-        other_fields = []
-
-        # merge?
-        if args.get('merge'):
-            try:
-                with openio(args['merge']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        func = result.pop('name', '')
-                        for f in CoverageResult._fields:
-                            result.pop(f, None)
-                        merged_results[(file, func)] = result
-                        other_fields = result.keys()
-            except FileNotFoundError:
-                pass
-
-        for (file, func), result in results.items():
-            merged_results[(file, func)] |= result._asdict()
-
         with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name',
-                *other_fields, *CoverageResult._fields])
-            w.writeheader()
-            for (file, func), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': func, **result})
-
-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        if not args.get('diff'):
-            print('%-36s %s' % (by, CoverageResult._header))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                CoverageDiff._header))
-
-    def print_entries(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        entries = co.defaultdict(lambda: CoverageResult())
-        for k, result in results.items():
-            entries[entry(k)] += result
+            writer = csv.DictWriter(f, CoverageResult._fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r._asdict())
 
-        if not args.get('diff'):
-            for name, result in sorted(entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                print('%-36s %s' % (name, result))
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    try:
+                        diff_results.append(CoverageResult(**{
+                            k: v for k, v in r.items()
+                            if k in CoverageResult._fields}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+        # fold to remove duplicates
+        diff_results = fold(diff_results)
+
+    if not args.get('quiet'):
+        if (args.get('annotate')
+                or args.get('lines')
+                or args.get('branches')):
+            # annotate sources
+            annotate(
+                paths,
+                results,
+                **args)
         else:
-            prev_entries = co.defaultdict(lambda: CoverageResult())
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] += result
-
-            diff_entries = {name: entries.get(name) - prev_entries.get(name)
-                for name in (entries.keys() | prev_entries.keys())}
-
-            for name, diff in sorted(diff_entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                if diff or args.get('all'):
-                    print('%-36s %s' % (name, diff))
-
-    if args.get('quiet'):
-        pass
-    elif (args.get('annotate')
-            or args.get('lines')
-            or args.get('branches')):
-        annotate(paths, line_results, **args)
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('function')
-        print_entries('function')
-        print_entries('total')
+            # print table
+            table(
+                results,
+                diff_results if args.get('diff') else None,
+                **args)
 
     # catch lack of coverage
     if args.get('error_on_lines') and any(
-            r.coverage_line_hits < r.coverage_line_count
-            for r in results.values()):
+            r.coverage_lines.a < r.coverage_lines.b for r in results):
         sys.exit(2)
     elif args.get('error_on_branches') and any(
-            r.coverage_branch_hits < r.coverage_branch_count
-            for r in results.values()):
+            r.coverage_branches.a < r.coverage_branches.b for r in results):
         sys.exit(3)
 
 
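The error checks above only rely on the fractional coverage fields exposing an a/b pair (hits over total) and a printable form; the class itself is defined earlier in coverage.py and is not part of this hunk. A rough, hypothetical sketch of that contract (the FracField name and formatting here are assumptions, not the script's actual class):

    import collections as co

    # hypothetical stand-in for coverage.py's fractional fields; only the
    # .a/.b pair and the printable form are relied on by the checks above
    class FracField(co.namedtuple('FracField', 'a,b')):
        __slots__ = ()
        def __str__(self):
            return '%d/%d' % (self.a, self.b)

    coverage_lines = FracField(12, 16)
    coverage_branches = FracField(3, 8)

    # mirrors the --error-on-lines/--error-on-branches checks in main()
    if coverage_lines.a < coverage_lines.b:
        print('uncovered lines: %s' % (coverage_lines,))
    if coverage_branches.a < coverage_branches.b:
        print('uncovered branches: %s' % (coverage_branches,))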
@@ -462,60 +677,114 @@ if __name__ == "__main__":
     import sys
     parser = argparse.ArgumentParser(
         description="Find coverage info after running tests.")
-    parser.add_argument('gcda_paths', nargs='*', default=GCDA_PATHS,
-        help="Description of where to find *.gcda files. May be a directory \
-            or a list of paths. Defaults to %r." % GCDA_PATHS)
-    parser.add_argument('-v', '--verbose', action='store_true',
+    parser.add_argument(
+        'gcda_paths',
+        nargs='*',
+        default=GCDA_PATHS,
+        help="Description of where to find *.gcda files. May be a directory "
+            "or a list of paths. Defaults to %(default)r.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
         help="Output commands that run behind the scenes.")
-    parser.add_argument('-q', '--quiet', action='store_true',
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
         help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
+    parser.add_argument(
+        '-o', '--output',
         help="Specify CSV file to store results.")
-    parser.add_argument('-u', '--use',
-        help="Don't compile and find code sizes, instead use this CSV file.")
-    parser.add_argument('-d', '--diff',
-        help="Specify CSV file to diff code size against.")
-    parser.add_argument('-m', '--merge',
-        help="Merge with an existing CSV file when writing to output.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all functions, not just the ones that changed.")
-    parser.add_argument('-A', '--everything', action='store_true',
-        help="Include builtin and libc specific symbols.")
-    parser.add_argument('-s', '--line-sort', action='store_true',
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by-file',
+        action='store_true',
+        help="Group by file.")
+    parser.add_argument(
+        '--by-line',
+        action='store_true',
+        help="Group by line.")
+    parser.add_argument(
+        '-s', '--line-sort',
+        action='store_true',
         help="Sort by line coverage.")
-    parser.add_argument('-S', '--reverse-line-sort', action='store_true',
+    parser.add_argument(
+        '-S', '--reverse-line-sort',
+        action='store_true',
         help="Sort by line coverage, but backwards.")
-    parser.add_argument('--branch-sort', action='store_true',
+    parser.add_argument(
+        '--branch-sort',
+        action='store_true',
         help="Sort by branch coverage.")
-    parser.add_argument('--reverse-branch-sort', action='store_true',
+    parser.add_argument(
+        '--reverse-branch-sort',
+        action='store_true',
         help="Sort by branch coverage, but backwards.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level coverage.")
-    parser.add_argument('-Y', '--summary', action='store_true',
-        help="Only show the total coverage.")
-    parser.add_argument('-p', '--annotate', action='store_true',
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total size.")
+    parser.add_argument(
+        '-l', '--annotate',
+        action='store_true',
         help="Show source files annotated with coverage info.")
-    parser.add_argument('-l', '--lines', action='store_true',
+    parser.add_argument(
+        '-L', '--lines',
+        action='store_true',
         help="Show uncovered lines.")
-    parser.add_argument('-b', '--branches', action='store_true',
+    parser.add_argument(
+        '-B', '--branches',
+        action='store_true',
         help="Show uncovered branches.")
-    parser.add_argument('-c', '--context', type=lambda x: int(x, 0), default=3,
-        help="Show a additional lines of context. Defaults to 3.")
-    parser.add_argument('-W', '--width', type=lambda x: int(x, 0), default=80,
-        help="Assume source is styled with this many columns. Defaults to 80.")
-    parser.add_argument('--color',
-        choices=['never', 'always', 'auto'], default='auto',
+    parser.add_argument(
+        '-c', '--context',
+        type=lambda x: int(x, 0),
+        default=3,
+        help="Show a additional lines of context. Defaults to %(default)r.")
+    parser.add_argument(
+        '-W', '--width',
+        type=lambda x: int(x, 0),
+        default=80,
+        help="Assume source is styled with this many columns. Defaults "
+            "to %(default)r.")
+    parser.add_argument(
+        '--color',
+        choices=['never', 'always', 'auto'],
+        default='auto',
         help="When to use terminal colors.")
-    parser.add_argument('-e', '--error-on-lines', action='store_true',
+    parser.add_argument(
+        '-e', '--error-on-lines',
+        action='store_true',
         help="Error if any lines are not covered.")
-    parser.add_argument('-E', '--error-on-branches', action='store_true',
+    parser.add_argument(
+        '-E', '--error-on-branches',
+        action='store_true',
         help="Error if any branches are not covered.")
-    parser.add_argument('--gcov-tool', default=['gcov'],
+    parser.add_argument(
+        '-A', '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '--gcov-tool',
+        default=GCOV_TOOL,
         type=lambda x: x.split(),
-        help="Path to the gcov tool to use.")
-    parser.add_argument('--build-dir',
-        help="Specify the relative build directory. Used to map object files \
-            to the correct source files.")
+        help="Path to the gcov tool to use. Defaults to %(default)r.")
+    parser.add_argument(
+        '--build-dir',
+        help="Specify the relative build directory. Used to map object files "
+            "to the correct source files.")
     sys.exit(main(**{k: v
         for k, v in vars(parser.parse_args()).items()
         if v is not None}))

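Because --error-on-lines and --error-on-branches now exit with distinct codes (2 and 3), a wrapper can tell the two failure modes apart. A minimal sketch using subprocess; the tests directory argument is only a placeholder for wherever the .gcda files actually live:

    import subprocess as sp
    import sys

    # run coverage.py quietly, erroring on uncovered lines (-e) and branches (-E)
    proc = sp.run([sys.executable, './scripts/coverage.py', 'tests',
        '-q', '-e', '-E'])

    if proc.returncode == 2:
        print('some lines are not covered')
    elif proc.returncode == 3:
        print('some branches are not covered')
    elif proc.returncode != 0:
        print('coverage.py itself failed')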
+ 373 - 194
scripts/data.py

@@ -5,71 +5,123 @@
 # by Linux's Bloat-O-Meter.
 #
 
-import os
+import collections as co
+import csv
 import glob
 import itertools as it
-import subprocess as sp
-import shlex
+import math as m
+import os
 import re
-import csv
-import collections as co
+import shlex
+import subprocess as sp
 
 
 OBJ_PATHS = ['*.o']
+NM_TOOL = ['nm']
+TYPE = 'dDbB'
+
 
-class DataResult(co.namedtuple('DataResult', 'data_size')):
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
     __slots__ = ()
-    def __new__(cls, data_size=0):
-        return super().__new__(cls, int(data_size))
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
 
-    def __add__(self, other):
-        return self.__class__(self.data_size + other.data_size)
+    def __str__(self):
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return str(self.x)
 
-    def __sub__(self, other):
-        return DataDiff(other, self)
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
 
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
+    diff_none = '%7s' % '-'
+    diff_table = table
 
-    def key(self, **args):
-        if args.get('size_sort'):
-            return -self.data_size
-        elif args.get('reverse_size_sort'):
-            return +self.data_size
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
         else:
-            return None
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
 
-    _header = '%7s' % 'size'
-    def __str__(self):
-        return '%7d' % self.data_size
+    def __add__(self, other):
+        return IntField(self.x + other.x)
 
-class DataDiff(co.namedtuple('DataDiff',  'old,new')):
-    __slots__ = ()
+    def __mul__(self, other):
+        return IntField(self.x * other.x)
 
-    def ratio(self):
-        old = self.old.data_size if self.old is not None else 0
-        new = self.new.data_size if self.new is not None else 0
-        return (new-old) / old if old else 1.0
+    def __lt__(self, other):
+        return self.x < other.x
 
-    def key(self, **args):
-        return (
-            self.new.key(**args) if self.new is not None else 0,
-            -self.ratio())
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
 
-    def __bool__(self):
-        return bool(self.ratio())
+    def __le__(self, other):
+        return not self.__gt__(other)
 
-    _header = '%7s %7s %7s' % ('old', 'new', 'diff')
-    def __str__(self):
-        old = self.old.data_size if self.old is not None else 0
-        new = self.new.data_size if self.new is not None else 0
-        diff = new - old
-        ratio = self.ratio()
-        return '%7s %7s %+7d%s' % (
-            old or "-",
-            new or "-",
-            diff,
-            ' (%+.1f%%)' % (100*ratio) if ratio else '')
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
+        else:
+            return IntField(round(self.x / n))
+
+# data size results
+class DataResult(co.namedtuple('DataResult', 'file,function,data_size')):
+    __slots__ = ()
+    def __new__(cls, file, function, data_size):
+        return super().__new__(cls, file, function, IntField(data_size))
+
+    def __add__(self, other):
+        return DataResult(self.file, self.function,
+            self.data_size + other.data_size)
 
 
 def openio(path, mode='r'):
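IntField is what lets the size tables treat ±∞ (for example, from recursive stack limits) the same as ordinary byte counts. A quick illustration of the parsing, diff, and ratio behaviour, assuming the scripts/ directory is importable:

    # assumes the littlefs scripts/ directory is on sys.path
    from data import IntField

    old = IntField('0x10')   # accepts ints, hex strings, and ∞/inf strings
    new = IntField(24)
    inf = IntField('∞')

    print(new + old)                           # 40
    print(new.diff_diff(old))                  # '     +8'
    print('%+.1f%%' % (100*new.ratio(old)))    # '+50.0%'
    print(inf.table())                         # '      ∞'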
@@ -81,20 +133,25 @@ def openio(path, mode='r'):
     else:
         return open(path, mode)
 
-def collect(paths, **args):
-    results = co.defaultdict(lambda: DataResult())
+def collect(paths, *,
+        nm_tool=NM_TOOL,
+        type=TYPE,
+        build_dir=None,
+        everything=False,
+        **args):
+    results = []
     pattern = re.compile(
         '^(?P<size>[0-9a-fA-F]+)' +
-        ' (?P<type>[%s])' % re.escape(args['type']) +
+        ' (?P<type>[%s])' % re.escape(type) +
         ' (?P<func>.+?)$')
     for path in paths:
         # map to source file
         src_path = re.sub('\.o$', '.c', path)
-        if args.get('build_dir'):
-            src_path = re.sub('%s/*' % re.escape(args['build_dir']), '',
+        if build_dir:
+            src_path = re.sub('%s/*' % re.escape(build_dir), '',
                 src_path)
         # note nm-tool may contain extra args
-        cmd = args['nm_tool'] + ['--size-sort', path]
+        cmd = nm_tool + ['--size-sort', path]
         if args.get('verbose'):
             print(' '.join(shlex.quote(c) for c in cmd))
         proc = sp.Popen(cmd,
@@ -107,12 +164,15 @@ def collect(paths, **args):
             if m:
                 func = m.group('func')
                 # discard internal functions
-                if not args.get('everything') and func.startswith('__'):
+                if not everything and func.startswith('__'):
                     continue
                 # discard .8449 suffixes created by optimizer
                 func = re.sub('\.[0-9]+', '', func)
-                results[(src_path, func)] += DataResult(
-                    int(m.group('size'), 16))
+
+                results.append(DataResult(
+                    src_path, func,
+                    int(m.group('size'), 16)))
+
         proc.wait()
         if proc.returncode != 0:
             if not args.get('verbose'):
@@ -122,12 +182,167 @@ def collect(paths, **args):
 
     return results
 
-def main(**args):
+
+def fold(results, *,
+        by=['file', 'function'],
+        **_):
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    folded = []
+    for rs in folding.values():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    return folded
+
+
+def table(results, diff_results=None, *,
+        by_file=False,
+        size_sort=False,
+        reverse_size_sort=False,
+        summary=False,
+        all=False,
+        percent=False,
+        **_):
+    all_, all = all, __builtins__.all
+
+    # fold
+    results = fold(results, by=['file' if by_file else 'function'])
+    if diff_results is not None:
+        diff_results = fold(diff_results,
+            by=['file' if by_file else 'function'])
+
+    table = {
+        r.file if by_file else r.function: r
+        for r in results}
+    diff_table = {
+        r.file if by_file else r.function: r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: -IntField.ratio(
+            table[n].data_size if n in table else None,
+            diff_table[n].data_size if n in diff_table else None))
+    if size_sort:
+        names.sort(key=lambda n: (table[n].data_size,) if n in table else (),
+            reverse=True)
+    elif reverse_size_sort:
+        names.sort(key=lambda n: (table[n].data_size,) if n in table else (),
+            reverse=False)
+
+    # print header
+    print('%-36s' % ('%s%s' % (
+        'file' if by_file else 'function',
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else ''),
+        end='')
+    if diff_results is None:
+        print(' %s' % ('size'.rjust(len(IntField.none))))
+    elif percent:
+        print(' %s' % ('size'.rjust(len(IntField.diff_none))))
+    else:
+        print(' %s %s %s' % (
+            'old'.rjust(len(IntField.diff_none)),
+            'new'.rjust(len(IntField.diff_none)),
+            'diff'.rjust(len(IntField.diff_none))))
+
+    # print entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is not None:
+                diff_r = diff_table.get(name)
+                ratio = IntField.ratio(
+                    r.data_size if r else None,
+                    diff_r.data_size if diff_r else None)
+                if not ratio and not all_:
+                    continue
+
+            print('%-36s' % name, end='')
+            if diff_results is None:
+                print(' %s' % (
+                    r.data_size.table()
+                        if r else IntField.none))
+            elif percent:
+                print(' %s%s' % (
+                    r.data_size.diff_table()
+                        if r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))))
+            else:
+                print(' %s %s %s%s' % (
+                    diff_r.data_size.diff_table()
+                        if diff_r else IntField.diff_none,
+                    r.data_size.diff_table()
+                        if r else IntField.diff_none,
+                    IntField.diff_diff(
+                        r.data_size if r else None,
+                        diff_r.data_size if diff_r else None)
+                        if r or diff_r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))
+                        if ratio else ''))
+
+    # print total
+    total = fold(results, by=[])
+    r = total[0] if total else None
+    if diff_results is not None:
+        diff_total = fold(diff_results, by=[])
+        diff_r = diff_total[0] if diff_total else None
+        ratio = IntField.ratio(
+            r.data_size if r else None,
+            diff_r.data_size if diff_r else None)
+
+    print('%-36s' % 'TOTAL', end='')
+    if diff_results is None:
+        print(' %s' % (
+            r.data_size.table()
+                if r else IntField.none))
+    elif percent:
+        print(' %s%s' % (
+            r.data_size.diff_table()
+                if r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))))
+    else:
+        print(' %s %s %s%s' % (
+            diff_r.data_size.diff_table()
+                if diff_r else IntField.diff_none,
+            r.data_size.diff_table()
+                if r else IntField.diff_none,
+            IntField.diff_diff(
+                r.data_size if r else None,
+                diff_r.data_size if diff_r else None)
+                if r or diff_r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))
+                if ratio else ''))
+
+
+def main(obj_paths, **args):
     # find sizes
     if not args.get('use', None):
         # find .o files
         paths = []
-        for path in args['obj_paths']:
+        for path in obj_paths:
             if os.path.isdir(path):
                 path = path + '/*.o'
 
@@ -135,127 +350,61 @@ def main(**args):
                 paths.append(path)
 
         if not paths:
-            print('no .obj files found in %r?' % args['obj_paths'])
+            print('no .obj files found in %r?' % obj_paths)
             sys.exit(-1)
 
         results = collect(paths, **args)
     else:
+        results = []
         with openio(args['use']) as f:
-            r = csv.DictReader(f)
-            results = {
-                (result['file'], result['name']): DataResult(
-                    *(result[f] for f in DataResult._fields))
-                for result in r
-                if all(result.get(f) not in {None, ''}
-                    for f in DataResult._fields)}
+            reader = csv.DictReader(f)
+            for r in reader:
+                try:
+                    results.append(DataResult(**{
+                        k: v for k, v in r.items()
+                        if k in DataResult._fields}))
+                except TypeError:
+                    pass
 
-    # find previous results?
-    if args.get('diff'):
-        try:
-            with openio(args['diff']) as f:
-                r = csv.DictReader(f)
-                prev_results = {
-                    (result['file'], result['name']): DataResult(
-                        *(result[f] for f in DataResult._fields))
-                    for result in r
-                    if all(result.get(f) not in {None, ''}
-                        for f in DataResult._fields)}
-        except FileNotFoundError:
-            prev_results = []
+    # fold to remove duplicates
+    results = fold(results)
+
+    # sort because why not
+    results.sort()
 
     # write results to CSV
     if args.get('output'):
-        merged_results = co.defaultdict(lambda: {})
-        other_fields = []
-
-        # merge?
-        if args.get('merge'):
-            try:
-                with openio(args['merge']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        func = result.pop('name', '')
-                        for f in DataResult._fields:
-                            result.pop(f, None)
-                        merged_results[(file, func)] = result
-                        other_fields = result.keys()
-            except FileNotFoundError:
-                pass
-
-        for (file, func), result in results.items():
-            merged_results[(file, func)] |= result._asdict()
-
         with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name',
-                *other_fields, *DataResult._fields])
-            w.writeheader()
-            for (file, func), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': func, **result})
-
-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+            writer = csv.DictWriter(f, DataResult._fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r._asdict())
 
-        if not args.get('diff'):
-            print('%-36s %s' % (by, DataResult._header))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                DataDiff._header))
-
-    def print_entries(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    try:
+                        diff_results.append(DataResult(**{
+                            k: v for k, v in r.items()
+                            if k in DataResult._fields}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
 
-        entries = co.defaultdict(lambda: DataResult())
-        for k, result in results.items():
-            entries[entry(k)] += result
+        # fold to remove duplicates
+        diff_results = fold(diff_results)
 
-        if not args.get('diff'):
-            for name, result in sorted(entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                print('%-36s %s' % (name, result))
-        else:
-            prev_entries = co.defaultdict(lambda: DataResult())
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] += result
-
-            diff_entries = {name: entries.get(name) - prev_entries.get(name)
-                for name in (entries.keys() | prev_entries.keys())}
-
-            for name, diff in sorted(diff_entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                if diff or args.get('all'):
-                    print('%-36s %s' % (name, diff))
-
-    if args.get('quiet'):
-        pass
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('function')
-        print_entries('function')
-        print_entries('total')
+    # print table
+    if not args.get('quiet'):
+        table(
+            results,
+            diff_results if args.get('diff') else None,
+            **args)
 
 
 if __name__ == "__main__":
@@ -263,42 +412,72 @@ if __name__ == "__main__":
     import sys
     parser = argparse.ArgumentParser(
         description="Find data size at the function level.")
-    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory \
-            or a list of paths. Defaults to %r." % OBJ_PATHS)
-    parser.add_argument('-v', '--verbose', action='store_true',
+    parser.add_argument(
+        'obj_paths',
+        nargs='*',
+        default=OBJ_PATHS,
+        help="Description of where to find *.o files. May be a directory "
+            "or a list of paths. Defaults to %(default)r.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
         help="Output commands that run behind the scenes.")
-    parser.add_argument('-q', '--quiet', action='store_true',
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
         help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
+    parser.add_argument(
+        '-o', '--output',
         help="Specify CSV file to store results.")
-    parser.add_argument('-u', '--use',
-        help="Don't compile and find data sizes, instead use this CSV file.")
-    parser.add_argument('-d', '--diff',
-        help="Specify CSV file to diff data size against.")
-    parser.add_argument('-m', '--merge',
-        help="Merge with an existing CSV file when writing to output.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all functions, not just the ones that changed.")
-    parser.add_argument('-A', '--everything', action='store_true',
-        help="Include builtin and libc specific symbols.")
-    parser.add_argument('-s', '--size-sort', action='store_true',
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by-file',
+        action='store_true',
+        help="Group by file. Note this does not include padding "
+            "so sizes may differ from other tools.")
+    parser.add_argument(
+        '-s', '--size-sort',
+        action='store_true',
         help="Sort by size.")
-    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
+    parser.add_argument(
+        '-S', '--reverse-size-sort',
+        action='store_true',
         help="Sort by size, but backwards.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level data sizes. Note this does not include padding! "
-            "So sizes may differ from other tools.")
-    parser.add_argument('-Y', '--summary', action='store_true',
-        help="Only show the total data size.")
-    parser.add_argument('--type', default='dDbB',
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total size.")
+    parser.add_argument(
+        '-A', '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '--type',
+        default=TYPE,
         help="Type of symbols to report, this uses the same single-character "
             "type-names emitted by nm. Defaults to %(default)r.")
-    parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
-        help="Path to the nm tool to use.")
-    parser.add_argument('--build-dir',
-        help="Specify the relative build directory. Used to map object files \
-            to the correct source files.")
+    parser.add_argument(
+        '--nm-tool',
+        type=lambda x: x.split(),
+        default=NM_TOOL,
+        help="Path to the nm tool to use. Defaults to %(default)r")
+    parser.add_argument(
+        '--build-dir',
+        help="Specify the relative build directory. Used to map object files "
+            "to the correct source files.")
     sys.exit(main(**{k: v
         for k, v in vars(parser.parse_args()).items()
         if v is not None}))

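With results now kept as flat lists of namedtuples, fold() is what collapses duplicates, either per file+function or per whatever subset of fields a table needs. A small sketch of how it behaves; the symbols and sizes are made up for illustration, again assuming scripts/ is importable:

    # assumes the littlefs scripts/ directory is on sys.path
    from data import DataResult, fold

    results = [
        DataResult('lfs.c',      'lfs_mount', 8),
        DataResult('lfs.c',      'lfs_mount', 4),   # duplicate symbol, summed
        DataResult('lfs_util.c', 'lfs_crc',   16),
    ]

    # fold by file+function (the default) removes duplicates
    for r in fold(results):
        print(r.file, r.function, r.data_size)

    # fold by file only, as the --by-file table does
    for r in fold(results, by=['file']):
        print(r.file, r.data_size)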
+ 489 - 271
scripts/stack.py

@@ -4,111 +4,137 @@
 # report as infinite stack usage.
 #
 
-import os
+import collections as co
+import csv
 import glob
 import itertools as it
-import re
-import csv
-import collections as co
 import math as m
+import os
+import re
 
 
 CI_PATHS = ['*.ci']
 
-def openio(path, mode='r'):
-    if path == '-':
-        if 'r' in mode:
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
+    __slots__ = ()
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
+
+    def __str__(self):
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
-    else:
-        return open(path, mode)
+            return str(self.x)
+
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
+
+    def __add__(self, other):
+        return IntField(self.x + other.x)
+
+    def __mul__(self, other):
+        return IntField(self.x * other.x)
+
+    def __lt__(self, other):
+        return self.x < other.x
 
-class StackResult(co.namedtuple('StackResult', 'stack_frame,stack_limit')):
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
+        else:
+            return IntField(round(self.x / n))
+
+# size results
+class StackResult(co.namedtuple('StackResult',
+        'file,function,stack_frame,stack_limit')):
     __slots__ = ()
-    def __new__(cls, stack_frame=0, stack_limit=0):
-        return super().__new__(cls,
-            int(stack_frame),
-            float(stack_limit))
+    def __new__(cls, file, function, stack_frame, stack_limit):
+        return super().__new__(cls, file, function,
+            IntField(stack_frame), IntField(stack_limit))
 
     def __add__(self, other):
-        return self.__class__(
+        return StackResult(self.file, self.function,
             self.stack_frame + other.stack_frame,
             max(self.stack_limit, other.stack_limit))
 
-    def __sub__(self, other):
-        return StackDiff(other, self)
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self, **args):
-        if args.get('limit_sort'):
-            return -self.stack_limit
-        elif args.get('reverse_limit_sort'):
-            return +self.stack_limit
-        elif args.get('frame_sort'):
-            return -self.stack_frame
-        elif args.get('reverse_frame_sort'):
-            return +self.stack_frame
-        else:
-            return None
 
-    _header = '%7s %7s' % ('frame', 'limit')
-    def __str__(self):
-        return '%7d %7s' % (
-            self.stack_frame,
-            '∞' if m.isinf(self.stack_limit) else int(self.stack_limit))
+def openio(path, mode='r'):
+    if path == '-':
+        if 'r' in mode:
+            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+        else:
+            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+    else:
+        return open(path, mode)
 
-class StackDiff(co.namedtuple('StackDiff',  'old,new')):
-    __slots__ = ()
 
-    def ratio(self):
-        old_limit = self.old.stack_limit if self.old is not None else 0
-        new_limit = self.new.stack_limit if self.new is not None else 0
-        return (0.0 if m.isinf(new_limit) and m.isinf(old_limit)
-            else +float('inf') if m.isinf(new_limit)
-            else -float('inf') if m.isinf(old_limit)
-            else 0.0 if not old_limit and not new_limit
-            else 1.0 if not old_limit
-            else (new_limit-old_limit) / old_limit)
-
-    def key(self, **args):
-        return (
-            self.new.key(**args) if self.new is not None else 0,
-            -self.ratio())
-
-    def __bool__(self):
-        return bool(self.ratio())
-
-    _header = '%15s %15s %15s' % ('old', 'new', 'diff')
-    def __str__(self):
-        old_frame = self.old.stack_frame if self.old is not None else 0
-        old_limit = self.old.stack_limit if self.old is not None else 0
-        new_frame = self.new.stack_frame if self.new is not None else 0
-        new_limit = self.new.stack_limit if self.new is not None else 0
-        diff_frame = new_frame - old_frame
-        diff_limit = (0 if m.isinf(new_limit) and m.isinf(old_limit)
-            else new_limit - old_limit)
-        ratio = self.ratio()
-        return '%7s %7s %7s %7s %+7d %7s%s' % (
-            old_frame if self.old is not None else '-',
-            ('∞' if m.isinf(old_limit) else int(old_limit))
-                if self.old is not None else '-',
-            new_frame if self.new is not None else '-',
-            ('∞' if m.isinf(new_limit) else int(new_limit))
-                if self.new is not None else '-',
-            diff_frame,
-            '+∞' if diff_limit > 0 and m.isinf(diff_limit)
-                else '-∞' if diff_limit < 0 and m.isinf(diff_limit)
-                else '%+d' % diff_limit,
-            '' if not ratio
-                else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
-                else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
-                else ' (%+.1f%%)' % (100*ratio))
-
-
-def collect(paths, **args):
+def collect(paths, *,
+        everything=False,
+        **args):
     # parse the vcg format
     k_pattern = re.compile('([a-z]+)\s*:', re.DOTALL)
     v_pattern = re.compile('(?:"(.*?)"|([a-z]+))', re.DOTALL)
@@ -154,9 +180,11 @@ def collect(paths, **args):
                     m = f_pattern.match(info['label'])
                     if m:
                         function, file, size, type = m.groups()
-                        if not args.get('quiet') and type != 'static':
+                        if (not args.get('quiet')
+                                and 'static' not in type
+                                and 'bounded' not in type):
-                            print('warning: found non-static stack for %s (%s)'
-                                % (function, type))
+                            print('warning: found non-static stack for %s (%s, %s)'
+                                % (function, type, size))
                         _, _, _, targets = callgraph[info['title']]
                         callgraph[info['title']] = (
                             file, function, int(size), targets)
@@ -167,7 +195,7 @@ def collect(paths, **args):
                 else:
                     continue
 
-    if not args.get('everything'):
+    if not everything:
         for source, (s_file, s_function, _, _) in list(callgraph.items()):
             # discard internal functions
             if s_file.startswith('<') or s_file.startswith('/usr/include'):
@@ -200,22 +228,266 @@ def collect(paths, **args):
         return calls
 
     # build results
-    results = {}
-    result_calls = {}
+    results = []
+    calls = {}
     for source, (s_file, s_function, frame, targets) in callgraph.items():
         limit = find_limit(source)
-        calls = find_calls(targets)
-        results[(s_file, s_function)] = StackResult(frame, limit)
-        result_calls[(s_file, s_function)] = calls
-
-    return results, result_calls
-
-def main(**args):
+        cs = find_calls(targets)
+        results.append(StackResult(s_file, s_function, frame, limit))
+        calls[(s_file, s_function)] = cs
+
+    return results, calls
+
+
+def fold(results, *,
+        by=['file', 'function'],
+        **_):
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    folded = []
+    for rs in folding.values():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    return folded
+
+def fold_calls(calls, *,
+        by=['file', 'function'],
+        **_):
+    def by_(name):
+        file, function = name
+        return (((file,) if 'file' in by else ())
+            + ((function,) if 'function' in by else ()))
+
+    folded = {}
+    for name, cs in calls.items():
+        name = by_(name)
+        if name not in folded:
+            folded[name] = set()
+        folded[name] |= {by_(c) for c in cs}
+
+    return folded
+
+
+def table(results, calls, diff_results=None, *,
+        by_file=False,
+        limit_sort=False,
+        reverse_limit_sort=False,
+        frame_sort=False,
+        reverse_frame_sort=False,
+        summary=False,
+        all=False,
+        percent=False,
+        tree=False,
+        depth=None,
+        **_):
+    all_, all = all, __builtins__.all
+
+    # tree doesn't really make sense with depth=0, assume depth=inf
+    if depth is None:
+        depth = float('inf') if tree else 0
+
+    # fold
+    results = fold(results, by=['file' if by_file else 'function'])
+    calls = fold_calls(calls, by=['file' if by_file else 'function'])
+    if diff_results is not None:
+        diff_results = fold(diff_results,
+            by=['file' if by_file else 'function'])
+
+    table = {
+        r.file if by_file else r.function: r
+        for r in results}
+    diff_table = {
+        r.file if by_file else r.function: r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: -IntField.ratio(
+            table[n].stack_frame if n in table else None,
+            diff_table[n].stack_frame if n in diff_table else None))
+    if limit_sort:
+        names.sort(key=lambda n: (table[n].stack_limit,) if n in table else (),
+            reverse=True)
+    elif reverse_limit_sort:
+        names.sort(key=lambda n: (table[n].stack_limit,) if n in table else (),
+            reverse=False)
+    elif frame_sort:
+        names.sort(key=lambda n: (table[n].stack_frame,) if n in table else (),
+            reverse=True)
+    elif reverse_frame_sort:
+        names.sort(key=lambda n: (table[n].stack_frame,) if n in table else (),
+            reverse=False)
+
+    # adjust the name width based on the expected call depth, note that we
+    # can't always find the depth due to recursion
+    width = 36 + (4*depth if not m.isinf(depth) else 0)
+
+    # print header
+    if not tree:
+        print('%-*s' % (width, '%s%s' % (
+            'file' if by_file else 'function',
+            ' (%d added, %d removed)' % (
+                sum(1 for n in table if n not in diff_table),
+                sum(1 for n in diff_table if n not in table))
+                if diff_results is not None and not percent else '')
+            if not summary else ''),
+            end='')
+        if diff_results is None:
+            print(' %s %s' % (
+                'frame'.rjust(len(IntField.none)),
+                'limit'.rjust(len(IntField.none))))
+        elif percent:
+            print(' %s %s' % (
+                'frame'.rjust(len(IntField.diff_none)),
+                'limit'.rjust(len(IntField.diff_none))))
+        else:
+            print(' %s %s %s %s %s %s' % (
+                'oframe'.rjust(len(IntField.diff_none)),
+                'olimit'.rjust(len(IntField.diff_none)),
+                'nframe'.rjust(len(IntField.diff_none)),
+                'nlimit'.rjust(len(IntField.diff_none)),
+                'dframe'.rjust(len(IntField.diff_none)),
+                'dlimit'.rjust(len(IntField.diff_none))))
+
+    # print entries
+    if not summary:
+        # print the tree recursively
+        def table_calls(names_, depth,
+                prefixes=('', '', '', '')):
+            for i, name in enumerate(names_):
+                r = table.get(name)
+                if diff_results is not None:
+                    diff_r = diff_table.get(name)
+                    ratio = IntField.ratio(
+                        r.stack_limit if r else None,
+                        diff_r.stack_limit if diff_r else None)
+                    if not ratio and not all_:
+                        continue
+
+                is_last = (i == len(names_)-1)
+                print('%-*s' % (width, prefixes[0+is_last] + name), end='')
+                if tree:
+                    print()
+                elif diff_results is None:
+                    print(' %s %s' % (
+                        r.stack_frame.table()
+                            if r else IntField.none,
+                        r.stack_limit.table()
+                            if r else IntField.none))
+                elif percent:
+                    print(' %s %s%s' % (
+                        r.stack_frame.diff_table()
+                            if r else IntField.diff_none,
+                        r.stack_limit.diff_table()
+                            if r else IntField.diff_none,
+                        ' (%s)' % (
+                            '+∞%' if ratio == float('+inf')
+                            else '-∞%' if ratio == float('-inf')
+                            else '%+.1f%%' % (100*ratio))))
+                else:
+                    print(' %s %s %s %s %s %s%s' % (
+                        diff_r.stack_frame.diff_table()
+                            if diff_r else IntField.diff_none,
+                        diff_r.stack_limit.diff_table()
+                            if diff_r else IntField.diff_none,
+                        r.stack_frame.diff_table()
+                            if r else IntField.diff_none,
+                        r.stack_limit.diff_table()
+                            if r else IntField.diff_none,
+                        IntField.diff_diff(
+                            r.stack_frame if r else None,
+                            diff_r.stack_frame if diff_r else None)
+                            if r or diff_r else IntField.diff_none,
+                        IntField.diff_diff(
+                            r.stack_limit if r else None,
+                            diff_r.stack_limit if diff_r else None)
+                            if r or diff_r else IntField.diff_none,
+                        ' (%s)' % (
+                            '+∞%' if ratio == float('+inf')
+                            else '-∞%' if ratio == float('-inf')
+                            else '%+.1f%%' % (100*ratio))
+                            if ratio else ''))
+
+                # recurse?
+                if depth > 0:
+                    cs = calls.get((name,), set())
+                    table_calls(
+                        [n for n in names if (n,) in cs],
+                        depth-1,
+                        (   prefixes[2+is_last] + "|-> ",
+                            prefixes[2+is_last] + "'-> ",
+                            prefixes[2+is_last] + "|   ",
+                            prefixes[2+is_last] + "    "))
+
+
+        table_calls(names, depth)
+
+    # print total
+    if not tree:
+        total = fold(results, by=[])
+        r = total[0] if total else None
+        if diff_results is not None:
+            diff_total = fold(diff_results, by=[])
+            diff_r = diff_total[0] if diff_total else None
+            ratio = IntField.ratio(
+                r.stack_limit if r else None,
+                diff_r.stack_limit if diff_r else None)
+
+        print('%-*s' % (width, 'TOTAL'), end='')
+        if diff_results is None:
+            print(' %s %s' % (
+                r.stack_frame.table()
+                    if r else IntField.none,
+                r.stack_limit.table()
+                    if r else IntField.none))
+        elif percent:
+            print(' %s %s%s' % (
+                r.stack_frame.diff_table()
+                    if r else IntField.diff_none,
+                r.stack_limit.diff_table()
+                    if r else IntField.diff_none,
+                ' (%s)' % (
+                    '+∞%' if ratio == float('+inf')
+                    else '-∞%' if ratio == float('-inf')
+                    else '%+.1f%%' % (100*ratio))))
+        else:
+            print(' %s %s %s %s %s %s%s' % (
+                diff_r.stack_frame.diff_table()
+                    if diff_r else IntField.diff_none,
+                diff_r.stack_limit.diff_table()
+                    if diff_r else IntField.diff_none,
+                r.stack_frame.diff_table()
+                    if r else IntField.diff_none,
+                r.stack_limit.diff_table()
+                    if r else IntField.diff_none,
+                IntField.diff_diff(
+                    r.stack_frame if r else None,
+                    diff_r.stack_frame if diff_r else None)
+                    if r or diff_r else IntField.diff_none,
+                IntField.diff_diff(
+                    r.stack_limit if r else None,
+                    diff_r.stack_limit if diff_r else None)
+                    if r or diff_r else IntField.diff_none,
+                ' (%s)' % (
+                    '+∞%' if ratio == float('+inf')
+                    else '-∞%' if ratio == float('-inf')
+                    else '%+.1f%%' % (100*ratio))
+                    if ratio else ''))
+
+
+def main(ci_paths, **args):
     # find sizes
     if not args.get('use', None):
         # find .ci files
         paths = []
-        for path in args['ci_paths']:
+        for path in ci_paths:
             if os.path.isdir(path):
                 path = path + '/*.ci'
 
@@ -223,160 +495,68 @@ def main(**args):
                 paths.append(path)
 
         if not paths:
-            print('no .ci files found in %r?' % args['ci_paths'])
+            print('no .ci files found in %r?' % ci_paths)
             sys.exit(-1)
 
-        results, result_calls = collect(paths, **args)
+        results, calls = collect(paths, **args)
     else:
+        results = []
         with openio(args['use']) as f:
-            r = csv.DictReader(f)
-            results = {
-                (result['file'], result['name']): StackResult(
-                    *(result[f] for f in StackResult._fields))
-                for result in r
-                if all(result.get(f) not in {None, ''}
-                    for f in StackResult._fields)}
+            reader = csv.DictReader(f)
+            for r in reader:
+                try:
+                    results.append(StackResult(**{
+                        k: v for k, v in r.items()
+                        if k in StackResult._fields}))
+                except TypeError:
+                    pass
 
-        result_calls = {}
+        calls = {}
 
-    # find previous results?
-    if args.get('diff'):
-        try:
-            with openio(args['diff']) as f:
-                r = csv.DictReader(f)
-                prev_results = {
-                    (result['file'], result['name']): StackResult(
-                        *(result[f] for f in StackResult._fields))
-                    for result in r
-                    if all(result.get(f) not in {None, ''}
-                        for f in StackResult._fields)}
-        except FileNotFoundError:
-            prev_results = []
+    # fold to remove duplicates
+    results = fold(results)
+
+    # sort because why not
+    results.sort()
 
     # write results to CSV
     if args.get('output'):
-        merged_results = co.defaultdict(lambda: {})
-        other_fields = []
-
-        # merge?
-        if args.get('merge'):
-            try:
-                with openio(args['merge']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        func = result.pop('name', '')
-                        for f in StackResult._fields:
-                            result.pop(f, None)
-                        merged_results[(file, func)] = result
-                        other_fields = result.keys()
-            except FileNotFoundError:
-                pass
-
-        for (file, func), result in results.items():
-            merged_results[(file, func)] |= result._asdict()
-
         with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name',
-                *other_fields, *StackResult._fields])
-            w.writeheader()
-            for (file, func), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': func, **result})
-
-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        if not args.get('diff'):
-            print('%-36s %s' % (by, StackResult._header))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                StackDiff._header))
-
-    def print_entries(by):
-        # print optional tree of dependencies
-        def print_calls(entries, entry_calls, depth,
-                filter=lambda _: True,
-                prefixes=('', '', '', '')):
-            filtered_entries = {
-                name: result for name, result in entries.items()
-                if filter(name)}
-            for i, (name, result) in enumerate(sorted(filtered_entries.items(),
-                    key=lambda p: (p[1].key(**args), p))):
-                last = (i == len(filtered_entries)-1)
-                print('%-36s %s' % (prefixes[0+last] + name, result))
-
-                if depth > 0 and by != 'total':
-                    calls = entry_calls.get(name, set())
-                    print_calls(entries, entry_calls, depth-1,
-                        lambda name: name in calls,
-                        (   prefixes[2+last] + "|-> ",
-                            prefixes[2+last] + "'-> ",
-                            prefixes[2+last] + "|   ",
-                            prefixes[2+last] + "    "))
-
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+            writer = csv.DictWriter(f, StackResult._fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r._asdict())
 
-        entries = co.defaultdict(lambda: StackResult())
-        for k, result in results.items():
-            entries[entry(k)] += result
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    try:
+                        diff_results.append(StackResult(**{
+                            k: v for k, v in r.items()
+                            if k in StackResult._fields}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
 
-        entry_calls = co.defaultdict(lambda: set())
-        for k, calls in result_calls.items():
-            entry_calls[entry(k)] |= {entry(c) for c in calls}
+        # fold to remove duplicates
+        diff_results = fold(diff_results)
 
-        if not args.get('diff'):
-            print_calls(
-                entries,
-                entry_calls,
-                args.get('depth', 0))
-        else:
-            prev_entries = co.defaultdict(lambda: StackResult())
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] += result
-
-            diff_entries = {name: entries.get(name) - prev_entries.get(name)
-                for name in (entries.keys() | prev_entries.keys())}
-
-            print_calls(
-                {name: diff for name, diff in diff_entries.items()
-                    if diff or args.get('all')},
-                entry_calls,
-                args.get('depth', 0))
-
-    if args.get('quiet'):
-        pass
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('function')
-        print_entries('function')
-        print_entries('total')
+    # print table
+    if not args.get('quiet'):
+        table(
+            results,
+            calls,
+            diff_results if args.get('diff') else None,
+            **args)
 
-    # catch recursion
+    # error on recursion
     if args.get('error_on_recursion') and any(
-            m.isinf(limit) for _, _, _, limit, _ in results):
+            m.isinf(float(r.stack_limit)) for r in results):
         sys.exit(2)
 
 
@@ -385,45 +565,83 @@ if __name__ == "__main__":
     import sys
     parser = argparse.ArgumentParser(
         description="Find stack usage at the function level.")
-    parser.add_argument('ci_paths', nargs='*', default=CI_PATHS,
-        help="Description of where to find *.ci files. May be a directory \
-            or a list of paths. Defaults to %r." % CI_PATHS)
-    parser.add_argument('-v', '--verbose', action='store_true',
+    parser.add_argument(
+        'ci_paths',
+        nargs='*',
+        default=CI_PATHS,
+        help="Description of where to find *.ci files. May be a directory "
+            "or a list of paths. Defaults to %r." % CI_PATHS)
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
         help="Output commands that run behind the scenes.")
-    parser.add_argument('-q', '--quiet', action='store_true',
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
         help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
+    parser.add_argument(
+        '-o', '--output',
         help="Specify CSV file to store results.")
-    parser.add_argument('-u', '--use',
-        help="Don't parse callgraph files, instead use this CSV file.")
-    parser.add_argument('-d', '--diff',
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
         help="Specify CSV file to diff against.")
-    parser.add_argument('-m', '--merge',
-        help="Merge with an existing CSV file when writing to output.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all functions, not just the ones that changed.")
-    parser.add_argument('-A', '--everything', action='store_true',
-        help="Include builtin and libc specific symbols.")
-    parser.add_argument('--frame-sort', action='store_true',
-        help="Sort by stack frame size.")
-    parser.add_argument('--reverse-frame-sort', action='store_true',
-        help="Sort by stack frame size, but backwards.")
-    parser.add_argument('-s', '--limit-sort', action='store_true',
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-t', '--tree',
+        action='store_true',
+        help="Only show the function call tree.")
+    parser.add_argument(
+        '-b', '--by-file',
+        action='store_true',
+        help="Group by file.")
+    parser.add_argument(
+        '-s', '--limit-sort',
+        action='store_true',
         help="Sort by stack limit.")
-    parser.add_argument('-S', '--reverse-limit-sort', action='store_true',
+    parser.add_argument(
+        '-S', '--reverse-limit-sort',
+        action='store_true',
         help="Sort by stack limit, but backwards.")
-    parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0),
-        nargs='?', const=float('inf'),
-        help="Depth of dependencies to show.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level calls.")
-    parser.add_argument('-Y', '--summary', action='store_true',
-        help="Only show the total stack size.")
-    parser.add_argument('-e', '--error-on-recursion', action='store_true',
+    parser.add_argument(
+        '--frame-sort',
+        action='store_true',
+        help="Sort by stack frame.")
+    parser.add_argument(
+        '--reverse-frame-sort',
+        action='store_true',
+        help="Sort by stack frame, but backwards.")
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total size.")
+    parser.add_argument(
+        '-L', '--depth',
+        nargs='?',
+        type=lambda x: int(x, 0),
+        const=float('inf'),
+        help="Depth of function calls to show.")
+    parser.add_argument(
+        '-e', '--error-on-recursion',
+        action='store_true',
         help="Error if any functions are recursive.")
-    parser.add_argument('--build-dir',
-        help="Specify the relative build directory. Used to map object files \
-            to the correct source files.")
+    parser.add_argument(
+        '-A', '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '--build-dir',
+        help="Specify the relative build directory. Used to map object files "
+            "to the correct source files.")
     sys.exit(main(**{k: v
         for k, v in vars(parser.parse_args()).items()
         if v is not None}))
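
The -u/--use and -d/--diff paths above share one row-filtering pattern: each
csv.DictReader row is cut down to StackResult's own fields, and rows missing a
required field raise TypeError and are silently skipped, which is what lets the
script read a merged lfs.csv containing columns from other scripts. A minimal
sketch of that pattern, using a hypothetical Row namedtuple since StackResult's
field list is defined earlier in stack.py:

    # Row is a stand-in; the real StackResult also converts values to IntField.
    import collections as co

    Row = co.namedtuple('Row', 'file,name,stack_frame,stack_limit')

    rows = [
        # complete row -> kept, unrelated columns are dropped
        {'file': 'lfs.c', 'name': 'lfs_mount',
            'stack_frame': '80', 'stack_limit': '448', 'code_size': '1234'},
        # row from another script, missing stack fields -> TypeError -> skipped
        {'file': 'lfs.c', 'name': 'lfs_format', 'code_size': '568'},
    ]

    results = []
    for r in rows:
        try:
            results.append(Row(**{k: v for k, v in r.items()
                if k in Row._fields}))
        except TypeError:
            pass

    assert len(results) == 1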

+ 522 - 0
scripts/struct.py

@@ -0,0 +1,522 @@
+#!/usr/bin/env python3
+#
+# Script to find struct sizes.
+#
+
+import collections as co
+import csv
+import glob
+import itertools as it
+import math as m
+import os
+import re
+import shlex
+import subprocess as sp
+
+
+OBJ_PATHS = ['*.o']
+OBJDUMP_TOOL = ['objdump']
+
+
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
+    __slots__ = ()
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
+
+    def __str__(self):
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return str(self.x)
+
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
+
+    def __add__(self, other):
+        return IntField(self.x + other.x)
+
+    def __mul__(self, other):
+        return IntField(self.x * other.x)
+
+    def __lt__(self, other):
+        return self.x < other.x
+
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
+        else:
+            return IntField(round(self.x / n))
+
+# struct size results
+class StructResult(co.namedtuple('StructResult', 'file,struct,struct_size')):
+    __slots__ = ()
+    def __new__(cls, file, struct, struct_size):
+        return super().__new__(cls, file, struct, IntField(struct_size))
+
+    def __add__(self, other):
+        return StructResult(self.file, self.struct,
+            self.struct_size + other.struct_size)
+
+
+def openio(path, mode='r'):
+    if path == '-':
+        if 'r' in mode:
+            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+        else:
+            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+    else:
+        return open(path, mode)
+
+def collect(paths, *,
+        objdump_tool=OBJDUMP_TOOL,
+        build_dir=None,
+        everything=False,
+        **args):
+    decl_pattern = re.compile(
+        '^\s+(?P<no>[0-9]+)'
+            '\s+(?P<dir>[0-9]+)'
+            '\s+.*'
+            '\s+(?P<file>[^\s]+)$')
+    struct_pattern = re.compile(
+        '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
+            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
+            '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
+
+    results = []
+    for path in paths:
+        # find decl, we want to filter by structs in .h files
+        decls = {}
+        # note objdump-tool may contain extra args
+        cmd = objdump_tool + ['--dwarf=rawline', path]
+        if args.get('verbose'):
+            print(' '.join(shlex.quote(c) for c in cmd))
+        proc = sp.Popen(cmd,
+            stdout=sp.PIPE,
+            stderr=sp.PIPE if not args.get('verbose') else None,
+            universal_newlines=True,
+            errors='replace')
+        for line in proc.stdout:
+            # find file numbers
+            m = decl_pattern.match(line)
+            if m:
+                decls[int(m.group('no'))] = m.group('file')
+        proc.wait()
+        if proc.returncode != 0:
+            if not args.get('verbose'):
+                for line in proc.stderr:
+                    sys.stdout.write(line)
+            sys.exit(-1)
+
+        # collect structs as we parse dwarf info
+        found = False
+        name = None
+        decl = None
+        size = None
+
+        # note objdump-tool may contain extra args
+        cmd = objdump_tool + ['--dwarf=info', path]
+        if args.get('verbose'):
+            print(' '.join(shlex.quote(c) for c in cmd))
+        proc = sp.Popen(cmd,
+            stdout=sp.PIPE,
+            stderr=sp.PIPE if not args.get('verbose') else None,
+            universal_newlines=True,
+            errors='replace')
+        for line in proc.stdout:
+            # state machine here to find structs
+            m = struct_pattern.match(line)
+            if m:
+                if m.group('tag'):
+                    if (name is not None
+                            and decl is not None
+                            and size is not None):
+                        file = decls.get(decl, '?')
+                        # map to source file
+                        file = re.sub('\.o$', '.c', file)
+                        if build_dir:
+                            file = re.sub(
+                                '%s/*' % re.escape(build_dir), '',
+                                file)
+                        # only include structs declared in header files in the
+                        # current directory, ignore internal-only structs (
+                        # these are represented in other measurements)
+                        if everything or file.endswith('.h'):
+                            results.append(StructResult(file, name, size))
+
+                    found = (m.group('tag') == 'structure_type')
+                    name = None
+                    decl = None
+                    size = None
+                elif found and m.group('name'):
+                    name = m.group('name')
+                elif found and name and m.group('decl'):
+                    decl = int(m.group('decl'))
+                elif found and name and m.group('size'):
+                    size = int(m.group('size'))
+        proc.wait()
+        if proc.returncode != 0:
+            if not args.get('verbose'):
+                for line in proc.stderr:
+                    sys.stdout.write(line)
+            sys.exit(-1)
+
+    return results
+
+
+def fold(results, *,
+        by=['file', 'struct'],
+        **_):
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    folded = []
+    for rs in folding.values():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    return folded
+
+
+def table(results, diff_results=None, *,
+        by_file=False,
+        size_sort=False,
+        reverse_size_sort=False,
+        summary=False,
+        all=False,
+        percent=False,
+        **_):
+    all_, all = all, __builtins__.all
+
+    # fold
+    results = fold(results, by=['file' if by_file else 'struct'])
+    if diff_results is not None:
+        diff_results = fold(diff_results,
+            by=['file' if by_file else 'struct'])
+
+    table = {
+        r.file if by_file else r.struct: r
+        for r in results}
+    diff_table = {
+        r.file if by_file else r.struct: r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: -IntField.ratio(
+            table[n].struct_size if n in table else None,
+            diff_table[n].struct_size if n in diff_table else None))
+    if size_sort:
+        names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
+            reverse=True)
+    elif reverse_size_sort:
+        names.sort(key=lambda n: (table[n].struct_size,) if n in table else (),
+            reverse=False)
+
+    # print header
+    print('%-36s' % ('%s%s' % (
+        'file' if by_file else 'struct',
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else ''),
+        end='')
+    if diff_results is None:
+        print(' %s' % ('size'.rjust(len(IntField.none))))
+    elif percent:
+        print(' %s' % ('size'.rjust(len(IntField.diff_none))))
+    else:
+        print(' %s %s %s' % (
+            'old'.rjust(len(IntField.diff_none)),
+            'new'.rjust(len(IntField.diff_none)),
+            'diff'.rjust(len(IntField.diff_none))))
+
+    # print entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is not None:
+                diff_r = diff_table.get(name)
+                ratio = IntField.ratio(
+                    r.struct_size if r else None,
+                    diff_r.struct_size if diff_r else None)
+                if not ratio and not all_:
+                    continue
+
+            print('%-36s' % name, end='')
+            if diff_results is None:
+                print(' %s' % (
+                    r.struct_size.table()
+                        if r else IntField.none))
+            elif percent:
+                print(' %s%s' % (
+                    r.struct_size.diff_table()
+                        if r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))))
+            else:
+                print(' %s %s %s%s' % (
+                    diff_r.struct_size.diff_table()
+                        if diff_r else IntField.diff_none,
+                    r.struct_size.diff_table()
+                        if r else IntField.diff_none,
+                    IntField.diff_diff(
+                        r.struct_size if r else None,
+                        diff_r.struct_size if diff_r else None)
+                        if r or diff_r else IntField.diff_none,
+                    ' (%s)' % (
+                        '+∞%' if ratio == float('+inf')
+                        else '-∞%' if ratio == float('-inf')
+                        else '%+.1f%%' % (100*ratio))
+                        if ratio else ''))
+
+    # print total
+    total = fold(results, by=[])
+    r = total[0] if total else None
+    if diff_results is not None:
+        diff_total = fold(diff_results, by=[])
+        diff_r = diff_total[0] if diff_total else None
+        ratio = IntField.ratio(
+            r.struct_size if r else None,
+            diff_r.struct_size if diff_r else None)
+
+    print('%-36s' % 'TOTAL', end='')
+    if diff_results is None:
+        print(' %s' % (
+            r.struct_size.table()
+                if r else IntField.none))
+    elif percent:
+        print(' %s%s' % (
+            r.struct_size.diff_table()
+                if r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))))
+    else:
+        print(' %s %s %s%s' % (
+            diff_r.struct_size.diff_table()
+                if diff_r else IntField.diff_none,
+            r.struct_size.diff_table()
+                if r else IntField.diff_none,
+            IntField.diff_diff(
+                r.struct_size if r else None,
+                diff_r.struct_size if diff_r else None)
+                if r or diff_r else IntField.diff_none,
+            ' (%s)' % (
+                '+∞%' if ratio == float('+inf')
+                else '-∞%' if ratio == float('-inf')
+                else '%+.1f%%' % (100*ratio))
+                if ratio else ''))
+
+
+def main(obj_paths, **args):
+    # find sizes
+    if not args.get('use', None):
+        # find .o files
+        paths = []
+        for path in obj_paths:
+            if os.path.isdir(path):
+                path = path + '/*.o'
+
+            for path in glob.glob(path):
+                paths.append(path)
+
+        if not paths:
+            print('no .o files found in %r?' % obj_paths)
+            sys.exit(-1)
+
+        results = collect(paths, **args)
+    else:
+        results = []
+        with openio(args['use']) as f:
+            reader = csv.DictReader(f)
+            for r in reader:
+                try:
+                    results.append(StructResult(**{
+                        k: v for k, v in r.items()
+                        if k in StructResult._fields}))
+                except TypeError:
+                    pass
+
+    # fold to remove duplicates
+    results = fold(results)
+
+    # sort because why not
+    results.sort()
+
+    # write results to CSV
+    if args.get('output'):
+        with openio(args['output'], 'w') as f:
+            writer = csv.DictWriter(f, StructResult._fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r._asdict())
+
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    try:
+                        diff_results.append(StructResult(**{
+                            k: v for k, v in r.items()
+                            if k in StructResult._fields}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+        # fold to remove duplicates
+        diff_results = fold(diff_results)
+
+    # print table
+    if not args.get('quiet'):
+        table(
+            results,
+            diff_results if args.get('diff') else None,
+            **args)
+
+
+if __name__ == "__main__":
+    import argparse
+    import sys
+    parser = argparse.ArgumentParser(
+        description="Find struct sizes.")
+    parser.add_argument(
+        'obj_paths',
+        nargs='*',
+        default=OBJ_PATHS,
+        help="Description of where to find *.o files. May be a directory "
+            "or a list of paths. Defaults to %(default)r.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help="Output commands that run behind the scenes.")
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
+        help="Don't show anything, useful with -o.")
+    parser.add_argument(
+        '-o', '--output',
+        help="Specify CSV file to store results.")
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by-file',
+        action='store_true',
+        help="Group by file. Note this does not include padding "
+            "so sizes may differ from other tools.")
+    parser.add_argument(
+        '-s', '--size-sort',
+        action='store_true',
+        help="Sort by size.")
+    parser.add_argument(
+        '-S', '--reverse-size-sort',
+        action='store_true',
+        help="Sort by size, but backwards.")
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total size.")
+    parser.add_argument(
+        '-A', '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '--objdump-tool',
+        type=lambda x: x.split(),
+        default=OBJDUMP_TOOL,
+        help="Path to the objdump tool to use.")
+    parser.add_argument(
+        '--build-dir',
+        help="Specify the relative build directory. Used to map object files "
+            "to the correct source files.")
+    sys.exit(main(**{k: v
+        for k, v in vars(parser.parse_args()).items()
+        if v is not None}))
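
Both the new struct.py and the reworked stack.py lean on IntField for their
diff output: diff_diff() prints the signed delta and ratio() feeds the trailing
percentage, with ±∞ reserved for values like unbounded stack limits. A quick
sketch of the expected behaviour, using IntField exactly as defined above:

    # unbound calls with None mirror how table() handles added/removed entries
    new, old = IntField(8), IntField(4)
    assert new.diff_diff(old).strip() == '+4'
    assert IntField.ratio(new, old) == 1.0            # rendered as '+100.0%'
    assert IntField.ratio(IntField(8), None) == 1.0   # newly added entry
    assert IntField.ratio(IntField('inf'), old) == float('inf')  # rendered '+∞%'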

+ 0 - 348
scripts/structs.py

@@ -1,348 +0,0 @@
-#!/usr/bin/env python3
-#
-# Script to find struct sizes.
-#
-
-import os
-import glob
-import itertools as it
-import subprocess as sp
-import shlex
-import re
-import csv
-import collections as co
-
-
-OBJ_PATHS = ['*.o']
-
-def openio(path, mode='r'):
-    if path == '-':
-        if 'r' in mode:
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
-        else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
-    else:
-        return open(path, mode)
-
-class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
-    __slots__ = ()
-    def __new__(cls, struct_size=0):
-        return super().__new__(cls, int(struct_size))
-
-    def __add__(self, other):
-        return self.__class__(self.struct_size + other.struct_size)
-
-    def __sub__(self, other):
-        return StructsDiff(other, self)
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self, **args):
-        if args.get('size_sort'):
-            return -self.struct_size
-        elif args.get('reverse_size_sort'):
-            return +self.struct_size
-        else:
-            return None
-
-    _header = '%7s' % 'size'
-    def __str__(self):
-        return '%7d' % self.struct_size
-
-class StructsDiff(co.namedtuple('StructsDiff',  'old,new')):
-    __slots__ = ()
-
-    def ratio(self):
-        old = self.old.struct_size if self.old is not None else 0
-        new = self.new.struct_size if self.new is not None else 0
-        return (new-old) / old if old else 1.0
-
-    def key(self, **args):
-        return (
-            self.new.key(**args) if self.new is not None else 0,
-            -self.ratio())
-
-    def __bool__(self):
-        return bool(self.ratio())
-
-    _header = '%7s %7s %7s' % ('old', 'new', 'diff')
-    def __str__(self):
-        old = self.old.struct_size if self.old is not None else 0
-        new = self.new.struct_size if self.new is not None else 0
-        diff = new - old
-        ratio = self.ratio()
-        return '%7s %7s %+7d%s' % (
-            old or "-",
-            new or "-",
-            diff,
-            ' (%+.1f%%)' % (100*ratio) if ratio else '')
-
-def collect(paths, **args):
-    decl_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)'
-            '\s+(?P<dir>[0-9]+)'
-            '\s+.*'
-            '\s+(?P<file>[^\s]+)$')
-    struct_pattern = re.compile(
-        '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
-            '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
-
-    results = {}
-    for path in paths:
-        # find decl, we want to filter by structs in .h files
-        decls = {}
-        # note objdump-tool may contain extra args
-        cmd = args['objdump_tool'] + ['--dwarf=rawline', path]
-        if args.get('verbose'):
-            print(' '.join(shlex.quote(c) for c in cmd))
-        proc = sp.Popen(cmd,
-            stdout=sp.PIPE,
-            stderr=sp.PIPE if not args.get('verbose') else None,
-            universal_newlines=True,
-            errors='replace')
-        for line in proc.stdout:
-            # find file numbers
-            m = decl_pattern.match(line)
-            if m:
-                decls[int(m.group('no'))] = m.group('file')
-        proc.wait()
-        if proc.returncode != 0:
-            if not args.get('verbose'):
-                for line in proc.stderr:
-                    sys.stdout.write(line)
-            sys.exit(-1)
-
-        # collect structs as we parse dwarf info
-        found = False
-        name = None
-        decl = None
-        size = None
-
-        # note objdump-tool may contain extra args
-        cmd = args['objdump_tool'] + ['--dwarf=info', path]
-        if args.get('verbose'):
-            print(' '.join(shlex.quote(c) for c in cmd))
-        proc = sp.Popen(cmd,
-            stdout=sp.PIPE,
-            stderr=sp.PIPE if not args.get('verbose') else None,
-            universal_newlines=True,
-            errors='replace')
-        for line in proc.stdout:
-            # state machine here to find structs
-            m = struct_pattern.match(line)
-            if m:
-                if m.group('tag'):
-                    if (name is not None
-                            and decl is not None
-                            and size is not None):
-                        file = decls.get(decl, '?')
-                        # map to source file
-                        file = re.sub('\.o$', '.c', file)
-                        if args.get('build_dir'):
-                            file = re.sub(
-                                '%s/*' % re.escape(args['build_dir']), '',
-                                file)
-                        # only include structs declared in header files in the
-                        # current directory, ignore internal-only structs (
-                        # these are represented in other measurements)
-                        if args.get('everything') or file.endswith('.h'):
-                            results[(file, name)] = StructsResult(size)
-                    found = (m.group('tag') == 'structure_type')
-                    name = None
-                    decl = None
-                    size = None
-                elif found and m.group('name'):
-                    name = m.group('name')
-                elif found and name and m.group('decl'):
-                    decl = int(m.group('decl'))
-                elif found and name and m.group('size'):
-                    size = int(m.group('size'))
-        proc.wait()
-        if proc.returncode != 0:
-            if not args.get('verbose'):
-                for line in proc.stderr:
-                    sys.stdout.write(line)
-            sys.exit(-1)
-
-    return results
-
-
-def main(**args):
-    # find sizes
-    if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in args['obj_paths']:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print('no .obj files found in %r?' % args['obj_paths'])
-            sys.exit(-1)
-
-        results = collect(paths, **args)
-    else:
-        with openio(args['use']) as f:
-            r = csv.DictReader(f)
-            results = {
-                (result['file'], result['name']): StructsResult(
-                    *(result[f] for f in StructsResult._fields))
-                for result in r
-                if all(result.get(f) not in {None, ''}
-                    for f in StructsResult._fields)}
-
-    # find previous results?
-    if args.get('diff'):
-        try:
-            with openio(args['diff']) as f:
-                r = csv.DictReader(f)
-                prev_results = {
-                    (result['file'], result['name']): StructsResult(
-                        *(result[f] for f in StructsResult._fields))
-                    for result in r
-                    if all(result.get(f) not in {None, ''}
-                        for f in StructsResult._fields)}
-        except FileNotFoundError:
-            prev_results = []
-
-    # write results to CSV
-    if args.get('output'):
-        merged_results = co.defaultdict(lambda: {})
-        other_fields = []
-
-        # merge?
-        if args.get('merge'):
-            try:
-                with openio(args['merge']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        func = result.pop('name', '')
-                        for f in StructsResult._fields:
-                            result.pop(f, None)
-                        merged_results[(file, func)] = result
-                        other_fields = result.keys()
-            except FileNotFoundError:
-                pass
-
-        for (file, func), result in results.items():
-            merged_results[(file, func)] |= result._asdict()
-
-        with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, ['file', 'name',
-                *other_fields, *StructsResult._fields])
-            w.writeheader()
-            for (file, func), result in sorted(merged_results.items()):
-                w.writerow({'file': file, 'name': func, **result})
-
-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        if not args.get('diff'):
-            print('%-36s %s' % (by, StructsResult._header))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                StructsDiff._header))
-
-    def print_entries(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        entries = co.defaultdict(lambda: StructsResult())
-        for k, result in results.items():
-            entries[entry(k)] += result
-
-        if not args.get('diff'):
-            for name, result in sorted(entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                print('%-36s %s' % (name, result))
-        else:
-            prev_entries = co.defaultdict(lambda: StructsResult())
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] += result
-
-            diff_entries = {name: entries.get(name) - prev_entries.get(name)
-                for name in (entries.keys() | prev_entries.keys())}
-
-            for name, diff in sorted(diff_entries.items(),
-                    key=lambda p: (p[1].key(**args), p)):
-                if diff or args.get('all'):
-                    print('%-36s %s' % (name, diff))
-
-    if args.get('quiet'):
-        pass
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('struct')
-        print_entries('struct')
-        print_entries('total')
-
-
-if __name__ == "__main__":
-    import argparse
-    import sys
-    parser = argparse.ArgumentParser(
-        description="Find struct sizes.")
-    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory \
-            or a list of paths. Defaults to %r." % OBJ_PATHS)
-    parser.add_argument('-v', '--verbose', action='store_true',
-        help="Output commands that run behind the scenes.")
-    parser.add_argument('-q', '--quiet', action='store_true',
-        help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
-        help="Specify CSV file to store results.")
-    parser.add_argument('-u', '--use',
-        help="Don't compile and find struct sizes, instead use this CSV file.")
-    parser.add_argument('-d', '--diff',
-        help="Specify CSV file to diff struct size against.")
-    parser.add_argument('-m', '--merge',
-        help="Merge with an existing CSV file when writing to output.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all structs, not just the ones that changed.")
-    parser.add_argument('-A', '--everything', action='store_true',
-        help="Include builtin and libc specific symbols.")
-    parser.add_argument('-s', '--size-sort', action='store_true',
-        help="Sort by size.")
-    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
-        help="Sort by size, but backwards.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level struct sizes.")
-    parser.add_argument('-Y', '--summary', action='store_true',
-        help="Only show the total struct size.")
-    parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(),
-        help="Path to the objdump tool to use.")
-    parser.add_argument('--build-dir',
-        help="Specify the relative build directory. Used to map object files \
-            to the correct source files.")
-    sys.exit(main(**{k: v
-        for k, v in vars(parser.parse_args()).items()
-        if v is not None}))

+ 664 - 361
scripts/summary.py

@@ -6,416 +6,719 @@
 import collections as co
 import csv
 import functools as ft
+import glob
 import math as m
 import os
 import re
 
-# each result is a type generated by another script
-RESULTS = []
-FIELDS = 'code,data,stack,structs'
-def result(cls):
-    RESULTS.append(cls)
-    return cls
 
-@result
-class CodeResult(co.namedtuple('CodeResult', 'code_size')):
-    __slots__ = ()
-    def __new__(cls, code_size=0):
-        return super().__new__(cls, int(code_size))
+CSV_PATHS = ['*.csv']
+
+# Defaults are common fields generated by other littlefs scripts
+MERGES = {
+    'add': (
+        ['code_size', 'data_size', 'stack_frame', 'struct_size',
+            'coverage_lines', 'coverage_branches'],
+        lambda xs: sum(xs[1:], start=xs[0])
+    ),
+    'mul': (
+        [],
+        lambda xs: m.prod(xs[1:], start=xs[0])
+    ),
+    'min': (
+        [],
+        min
+    ),
+    'max': (
+        ['stack_limit', 'coverage_hits'],
+        max
+    ),
+    'avg': (
+        [],
+        lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
+    ),
+}
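
Each MERGES entry pairs a list of default fields with the reducer summary.py
applies to that column after folding rows together, so code_size-style columns
are summed while stack_limit keeps its maximum. A small illustration with plain
ints (the script itself applies these to the Int/Float/Frac field types defined
below):

    _, add = MERGES['add']
    _, max_ = MERGES['max']
    assert add([120, 80, 36]) == 236    # code_size-style fields are summed
    assert max_([48, 464, 112]) == 464  # stack_limit keeps the worst case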
 
-    def __add__(self, other):
-        return self.__class__(self.code_size + other.code_size)
 
-    def __sub__(self, other):
-        old = other.code_size if other is not None else 0
-        new = self.code_size if self is not None else 0
-        return (new-old) / old if old else 1.0
+def openio(path, mode='r'):
+    if path == '-':
+        if 'r' in mode:
+            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+        else:
+            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+    else:
+        return open(path, mode)
 
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
 
-    def key(self):
-        return -self.code_size
+# integer fields
+class IntField(co.namedtuple('IntField', 'x')):
+    __slots__ = ()
+    def __new__(cls, x):
+        if isinstance(x, IntField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
 
-    _header = '%7s' % 'code'
-    _nil    = '%7s' % '-'
     def __str__(self):
-        return '%7s' % self.code_size
-
-@result
-class DataResult(co.namedtuple('DataResult', 'data_size')):
-    __slots__ = ()
-    def __new__(cls, data_size=0):
-        return super().__new__(cls, int(data_size))
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return str(self.x)
+
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == float('+inf'):
+            return '%7s' % '+∞'
+        elif diff == float('-inf'):
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return float('+inf')
+        elif m.isinf(old):
+            return float('-inf')
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
 
     def __add__(self, other):
-        return self.__class__(self.data_size + other.data_size)
+        return IntField(self.x + other.x)
 
-    def __sub__(self, other):
-        old = other.data_size if other is not None else 0
-        new = self.data_size if self is not None else 0
-        return (new-old) / old if old else 1.0
+    def __mul__(self, other):
+        return IntField(self.x * other.x)
 
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
+    def __lt__(self, other):
+        return self.x < other.x
 
-    def key(self):
-        return -self.data_size
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
 
-    _header = '%7s' % 'data'
-    _nil    = '%7s' % '-'
-    def __str__(self):
-        return '%7s' % self.data_size
+    def __le__(self, other):
+        return not self.__gt__(other)
 
-@result
-class StackResult(co.namedtuple('StackResult', 'stack_limit')):
-    __slots__ = ()
-    def __new__(cls, stack_limit=0):
-        return super().__new__(cls, float(stack_limit))
+    def __ge__(self, other):
+        return not self.__lt__(other)
 
-    def __add__(self, other):
-        return self.__class__(max(self.stack_limit, other.stack_limit))
-
-    def __sub__(self, other):
-        old_limit = other.stack_limit if other is not None else 0
-        new_limit = self.stack_limit if self is not None else 0
-        return (0.0 if m.isinf(new_limit) and m.isinf(old_limit)
-            else +float('inf') if m.isinf(new_limit)
-            else -float('inf') if m.isinf(old_limit)
-            else 0.0 if not old_limit and not new_limit
-            else 1.0 if not old_limit
-            else (new_limit-old_limit) / old_limit)
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self):
-        return -self.stack_limit
-
-    _header = '%7s' % 'stack'
-    _nil    = '%7s' % '-'
-    def __str__(self):
-        return '%7s' % (
-            '∞' if m.isinf(self.stack_limit)
-                else int(self.stack_limit))
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
+        else:
+            return IntField(round(self.x / n))
 
-@result
-class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
+# float fields
+class FloatField(co.namedtuple('FloatField', 'x')):
     __slots__ = ()
-    def __new__(cls, struct_size=0):
-        return super().__new__(cls, int(struct_size))
-
-    def __add__(self, other):
-        return self.__class__(self.struct_size + other.struct_size)
-
-    def __sub__(self, other):
-        old = other.struct_size if other is not None else 0
-        new = self.struct_size if self is not None else 0
-        return (new-old) / old if old else 1.0
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self):
-        return -self.struct_size
+    def __new__(cls, x):
+        if isinstance(x, FloatField):
+            return x
+        if isinstance(x, str):
+            try:
+                x = float(x)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = float('inf')
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = float('-inf')
+                else:
+                    raise
+        return super().__new__(cls, x)
+
+    def __float__(self):
+        return float(self.x)
 
-    _header = '%7s' % 'structs'
-    _nil    = '%7s' % '-'
     def __str__(self):
-        return '%7s' % self.struct_size
+        if self.x == float('inf'):
+            return '∞'
+        elif self.x == float('-inf'):
+            return '-∞'
+        else:
+            return '%.1f' % self.x
+
+    none = IntField.none
+    table = IntField.table
+    diff_none = IntField.diff_none
+    diff_table = IntField.diff_table
+    diff_diff = IntField.diff_diff
+    ratio = IntField.ratio
+    __add__ = IntField.__add__
+    __mul__ = IntField.__mul__
+    __lt__ = IntField.__lt__
+    __gt__ = IntField.__gt__
+    __le__ = IntField.__le__
+    __ge__ = IntField.__ge__
+
+    def __truediv__(self, n):
+        if m.isinf(self.x):
+            return self
+        else:
+            return FloatField(self.x / n)
 
-@result
-class CoverageLineResult(co.namedtuple('CoverageResult',
-        'coverage_line_hits,coverage_line_count')):
+# fractional fields, a/b
+class FracField(co.namedtuple('FracField', 'a,b')):
     __slots__ = ()
-    def __new__(cls, coverage_line_hits=0, coverage_line_count=0):
-        return super().__new__(cls,
-            int(coverage_line_hits),
-            int(coverage_line_count))
+    def __new__(cls, a, b=None):
+        if isinstance(a, FracField) and b is None:
+            return a
+        if isinstance(a, str) and b is None:
+            a, b = a.split('/', 1)
+        if b is None:
+            b = a
+        return super().__new__(cls, IntField(a), IntField(b))
 
-    def __add__(self, other):
-        return self.__class__(
-            self.coverage_line_hits + other.coverage_line_hits,
-            self.coverage_line_count + other.coverage_line_count)
-
-    def __sub__(self, other):
-        old_hits = other.coverage_line_hits if other is not None else 0
-        old_count = other.coverage_line_count if other is not None else 0
-        new_hits = self.coverage_line_hits if self is not None else 0
-        new_count = self.coverage_line_count if self is not None else 0
-        return ((new_hits/new_count if new_count else 1.0)
-            - (old_hits/old_count if old_count else 1.0))
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self):
-        return -(self.coverage_line_hits/self.coverage_line_count
-            if self.coverage_line_count else -1)
-
-    _header = '%19s' % 'coverage/line'
-    _nil    = '%11s %7s' % ('-', '-')
     def __str__(self):
-        return '%11s %7s' % (
-            '%d/%d' % (self.coverage_line_hits, self.coverage_line_count)
-                if self.coverage_line_count else '-',
-            '%.1f%%' % (100*self.coverage_line_hits/self.coverage_line_count)
-                if self.coverage_line_count else '-')
-
-@result
-class CoverageBranchResult(co.namedtuple('CoverageResult',
-        'coverage_branch_hits,coverage_branch_count')):
-    __slots__ = ()
-    def __new__(cls, coverage_branch_hits=0, coverage_branch_count=0):
-        return super().__new__(cls,
-            int(coverage_branch_hits),
-            int(coverage_branch_count))
+        return '%s/%s' % (self.a, self.b)
 
-    def __add__(self, other):
-        return self.__class__(
-            self.coverage_branch_hits + other.coverage_branch_hits,
-            self.coverage_branch_count + other.coverage_branch_count)
-
-    def __sub__(self, other):
-        old_hits = other.coverage_branch_hits if other is not None else 0
-        old_count = other.coverage_branch_count if other is not None else 0
-        new_hits = self.coverage_branch_hits if self is not None else 0
-        new_count = self.coverage_branch_count if self is not None else 0
-        return ((new_hits/new_count if new_count else 1.0)
-            - (old_hits/old_count if old_count else 1.0))
-
-    def __rsub__(self, other):
-        return self.__class__.__sub__(other, self)
-
-    def key(self):
-        return -(self.coverage_branch_hits/self.coverage_branch_count
-            if self.coverage_branch_count else -1)
-
-    _header = '%19s' % 'coverage/branch'
-    _nil    = '%11s %7s' % ('-', '-')
-    def __str__(self):
-        return '%11s %7s' % (
-            '%d/%d' % (self.coverage_branch_hits, self.coverage_branch_count)
-                if self.coverage_branch_count else '-',
-            '%.1f%%' % (100*self.coverage_branch_hits/self.coverage_branch_count)
-                if self.coverage_branch_count else '-')
+    none = '%11s %7s' % ('-', '-')
+    def table(self):
+        if not self.b.x:
+            return self.none
 
+        t = self.a.x/self.b.x
+        return '%11s %7s' % (
+            self,
+            '∞%' if t == float('+inf')
+            else '-∞%' if t == float('-inf')
+            else '%.1f%%' % (100*t))
+
+    diff_none = '%11s' % '-'
+    def diff_table(self):
+        if not self.b.x:
+            return self.diff_none
+
+        return '%11s' % (self,)
+
+    def diff_diff(self, other):
+        new_a, new_b = self if self else (IntField(0), IntField(0))
+        old_a, old_b = other if other else (IntField(0), IntField(0))
+        return '%11s' % ('%s/%s' % (
+            new_a.diff_diff(old_a).strip(),
+            new_b.diff_diff(old_b).strip()))
+
+    def ratio(self, other):
+        new_a, new_b = self if self else (IntField(0), IntField(0))
+        old_a, old_b = other if other else (IntField(0), IntField(0))
+        new = new_a.x/new_b.x if new_b.x else 1.0
+        old = old_a.x/old_b.x if old_b.x else 1.0
+        return new - old
 
-def openio(path, mode='r'):
-    if path == '-':
-        if 'r' in mode:
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
-        else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+    def __add__(self, other):
+        return FracField(self.a + other.a, self.b + other.b)
+
+    def __mul__(self, other):
+        return FracField(self.a * other.a, self.b * other.b)
+
+    def __lt__(self, other):
+        self_r = self.a.x/self.b.x if self.b.x else float('-inf')
+        other_r = other.a.x/other.b.x if other.b.x else float('-inf')
+        return self_r < other_r
+
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __le__(self, other):
+        return not self.__gt__(other)
+
+    def __ge__(self, other):
+        return not self.__lt__(other)
+
+    def __truediv__(self, n):
+        return FracField(self.a / n, self.b / n)
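
FracField encodes hits/total pairs such as the coverage columns handled by the
'add' merge above: addition sums numerators and denominators separately, and
table() renders the fraction with its percentage. A quick sketch of the
behaviour, using FracField exactly as defined above:

    lines = FracField('3/4') + FracField('1/2')
    assert str(lines) == '4/6'
    assert lines.table() == '%11s %7s' % ('4/6', '66.7%')
    assert FracField('0/0').table() == FracField.none   # no lines -> '-'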
+
+
+def homogenize(results, *,
+        fields=None,
+        merges=None,
+        renames=None,
+        types=None,
+        **_):
+    # rename fields?
+    if renames is not None:
+        results_ = []
+        for r in results:
+            results_.append({renames.get(k, k): v for k, v in r.items()})
+        results = results_
+
+    # find all fields
+    if not fields:
+        fields = co.OrderedDict()
+        for r in results:
+            # also remove None fields, these can get introduced by
+            # csv.DictReader when header and rows mismatch
+            fields.update((k, v) for k, v in r.items() if k is not None)
+        fields = list(fields.keys())
+
+    # go ahead and clean up none values, these can have a few forms
+    results_ = []
+    for r in results:
+        results_.append({
+            k: r[k] for k in fields
+            if r.get(k) is not None and not(
+                isinstance(r[k], str)
+                and re.match('^\s*[+-]?\s*$', r[k]))})
+
+    # find best type for all fields
+    def try_(x, type):
+        try:
+            type(x)
+            return True
+        except ValueError:
+            return False
+
+    if types is None:
+        types = {}
+        for k in fields:
+            if merges is not None and merges.get(k):
+                for type in [IntField, FloatField, FracField]:
+                    if all(k not in r or try_(r[k], type) for r in results_):
+                        types[k] = type
+                        break
+                else:
+                    print("no type matches field %r?" % k)
+                    sys.exit(-1)
+
+    # homogenize types
+    for k in fields:
+        if k in types:
+            for r in results_:
+                if k in r:
+                    r[k] = types[k](r[k])
+
+    return fields, types, results_
+
+
+def fold(results, *,
+        fields=None,
+        merges=None,
+        by=None,
+        **_):
+    folding = co.OrderedDict()
+    if by is None:
+        by = [k for k in fields if k not in merges]
+
+    for r in results:
+        name = tuple(r.get(k) for k in by)
+        if name not in folding:
+            folding[name] = {k: [] for k in fields if k in merges}
+        for k in fields:
+            # drop all fields without a type
+            if k in merges and k in r:
+                folding[name][k].append(r[k])
+
+    # merge fields, we need the count at this point for averages
+    folded = []
+    types = {}
+    for name, r in folding.items():
+        r_ = {}
+        for k, vs in r.items():
+            if vs:
+                _, merge = MERGES[merges[k]]
+                r_[k] = merge(vs)
+
+        # drop all rows without any fields
+        # and drop all empty keys
+        if r_:
+            folded.append(dict(
+                {k: n for k, n in zip(by, name) if n},
+                **r_))
+
+    fields_ = by + [k for k in fields if k in merges]
+    return fields_, folded
+
+
+def table(results, diff_results=None, *,
+        fields=None,
+        types=None,
+        merges=None,
+        by=None,
+        sort=None,
+        reverse_sort=None,
+        summary=False,
+        all=False,
+        percent=False,
+        **_):
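+    # 'all' is also a flag name, so stash the flag and restore the builtin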
+    all_, all = all, __builtins__.all
+
+    # fold
+    if by is not None:
+        fields, results = fold(results, fields=fields, merges=merges, by=by)
+        if diff_results is not None:
+            _, diff_results = fold(diff_results,
+                fields=fields, merges=merges, by=by)
+
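+    # index rows by their non-merged (grouping) fields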
+    table = {
+        tuple(r.get(k,'') for k in fields if k not in merges): r
+        for r in results}
+    diff_table = {
+        tuple(r.get(k,'') for k in fields if k not in merges): r
+        for r in diff_results or []}
+
+    # sort, note that python's sort is stable
+    names = list(table.keys() | diff_table.keys())
+    names.sort()
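+    # when diffing, also sort so the largest ratios come first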
+    if diff_results is not None:
+        names.sort(key=lambda n: [
+            -types[k].ratio(
+                table.get(n,{}).get(k),
+                diff_table.get(n,{}).get(k))
+                for k in fields if k in merges])
+    if sort:
+        names.sort(key=lambda n: tuple(
+            (table[n][k],) if k in table.get(n,{}) else ()
+            for k in sort),
+            reverse=True)
+    elif reverse_sort:
+        names.sort(key=lambda n: tuple(
+            (table[n][k],) if k in table.get(n,{}) else ()
+            for k in reverse_sort),
+            reverse=False)
+
+    # print header
+    print('%-36s' % ('%s%s' % (
+        ','.join(k for k in fields if k not in merges),
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else ''),
+        end='')
+    if diff_results is None:
+        print(' %s' % (
+            ' '.join(k.rjust(len(types[k].none))
+                for k in fields if k in merges)))
+    elif percent:
+        print(' %s' % (
+            ' '.join(k.rjust(len(types[k].diff_none))
+                for k in fields if k in merges)))
     else:
-        return open(path, mode)
-
-def main(**args):
-    # find results
-    results = co.defaultdict(lambda: {})
-    for path in args.get('csv_paths', '-'):
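+        # with a full diff, print old (o), new (n), and diff (d) columns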
+        print(' %s %s %s' % (
+            ' '.join(('o'+k).rjust(len(types[k].diff_none))
+                for k in fields if k in merges),
+            ' '.join(('n'+k).rjust(len(types[k].diff_none))
+                for k in fields if k in merges),
+            ' '.join(('d'+k).rjust(len(types[k].diff_none))
+                for k in fields if k in merges)))
+
+    # print entries
+    if not summary:
+        for name in names:
+            r = table.get(name, {})
+            if diff_results is not None:
+                diff_r = diff_table.get(name, {})
+                ratios = [types[k].ratio(r.get(k), diff_r.get(k))
+                    for k in fields if k in merges]
+                if not any(ratios) and not all_:
+                    continue
+
+            print('%-36s' % ','.join(name), end='')
+            if diff_results is None:
+                print(' %s' % (
+                    ' '.join(r[k].table()
+                        if k in r else types[k].none
+                        for k in fields if k in merges)))
+            elif percent:
+                print(' %s%s' % (
+                    ' '.join(r[k].diff_table()
+                        if k in r else types[k].diff_none
+                        for k in fields if k in merges),
+                    ' (%s)' % ', '.join(
+                            '+∞%' if t == float('+inf')
+                            else '-∞%' if t == float('-inf')
+                            else '%+.1f%%' % (100*t)
+                            for t in ratios)))
+            else:
+                print(' %s %s %s%s' % (
+                    ' '.join(diff_r[k].diff_table()
+                        if k in diff_r else types[k].diff_none
+                        for k in fields if k in merges),
+                    ' '.join(r[k].diff_table()
+                        if k in r else types[k].diff_none
+                        for k in fields if k in merges),
+                    ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
+                        if k in r or k in diff_r else types[k].diff_none
+                        for k in fields if k in merges),
+                    ' (%s)' % ', '.join(
+                            '+∞%' if t == float('+inf')
+                            else '-∞%' if t == float('-inf')
+                            else '%+.1f%%' % (100*t)
+                            for t in ratios
+                            if t)
+                        if any(ratios) else ''))
+
+    # print total
+    _, total = fold(results, fields=fields, merges=merges, by=[])
+    r = total[0] if total else {}
+    if diff_results is not None:
+        _, diff_total = fold(diff_results,
+            fields=fields, merges=merges, by=[])
+        diff_r = diff_total[0] if diff_total else {}
+        ratios = [types[k].ratio(r.get(k), diff_r.get(k))
+            for k in fields if k in merges]
+
+    print('%-36s' % 'TOTAL', end='')
+    if diff_results is None:
+        print(' %s' % (
+            ' '.join(r[k].table()
+                if k in r else types[k].none
+                for k in fields if k in merges)))
+    elif percent:
+        print(' %s%s' % (
+            ' '.join(r[k].diff_table()
+                if k in r else types[k].diff_none
+                for k in fields if k in merges),
+            ' (%s)' % ', '.join(
+                    '+∞%' if t == float('+inf')
+                    else '-∞%' if t == float('-inf')
+                    else '%+.1f%%' % (100*t)
+                    for t in ratios)))
+    else:
+        print(' %s %s %s%s' % (
+            ' '.join(diff_r[k].diff_table()
+                if k in diff_r else types[k].diff_none
+                for k in fields if k in merges),
+            ' '.join(r[k].diff_table()
+                if k in r else types[k].diff_none
+                for k in fields if k in merges),
+            ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
+                if k in r or k in diff_r else types[k].diff_none
+                for k in fields if k in merges),
+            ' (%s)' % ', '.join(
+                    '+∞%' if t == float('+inf')
+                    else '-∞%' if t == float('-inf')
+                    else '%+.1f%%' % (100*t)
+                    for t in ratios
+                    if t)
+                if any(ratios) else ''))
+
+
+def main(csv_paths, *, fields=None, by=None, **args):
+    # figure out what fields to use
+    renames = {}
+
+    if fields is not None:
+        fields_ = []
+        for name in fields:
+            if '=' in name:
+                a, b = name.split('=', 1)
+                renames[b] = a
+                name = a
+            fields_.append(name)
+        fields = fields_
+
+    if by is not None:
+        by_ = []
+        for name in by:
+            if '=' in name:
+                a, b = name.split('=', 1)
+                renames[b] = a
+                name = a
+            by_.append(name)
+        by = by_
+
+    # always include the 'by' fields, it doesn't make sense to omit them
+    if fields is not None and by is not None:
+        fields[:0] = [k for k in by if k not in fields]
+
+    # use preconfigured merge operations unless any merge operation is
+    # explicitly specified
+    merge_args = (args
+        if any(args.get(m) for m in MERGES.keys())
+        else {m: k for m, (k, _) in MERGES.items()})
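+    # merges maps each field name to the name of its merge operation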
+    merges = {}
+    for m in MERGES.keys():
+        for k in merge_args.get(m, []):
+            if k in merges:
+                print("conflicting merge type for field %r?" % k)
+                sys.exit(-1)
+            merges[k] = m
+    # allow renames to apply to merges
+    for m in MERGES.keys():
+        for k in merge_args.get(m, []):
+            if renames.get(k, k) not in merges:
+                merges[renames.get(k, k)] = m
+    # ignore merges that conflict with 'by' fields
+    if by is not None:
+        for k in by:
+            if k in merges:
+                del merges[k]
+
+    # find CSV files
+    paths = []
+    for path in csv_paths:
+        if os.path.isdir(path):
+            path = path + '/*.csv'
+
+        for path in glob.glob(path):
+            paths.append(path)
+
+    if not paths:
+        print('no .csv files found in %r?' % csv_paths)
+        sys.exit(-1)
+
+    results = []
+    for path in paths:
         try:
             with openio(path) as f:
-                r = csv.DictReader(f)
-                for result in r:
-                    file = result.pop('file', '')
-                    name = result.pop('name', '')
-                    for Result in RESULTS:
-                        if all(result.get(f) not in {None, ''}
-                                for f in Result._fields):
-                            results[(file, name)][Result.__name__] = (
-                                results[(file, name)].get(
-                                    Result.__name__, Result())
-                                + Result(*(result[f]
-                                    for f in Result._fields)))
+                reader = csv.DictReader(f)
+                for r in reader:
+                    results.append(r)
         except FileNotFoundError:
             pass
 
-    # find previous results?
-    if args.get('diff'):
-        prev_results = co.defaultdict(lambda: {})
-        for path in args.get('csv_paths', '-'):
-            try:
-                with openio(args['diff']) as f:
-                    r = csv.DictReader(f)
-                    for result in r:
-                        file = result.pop('file', '')
-                        name = result.pop('name', '')
-                        for Result in RESULTS:
-                            if all(result.get(f) not in {None, ''}
-                                    for f in Result._fields):
-                                prev_results[(file, name)][Result.__name__] = (
-                                    prev_results[(file, name)].get(
-                                        Result.__name__, Result())
-                                    + Result(*(result[f]
-                                        for f in Result._fields)))
-            except FileNotFoundError:
-                pass
-
-    # filter our result types by results that are present
-    if 'all' in args['fields']:
-        filtered_results = RESULTS
-    else:
-        filtered_results = [
-            Result for Result in RESULTS
-            if (any(f.startswith(r)
-                    for r in args['fields']
-                    for f in Result._fields)
-                or any(Result._header.strip().startswith(r)
-                    for r in args['fields']))]
-
-    # figure out a sort key
-    if args.get('sort'):
-        key_Result = next(
-            Result for Result in RESULTS
-            if (any(f.startswith(args['sort'])
-                    for f in Result._fields)
-                or Result._header.strip().startswith(args['sort'])))
-        key = lambda result: result.get(key_Result.__name__, key_Result()).key()
-        reverse = False
-    elif args.get('reverse_sort'):
-        key_Result = next(
-            Result for Result in RESULTS
-            if (any(f.startswith(args['reverse_sort'])
-                    for f in Result._fields)
-                or Result._header.strip().startswith(args['reverse_sort'])))
-        key = lambda result: result.get(key_Result.__name__, key_Result()).key()
-        reverse = True
-    else:
-        key = lambda _: None
-        reverse = False
+    # homogenize
+    fields, types, results = homogenize(results,
+        fields=fields, merges=merges, renames=renames)
 
-    # write merged results to CSV
+    # fold to remove duplicates
+    fields, results = fold(results,
+        fields=fields, merges=merges)
+
+    # write results to CSV
     if args.get('output'):
         with openio(args['output'], 'w') as f:
-            w = csv.DictWriter(f, sum(
-                (Result._fields for Result in filtered_results),
-                ('file', 'name')))
-            w.writeheader()
-            for (file, name), result in sorted(results.items()):
-                w.writerow(ft.reduce(dict.__or__,
-                    (r._asdict() for r in result.values()),
-                    {'file': file, 'name': name}))
-
-    # print results
-    def print_header(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
+            writer = csv.DictWriter(f, fields)
+            writer.writeheader()
+            for r in results:
+                writer.writerow(r)
 
-        if not args.get('diff'):
-            print('%-36s %s' % (by,
-                ' '.join(Result._header for Result in filtered_results)))
-        else:
-            old = {entry(k) for k in results.keys()}
-            new = {entry(k) for k in prev_results.keys()}
-            print('%-36s %s' % (
-                '%s (%d added, %d removed)' % (by,
-                        sum(1 for k in new if k not in old),
-                        sum(1 for k in old if k not in new))
-                    if by else '',
-                ' '.join('%s%-10s' % (Result._header, '')
-                    for Result in filtered_results)))
-
-    def print_entries(by):
-        if by == 'total':
-            entry = lambda k: 'TOTAL'
-        elif by == 'file':
-            entry = lambda k: k[0]
-        else:
-            entry = lambda k: k[1]
-
-        entries = co.defaultdict(lambda: {})
-        for k, result in results.items():
-            entries[entry(k)] |= {
-                r.__class__.__name__: entries[entry(k)].get(
-                    r.__class__.__name__, r.__class__()) + r
-                for r in result.values()}
-
-        if not args.get('diff'):
-            for name, result in sorted(entries.items(),
-                    key=lambda p: (key(p[1]), p),
-                    reverse=reverse):
-                print('%-36s %s' % (name, ' '.join(
-                    str(result.get(Result.__name__, Result._nil))
-                    for Result in filtered_results)))
-        else:
-            prev_entries = co.defaultdict(lambda: {})
-            for k, result in prev_results.items():
-                prev_entries[entry(k)] |= {
-                    r.__class__.__name__: prev_entries[entry(k)].get(
-                        r.__class__.__name__, r.__class__()) + r
-                    for r in result.values()}
-
-            diff_entries = {
-                name: (prev_entries.get(name), entries.get(name))
-                for name in (entries.keys() | prev_entries.keys())}
-
-            for name, (old, new) in sorted(diff_entries.items(),
-                    key=lambda p: (key(p[1][1]), p)):
-                fields = []
-                changed = False
-                for Result in filtered_results:
-                    o = old.get(Result.__name__) if old is not None else None
-                    n = new.get(Result.__name__) if new is not None else None
-                    ratio = n - o if n is not None or o is not None else 0
-                    changed = changed or ratio
-                    fields.append('%s%-10s' % (
-                        n if n is not None else Result._nil,
-                        '' if not ratio
-                            else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
-                            else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
-                            else ' (%+.1f%%)' % (100*ratio)))
-                if changed or args.get('all'):
-                    print('%-36s %s' % (name, ' '.join(fields)))
-
-    if args.get('quiet'):
-        pass
-    elif args.get('summary'):
-        print_header('')
-        print_entries('total')
-    elif args.get('files'):
-        print_header('file')
-        print_entries('file')
-        print_entries('total')
-    else:
-        print_header('name')
-        print_entries('name')
-        print_entries('total')
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f)
+                for r in reader:
+                    diff_results.append(r)
+        except FileNotFoundError:
+            pass
+
+        # homogenize
+        _, _, diff_results = homogenize(diff_results,
+            fields=fields, merges=merges, renames=renames, types=types)
+
+        # fold to remove duplicates
+        _, diff_results = fold(diff_results,
+            fields=fields, merges=merges)
+
+    # print table
+    if not args.get('quiet'):
+        table(
+            results,
+            diff_results if args.get('diff') else None,
+            fields=fields,
+            types=types,
+            merges=merges,
+            by=by,
+            **args)
 
 
 if __name__ == "__main__":
     import argparse
     import sys
     parser = argparse.ArgumentParser(
-        description="Summarize measurements")
-    parser.add_argument('csv_paths', nargs='*', default='-',
-        help="Description of where to find *.csv files. May be a directory \
-            or list of paths.")
-    parser.add_argument('-q', '--quiet', action='store_true',
+        description="Summarize measurements in CSV files.")
+    parser.add_argument(
+        'csv_paths',
+        nargs='*',
+        default=CSV_PATHS,
+        help="Description of where to find *.csv files. May be a directory "
+            "or list of paths. Defaults to %(default)r.")
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
         help="Don't show anything, useful with -o.")
-    parser.add_argument('-o', '--output',
+    parser.add_argument(
+        '-o', '--output',
         help="Specify CSV file to store results.")
-    parser.add_argument('-d', '--diff',
+    parser.add_argument(
+        '-d', '--diff',
         help="Specify CSV file to diff against.")
-    parser.add_argument('-a', '--all', action='store_true',
-        help="Show all objects, not just the ones that changed.")
-    parser.add_argument('-f', '--fields',
-        type=lambda x: set(re.split('\s*,\s*', x)),
-        default=FIELDS,
-        help="Comma separated list of fields to print, by default all fields \
-            that are found in the CSV files are printed. \"all\" prints all \
-            fields this script knows. Defaults to %r." % FIELDS)
-    parser.add_argument('-s', '--sort',
-        help="Sort by this field.")
-    parser.add_argument('-S', '--reverse-sort',
-        help="Sort by this field, but backwards.")
-    parser.add_argument('-F', '--files', action='store_true',
-        help="Show file-level calls.")
-    parser.add_argument('-Y', '--summary', action='store_true',
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-f', '--fields',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Only show these fields. Can rename fields "
+            "with old_name=new_name.")
+    parser.add_argument(
+        '-b', '--by',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Group by these fields. Can rename fields "
+            "with old_name=new_name.")
+    parser.add_argument(
+        '--add',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Add these fields when merging.")
+    parser.add_argument(
+        '--mul',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Multiply these fields when merging.")
+    parser.add_argument(
+        '--min',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Take the minimum of these fields when merging.")
+    parser.add_argument(
+        '--max',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Take the maximum of these fields when merging.")
+    parser.add_argument(
+        '--avg',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Average these fields when merging.")
+    parser.add_argument(
+        '-s', '--sort',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Sort by these fields.")
+    parser.add_argument(
+        '-S', '--reverse-sort',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Sort by these fields, but backwards.")
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
         help="Only show the totals.")
     sys.exit(main(**{k: v
         for k, v in vars(parser.parse_args()).items()