Added perfbd.py and block device performance sampling in bench-runner

Based loosely on Linux's perf tool, perfbd.py uses trace output with
backtraces to aggregate and show the block device usage of all functions
in a program, propagating block device operation cost up the backtrace
for each operation.
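
The propagation works like perf's callgraph aggregation: each sampled
operation charges its cost both to the function it occurred in and to
every caller on its backtrace. A minimal Python sketch of the idea
(illustrative only, not perfbd.py's actual implementation):

    import collections

    def aggregate(samples):
        # samples: [(cost, [innermost frame, ..., outermost frame]), ...]
        self_cost = collections.Counter()
        propagated = collections.Counter()
        for cost, backtrace in samples:
            # "self" cost lands on the innermost function
            self_cost[backtrace[0]] += cost
            # propagated cost charges each unique caller once, so
            # recursive frames aren't double-counted
            for frame in set(backtrace):
                propagated[frame] += cost
        return self_cost, propagated

    # example: two 4096-byte erases under different call paths
    samples = [
        (4096, ['lfs_bd_erase', 'lfs_alloc', 'lfs_file_write']),
        (4096, ['lfs_bd_erase', 'lfs_dir_compact', 'lfs_file_sync']),
    ]
    self_cost, propagated = aggregate(samples)
    print(propagated['lfs_bd_erase'])   # 8192, all erased bytes
    print(propagated['lfs_file_sync'])  # 4096, only erases under sync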

Combined with --trace-period and --trace-freq for sampling/filtering
trace events, this allows the bench-runner to record the general cost of
block device operations with very little overhead.
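
The two knobs compose: --trace-period keeps every Nth trace event, while
--trace-freq drops events that arrive within 1/freq seconds of the last
kept event, so --trace-freq=100 keeps at most one event per 10 ms. A
rough Python rendering of the filter (the runners implement this in C,
see the bench_runner.c/test_runner.c diffs below):

    import time

    trace_period = 0    # e.g. 100 -> keep every 100th event
    trace_freq = 0      # e.g. 100 -> keep at most 100 events/second
    trace_cycles = 0
    trace_time = 0.0

    def should_trace():
        global trace_cycles, trace_time
        # sample at a specific period?
        if trace_period:
            trace_cycles += 1
            if (trace_cycles-1) % trace_period != 0:
                return False
        # sample at a specific frequency?
        if trace_freq:
            now = time.monotonic()
            if now - trace_time < 1/trace_freq:
                return False
            trace_time = now
        return True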

Adopted this as the default side-effect of make bench, replacing
cycle-based performance measurements, which are less important for
littlefs.
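
With these defaults, make bench leaves a trace file next to the
bench-runner, and perfbd.py aggregates it into per-function
erase/prog/read costs. A sketch of the resulting workflow under the
default build paths (the exact -F source filters depend on $(SRC); see
the perfbd target in the Makefile diff below):

    make bench
    make perfbd
    # roughly equivalent to:
    # ./scripts/perfbd.py ./runners/bench_runner runners/bench_runner.trace \
    #         -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
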
Christopher Haster, 3 years ago
Commit 3a33c3795b
20 files changed, 2022 insertions(+), 606 deletions(-)
  1. Makefile (+67 -22)
  2. bd/lfs_emubd.c (+2 -1)
  3. bd/lfs_filebd.c (+4 -2)
  4. bd/lfs_rambd.c (+2 -1)
  5. runners/bench_runner.c (+102 -10)
  6. runners/test_runner.c (+102 -10)
  7. scripts/bench.py (+36 -26)
  8. scripts/code.py (+14 -31)
  9. scripts/cov.py (+6 -24)
  10. scripts/data.py (+14 -31)
  11. scripts/perf.py (+313 -297)
  12. scripts/perfbd.py (+1252 -0)
  13. scripts/plot.py (+6 -18)
  14. scripts/prettyasserts.py (+4 -4)
  15. scripts/stack.py (+8 -26)
  16. scripts/struct_.py (+13 -30)
  17. scripts/summary.py (+6 -23)
  18. scripts/tailpipe.py (+7 -4)
  19. scripts/test.py (+36 -26)
  20. scripts/tracebd.py (+28 -20)

Makefile (+67 -22)

@@ -41,30 +41,32 @@ TEST_SRC ?= $(SRC) \
 		$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
 		runners/test_runner.c
 TEST_RUNNER ?= $(BUILDDIR)runners/test_runner
-TEST_TC   := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
+TEST_TC    := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
 		$(TEST_SRC:%.c=$(BUILDDIR)%.t.c)
-TEST_TAC  := $(TEST_TC:%.t.c=%.t.a.c)
-TEST_OBJ  := $(TEST_TAC:%.t.a.c=%.t.a.o)
-TEST_DEP  := $(TEST_TAC:%.t.a.c=%.t.a.d)
-TEST_CI	  := $(TEST_TAC:%.t.a.c=%.t.a.ci)
-TEST_GCNO := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
-TEST_GCDA := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
-TEST_PERF := $(TEST_RUNNER:%=%.perf)
+TEST_TAC   := $(TEST_TC:%.t.c=%.t.a.c)
+TEST_OBJ   := $(TEST_TAC:%.t.a.c=%.t.a.o)
+TEST_DEP   := $(TEST_TAC:%.t.a.c=%.t.a.d)
+TEST_CI	   := $(TEST_TAC:%.t.a.c=%.t.a.ci)
+TEST_GCNO  := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
+TEST_GCDA  := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
+TEST_PERF  := $(TEST_RUNNER:%=%.perf)
+TEST_TRACE := $(TEST_RUNNER:%=%.trace)
 
 BENCHES ?= $(wildcard benches/*.toml)
 BENCH_SRC ?= $(SRC) \
 		$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
 		runners/bench_runner.c
 BENCH_RUNNER ?= $(BUILDDIR)runners/bench_runner
-BENCH_BC   := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
+BENCH_BC    := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
 		$(BENCH_SRC:%.c=$(BUILDDIR)%.b.c)
-BENCH_BAC  := $(BENCH_BC:%.b.c=%.b.a.c)
-BENCH_OBJ  := $(BENCH_BAC:%.b.a.c=%.b.a.o)
-BENCH_DEP  := $(BENCH_BAC:%.b.a.c=%.b.a.d)
-BENCH_CI   := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
-BENCH_GCNO := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
-BENCH_GCDA := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
-BENCH_PERF := $(BENCH_RUNNER:%=%.perf)
+BENCH_BAC   := $(BENCH_BC:%.b.c=%.b.a.c)
+BENCH_OBJ   := $(BENCH_BAC:%.b.a.c=%.b.a.o)
+BENCH_DEP   := $(BENCH_BAC:%.b.a.c=%.b.a.d)
+BENCH_CI    := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
+BENCH_GCNO  := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
+BENCH_GCDA  := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
+BENCH_PERF  := $(BENCH_RUNNER:%=%.perf)
+BENCH_TRACE := $(BENCH_RUNNER:%=%.trace)
 
 ifdef DEBUG
 override CFLAGS += -O0
@@ -85,6 +87,9 @@ endif
 ifdef YES_PERF
 override CFLAGS += -fno-omit-frame-pointer
 endif
+ifdef YES_PERFBD
+override CFLAGS += -fno-omit-frame-pointer
+endif
 
 ifdef VERBOSE
 override CODEFLAGS   += -v
@@ -93,7 +98,11 @@ override STACKFLAGS  += -v
 override STRUCTFLAGS += -v
 override COVFLAGS    += -v
 override PERFFLAGS   += -v
+override PERFBDFLAGS += -v
 endif
+# forward -j flag
+override PERFFLAGS   += $(filter -j%,$(MAKEFLAGS))
+override PERFBDFLAGS += $(filter -j%,$(MAKEFLAGS))
 ifneq ($(NM),nm)
 override CODEFLAGS += --nm-tool="$(NM)"
 override DATAFLAGS += --nm-tool="$(NM)"
@@ -103,6 +112,7 @@ override CODEFLAGS   += --objdump-tool="$(OBJDUMP)"
 override DATAFLAGS   += --objdump-tool="$(OBJDUMP)"
 override STRUCTFLAGS += --objdump-tool="$(OBJDUMP)"
 override PERFFLAGS   += --objdump-tool="$(OBJDUMP)"
+override PERFBDFLAGS += --objdump-tool="$(OBJDUMP)"
 endif
 ifneq ($(PERF),perf)
 override PERFFLAGS += --perf-tool="$(PERF)"
@@ -114,10 +124,14 @@ override BENCHFLAGS += -b
 override TESTFLAGS  += $(filter -j%,$(MAKEFLAGS))
 override BENCHFLAGS += $(filter -j%,$(MAKEFLAGS))
 ifdef YES_PERF
-override TESTFLAGS += --perf=$(TEST_PERF)
+override TESTFLAGS  += -p$(TEST_PERF)
+override BENCHFLAGS += -p$(BENCH_PERF)
+endif
+ifdef YES_PERFBD
+override TESTFLAGS  += -t$(TEST_TRACE) --trace-backtrace --trace-freq=100
 endif
-ifndef NO_PERF
-override BENCHFLAGS += --perf=$(BENCH_PERF)
+ifndef NO_PERFBD
+override BENCHFLAGS  += -t$(BENCH_TRACE) --trace-backtrace --trace-freq=100
 endif
 ifdef VERBOSE
 override TESTFLAGS   += -v
@@ -165,6 +179,11 @@ endif
 ifdef YES_PERF
 bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
 endif
+ifdef YES_PERFBD
+bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
+endif
+# note we remove some binary dependent files during compilation,
+# otherwise it's way too easy to end up with outdated results
 test-runner build-test: $(TEST_RUNNER)
 ifndef NO_COV
 	rm -f $(TEST_GCDA)
@@ -172,6 +191,9 @@ endif
 ifdef YES_PERF
 	rm -f $(TEST_PERF)
 endif
+ifdef YES_PERFBD
+	rm -f $(TEST_TRACE)
+endif
 
 .PHONY: test
 test: test-runner
@@ -185,16 +207,24 @@ test-list: test-runner
 ifdef YES_COV
 bench-runner build-bench: override CFLAGS+=--coverage
 endif
-ifndef NO_PERF
+ifdef YES_PERF
+bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
+endif
+ifndef NO_PERFBD
 bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
 endif
+# note we remove some binary dependent files during compilation,
+# otherwise it's way too easy to end up with outdated results
 bench-runner build-bench: $(BENCH_RUNNER)
 ifdef YES_COV 
 	rm -f $(BENCH_GCDA)
 endif
-ifndef NO_PERF
+ifdef YES_PERF
 	rm -f $(BENCH_PERF)
 endif
+ifndef NO_PERFBD
+	rm -f $(BENCH_TRACE)
+endif
 
 .PHONY: bench
 bench: bench-runner
@@ -234,6 +264,13 @@ perf: $(BENCH_PERF)
 		-Scycles \
 		$(PERFFLAGS))
 
+.PHONY: perfbd
+perfbd: $(BENCH_TRACE)
+	$(strip ./scripts/perfbd.py \
+		$(BENCH_RUNNER) $^ $(patsubst %,-F%,$(SRC)) \
+		-Serased -Sproged -Sreaded \
+		$(PERFBDFLAGS))
+
 .PHONY: summary sizes
 summary sizes: $(BUILDDIR)lfs.csv
 	$(strip ./scripts/summary.py -Y $^ \
@@ -275,6 +312,11 @@ $(BUILDDIR)lfs.cov.csv: $(GCDA)
 $(BUILDDIR)lfs.perf.csv: $(BENCH_PERF)
 	./scripts/perf.py $^ $(patsubst %,-F%,$(SRC)) -q $(PERFFLAGS) -o $@
 
+$(BUILDDIR)lfs.perfbd.csv: $(BENCH_TRACE)
+	$(strip ./scripts/perfbd.py \
+		$(BENCH_RUNNER) $^ $(patsubst %,-F%,$(SRC)) \
+		-q $(PERFBDFLAGS) -o $@)
+
 $(BUILDDIR)lfs.csv: \
 		$(BUILDDIR)lfs.code.csv \
 		$(BUILDDIR)lfs.data.csv \
@@ -326,7 +368,8 @@ clean:
 		$(BUILDDIR)lfs.stack.csv \
 		$(BUILDDIR)lfs.struct.csv \
 		$(BUILDDIR)lfs.cov.csv \
-		$(BUILDDIR)lfs.perf.csv)
+		$(BUILDDIR)lfs.perf.csv \
+		$(BUILDDIR)lfs.perfbd.csv)
 	rm -f $(OBJ)
 	rm -f $(DEP)
 	rm -f $(ASM)
@@ -340,6 +383,7 @@ clean:
 	rm -f $(TEST_GCNO)
 	rm -f $(TEST_GCDA)
 	rm -f $(TEST_PERF)
+	rm -f $(TEST_TRACE)
 	rm -f $(BENCH_RUNNER)
 	rm -f $(BENCH_BC)
 	rm -f $(BENCH_BAC)
@@ -349,3 +393,4 @@ clean:
 	rm -f $(BENCH_GCNO)
 	rm -f $(BENCH_GCDA)
 	rm -f $(BENCH_PERF)
+	rm -f $(BENCH_TRACE)

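The build knobs mirror the existing perf flags: PERFBD sampling is on by
default for benches (opt out with NO_PERFBD=1) and opt-in for tests
(YES_PERFBD=1, which also adds -fno-omit-frame-pointer so backtraces
survive optimization):

    make bench NO_PERFBD=1    # benches without block device sampling
    make test YES_PERFBD=1    # tests with block device sampling
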
bd/lfs_emubd.c (+2 -1)

@@ -358,7 +358,8 @@ int lfs_emubd_prog(const struct lfs_config *cfg, lfs_block_t block,
 }
 
 int lfs_emubd_erase(const struct lfs_config *cfg, lfs_block_t block) {
-    LFS_EMUBD_TRACE("lfs_emubd_erase(%p, 0x%"PRIx32")", (void*)cfg, block);
+    LFS_EMUBD_TRACE("lfs_emubd_erase(%p, 0x%"PRIx32" (%"PRIu32"))",
+            (void*)cfg, block, cfg->block_size);
     lfs_emubd_t *bd = cfg->context;
 
     // check if erase is valid

bd/lfs_filebd.c (+4 -2)

@@ -96,7 +96,8 @@ int lfs_filebd_read(const struct lfs_config *cfg, lfs_block_t block,
 
 int lfs_filebd_prog(const struct lfs_config *cfg, lfs_block_t block,
         lfs_off_t off, const void *buffer, lfs_size_t size) {
-    LFS_FILEBD_TRACE("lfs_filebd_prog(%p, 0x%"PRIx32", %"PRIu32", %p, %"PRIu32")",
+    LFS_FILEBD_TRACE("lfs_filebd_prog(%p, "
+                "0x%"PRIx32", %"PRIu32", %p, %"PRIu32")",
             (void*)cfg, block, off, buffer, size);
     lfs_filebd_t *bd = cfg->context;
 
@@ -127,7 +128,8 @@ int lfs_filebd_prog(const struct lfs_config *cfg, lfs_block_t block,
 }
 
 int lfs_filebd_erase(const struct lfs_config *cfg, lfs_block_t block) {
-    LFS_FILEBD_TRACE("lfs_filebd_erase(%p, 0x%"PRIx32")", (void*)cfg, block);
+    LFS_FILEBD_TRACE("lfs_filebd_erase(%p, 0x%"PRIx32" (%"PRIu32"))",
+            (void*)cfg, block, cfg->block_size);
 
     // check if erase is valid
     LFS_ASSERT(block < cfg->block_count);

bd/lfs_rambd.c (+2 -1)

@@ -107,7 +107,8 @@ int lfs_rambd_prog(const struct lfs_config *cfg, lfs_block_t block,
 }
 
 int lfs_rambd_erase(const struct lfs_config *cfg, lfs_block_t block) {
-    LFS_RAMBD_TRACE("lfs_rambd_erase(%p, 0x%"PRIx32")", (void*)cfg, block);
+    LFS_RAMBD_TRACE("lfs_rambd_erase(%p, 0x%"PRIx32" (%"PRIu32"))",
+            (void*)cfg, block, cfg->block_size);
 
     // check if erase is valid
     LFS_ASSERT(block < cfg->block_count);

runners/bench_runner.c (+102 -10)

@@ -14,6 +14,8 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <execinfo.h>
+#include <time.h>
 
 
 // some helpers
@@ -405,26 +407,63 @@ size_t bench_step_step = 1;
 
 const char *bench_disk_path = NULL;
 const char *bench_trace_path = NULL;
+bool bench_trace_backtrace = false;
+uint32_t bench_trace_period = 0;
+uint32_t bench_trace_freq = 0;
 FILE *bench_trace_file = NULL;
 uint32_t bench_trace_cycles = 0;
+uint64_t bench_trace_time = 0;
+uint64_t bench_trace_open_time = 0;
 lfs_emubd_sleep_t bench_read_sleep = 0.0;
 lfs_emubd_sleep_t bench_prog_sleep = 0.0;
 lfs_emubd_sleep_t bench_erase_sleep = 0.0;
 
+// this determines both the backtrace buffer and the trace printf buffer, if
+// trace ends up interleaved or truncated this may need to be increased
+#ifndef BENCH_TRACE_BACKTRACE_BUFFER_SIZE
+#define BENCH_TRACE_BACKTRACE_BUFFER_SIZE 8192
+#endif
+void *bench_trace_backtrace_buffer[
+    BENCH_TRACE_BACKTRACE_BUFFER_SIZE / sizeof(void*)];
 
 // trace printing
 void bench_trace(const char *fmt, ...) {
     if (bench_trace_path) {
+        // sample at a specific period?
+        if (bench_trace_period) {
+            if (bench_trace_cycles % bench_trace_period != 0) {
+                bench_trace_cycles += 1;
+                return;
+            }
+            bench_trace_cycles += 1;
+        }
+
+        // sample at a specific frequency?
+        if (bench_trace_freq) {
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - bench_trace_time < (1000*1000*1000) / bench_trace_freq) {
+                return;
+            }
+            bench_trace_time = now;
+        }
+
         if (!bench_trace_file) {
             // Tracing output is heavy and trying to open every trace
             // call is slow, so we only try to open the trace file every
             // so often. Note this doesn't affect successfully opened files
-            if (bench_trace_cycles % 128 != 0) {
-                bench_trace_cycles += 1;
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - bench_trace_open_time < 100*1000*1000) {
                 return;
             }
-            bench_trace_cycles += 1;
+            bench_trace_open_time = now;
 
+            // try to open the trace file
             int fd;
             if (strcmp(bench_trace_path, "-") == 0) {
                 fd = dup(1);
@@ -445,19 +484,42 @@
 
             FILE *f = fdopen(fd, "a");
             assert(f);
-            int err = setvbuf(f, NULL, _IOLBF, BUFSIZ);
+            int err = setvbuf(f, NULL, _IOFBF,
+                    BENCH_TRACE_BACKTRACE_BUFFER_SIZE);
             assert(!err);
             bench_trace_file = f;
         }
 
+        // print trace
         va_list va;
         va_start(va, fmt);
        int res = vfprintf(bench_trace_file, fmt, va);
+        va_end(va);
         if (res < 0) {
             fclose(bench_trace_file);
             bench_trace_file = NULL;
+            return;
         }
-        va_end(va);
+
+        if (bench_trace_backtrace) {
+            // print backtrace
+            size_t count = backtrace(
+                    bench_trace_backtrace_buffer,
+                    BENCH_TRACE_BACKTRACE_BUFFER_SIZE);
+            // note we skip our own stack frame
+            for (size_t i = 1; i < count; i++) {
+                res = fprintf(bench_trace_file, "\tat %p\n",
+                        bench_trace_backtrace_buffer[i]);
+                if (res < 0) {
+                    fclose(bench_trace_file);
+                    bench_trace_file = NULL;
+                    return;
+                }
+            }
+        }
+
+        // flush immediately
+        fflush(bench_trace_file);
     }
 }
 
@@ -1312,9 +1374,12 @@ enum opt_flags {
     OPT_STEP                     = 's',
     OPT_DISK                     = 'd',
     OPT_TRACE                    = 't',
-    OPT_READ_SLEEP               = 7,
-    OPT_PROG_SLEEP               = 8,
-    OPT_ERASE_SLEEP              = 9,
+    OPT_TRACE_BACKTRACE          = 7,
+    OPT_TRACE_PERIOD             = 8,
+    OPT_TRACE_FREQ               = 9,
+    OPT_READ_SLEEP               = 10,
+    OPT_PROG_SLEEP               = 11,
+    OPT_ERASE_SLEEP              = 12,
 };
 
 const char *short_opts = "hYlLD:G:s:d:t:";
@@ -1337,6 +1402,9 @@ const struct option long_opts[] = {
     {"step",             required_argument, NULL, OPT_STEP},
     {"step",             required_argument, NULL, OPT_STEP},
     {"disk",             required_argument, NULL, OPT_DISK},
     {"disk",             required_argument, NULL, OPT_DISK},
     {"trace",            required_argument, NULL, OPT_TRACE},
     {"trace",            required_argument, NULL, OPT_TRACE},
+    {"trace-backtrace",  no_argument,       NULL, OPT_TRACE_BACKTRACE},
+    {"trace-period",     required_argument, NULL, OPT_TRACE_PERIOD},
+    {"trace-freq",       required_argument, NULL, OPT_TRACE_FREQ},
     {"read-sleep",       required_argument, NULL, OPT_READ_SLEEP},
     {"read-sleep",       required_argument, NULL, OPT_READ_SLEEP},
     {"prog-sleep",       required_argument, NULL, OPT_PROG_SLEEP},
     {"prog-sleep",       required_argument, NULL, OPT_PROG_SLEEP},
     {"erase-sleep",      required_argument, NULL, OPT_ERASE_SLEEP},
     {"erase-sleep",      required_argument, NULL, OPT_ERASE_SLEEP},
@@ -1357,8 +1425,11 @@ const char *const help_text[] = {
     "Override a bench define.",
     "Override a bench define.",
     "Comma-separated list of disk geometries to bench.",
     "Comma-separated list of disk geometries to bench.",
     "Comma-separated range of bench permutations to run (start,stop,step).",
     "Comma-separated range of bench permutations to run (start,stop,step).",
-    "Redirect block device operations to this file.",
-    "Redirect trace output to this file.",
+    "Direct block device operations to this file.",
+    "Direct trace output to this file.",
+    "Include a backtrace with every trace statement.",
+    "Sample trace output at this period in cycles.",
+    "Sample trace output at this frequency in hz.",
     "Artificial read delay in seconds.",
     "Artificial read delay in seconds.",
     "Artificial prog delay in seconds.",
     "Artificial prog delay in seconds.",
     "Artificial erase delay in seconds.",
     "Artificial erase delay in seconds.",
@@ -1790,6 +1861,27 @@ step_unknown:
             case OPT_TRACE:
                 bench_trace_path = optarg;
                 break;
+            case OPT_TRACE_BACKTRACE:
+                bench_trace_backtrace = true;
+                break;
+            case OPT_TRACE_PERIOD: {
+                char *parsed = NULL;
+                bench_trace_period = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-period: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
+            case OPT_TRACE_FREQ: {
+                char *parsed = NULL;
+                bench_trace_freq = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-freq: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
             case OPT_READ_SLEEP: {
                 char *parsed = NULL;
                 double read_sleep = strtod(optarg, &parsed);

runners/test_runner.c (+102 -10)

@@ -14,6 +14,8 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <time.h>
+#include <execinfo.h>
 
 
 // some helpers
@@ -421,26 +423,63 @@ size_t test_step_step = 1;
 
 const char *test_disk_path = NULL;
 const char *test_trace_path = NULL;
+bool test_trace_backtrace = false;
+uint32_t test_trace_period = 0;
+uint32_t test_trace_freq = 0;
 FILE *test_trace_file = NULL;
 uint32_t test_trace_cycles = 0;
+uint64_t test_trace_time = 0;
+uint64_t test_trace_open_time = 0;
 lfs_emubd_sleep_t test_read_sleep = 0.0;
 lfs_emubd_sleep_t test_prog_sleep = 0.0;
 lfs_emubd_sleep_t test_erase_sleep = 0.0;
 
+// this determines both the backtrace buffer and the trace printf buffer, if
+// trace ends up interleaved or truncated this may need to be increased
+#ifndef TEST_TRACE_BACKTRACE_BUFFER_SIZE
+#define TEST_TRACE_BACKTRACE_BUFFER_SIZE 8192
+#endif
+void *test_trace_backtrace_buffer[
+    TEST_TRACE_BACKTRACE_BUFFER_SIZE / sizeof(void*)];
 
 // trace printing
 void test_trace(const char *fmt, ...) {
     if (test_trace_path) {
+        // sample at a specific period?
+        if (test_trace_period) {
+            if (test_trace_cycles % test_trace_period != 0) {
+                test_trace_cycles += 1;
+                return;
+            }
+            test_trace_cycles += 1;
+        }
+
+        // sample at a specific frequency?
+        if (test_trace_freq) {
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - test_trace_time < (1000*1000*1000) / test_trace_freq) {
+                return;
+            }
+            test_trace_time = now;
+        }
+
         if (!test_trace_file) {
             // Tracing output is heavy and trying to open every trace
             // call is slow, so we only try to open the trace file every
             // so often. Note this doesn't affect successfully opened files
-            if (test_trace_cycles % 128 != 0) {
-                test_trace_cycles += 1;
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - test_trace_open_time < 100*1000*1000) {
                 return;
             }
-            test_trace_cycles += 1;
+            test_trace_open_time = now;
 
+            // try to open the trace file
             int fd;
             if (strcmp(test_trace_path, "-") == 0) {
                 fd = dup(1);
@@ -461,19 +500,42 @@
 
             FILE *f = fdopen(fd, "a");
             assert(f);
-            int err = setvbuf(f, NULL, _IOLBF, BUFSIZ);
+            int err = setvbuf(f, NULL, _IOFBF,
+                    TEST_TRACE_BACKTRACE_BUFFER_SIZE);
             assert(!err);
             test_trace_file = f;
         }
 
+        // print trace
         va_list va;
         va_start(va, fmt);
         int res = vfprintf(test_trace_file, fmt, va);
+        va_end(va);
         if (res < 0) {
             fclose(test_trace_file);
             test_trace_file = NULL;
+            return;
         }
-        va_end(va);
+
+        if (test_trace_backtrace) {
+            // print backtrace
+            size_t count = backtrace(
+                    test_trace_backtrace_buffer,
+                    TEST_TRACE_BACKTRACE_BUFFER_SIZE);
+            // note we skip our own stack frame
+            for (size_t i = 1; i < count; i++) {
+                res = fprintf(test_trace_file, "\tat %p\n",
+                        test_trace_backtrace_buffer[i]);
+                if (res < 0) {
+                    fclose(test_trace_file);
+                    test_trace_file = NULL;
+                    return;
+                }
+            }
+        }
+
+        // flush immediately
+        fflush(test_trace_file);
     }
 }
 
@@ -1838,9 +1900,12 @@ enum opt_flags {
     OPT_STEP                     = 's',
     OPT_DISK                     = 'd',
     OPT_TRACE                    = 't',
-    OPT_READ_SLEEP               = 8,
-    OPT_PROG_SLEEP               = 9,
-    OPT_ERASE_SLEEP              = 10,
+    OPT_TRACE_BACKTRACE          = 8,
+    OPT_TRACE_PERIOD             = 9,
+    OPT_TRACE_FREQ               = 10,
+    OPT_READ_SLEEP               = 11,
+    OPT_PROG_SLEEP               = 12,
+    OPT_ERASE_SLEEP              = 13,
 };
 
 const char *short_opts = "hYlLD:G:P:s:d:t:";
@@ -1865,6 +1930,9 @@ const struct option long_opts[] = {
     {"step",             required_argument, NULL, OPT_STEP},
     {"step",             required_argument, NULL, OPT_STEP},
     {"disk",             required_argument, NULL, OPT_DISK},
     {"disk",             required_argument, NULL, OPT_DISK},
     {"trace",            required_argument, NULL, OPT_TRACE},
     {"trace",            required_argument, NULL, OPT_TRACE},
+    {"trace-backtrace",  no_argument,       NULL, OPT_TRACE_BACKTRACE},
+    {"trace-period",     required_argument, NULL, OPT_TRACE_PERIOD},
+    {"trace-freq",       required_argument, NULL, OPT_TRACE_FREQ},
     {"read-sleep",       required_argument, NULL, OPT_READ_SLEEP},
     {"read-sleep",       required_argument, NULL, OPT_READ_SLEEP},
     {"prog-sleep",       required_argument, NULL, OPT_PROG_SLEEP},
     {"prog-sleep",       required_argument, NULL, OPT_PROG_SLEEP},
     {"erase-sleep",      required_argument, NULL, OPT_ERASE_SLEEP},
     {"erase-sleep",      required_argument, NULL, OPT_ERASE_SLEEP},
@@ -1887,8 +1955,11 @@ const char *const help_text[] = {
     "Comma-separated list of disk geometries to test.",
     "Comma-separated list of disk geometries to test.",
     "Comma-separated list of power-loss scenarios to test.",
     "Comma-separated list of power-loss scenarios to test.",
     "Comma-separated range of test permutations to run (start,stop,step).",
     "Comma-separated range of test permutations to run (start,stop,step).",
-    "Redirect block device operations to this file.",
-    "Redirect trace output to this file.",
+    "Direct block device operations to this file.",
+    "Direct trace output to this file.",
+    "Include a backtrace with every trace statement.",
+    "Sample trace output at this period in cycles.",
+    "Sample trace output at this frequency in hz.",
     "Artificial read delay in seconds.",
     "Artificial read delay in seconds.",
     "Artificial prog delay in seconds.",
     "Artificial prog delay in seconds.",
     "Artificial erase delay in seconds.",
     "Artificial erase delay in seconds.",
@@ -2460,6 +2531,27 @@ step_unknown:
             case OPT_TRACE:
                 test_trace_path = optarg;
                 break;
+            case OPT_TRACE_BACKTRACE:
+                test_trace_backtrace = true;
+                break;
+            case OPT_TRACE_PERIOD: {
+                char *parsed = NULL;
+                test_trace_period = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-period: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
+            case OPT_TRACE_FREQ: {
+                char *parsed = NULL;
+                test_trace_freq = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-freq: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
             case OPT_READ_SLEEP: {
                 char *parsed = NULL;
                 double read_sleep = strtod(optarg, &parsed);

scripts/bench.py (+36 -26)

@@ -35,19 +35,12 @@ VALGRIND_TOOL = ['valgrind']
 PERF_SCRIPT = ['./scripts/perf.py']
 
 
-def openio(path, mode='r', buffering=-1, nb=False):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r', buffering)
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w', buffering)
-    elif nb and 'a' in mode:
-        return os.fdopen(os.open(
-                path,
-                os.O_WRONLY | os.O_CREAT | os.O_APPEND | os.O_NONBLOCK,
-                0o666),
-            mode,
-            buffering)
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
         return open(path, mode, buffering)
 
@@ -533,6 +526,12 @@ def find_runner(runner, **args):
         cmd.append('-d%s' % args['disk'])
     if args.get('trace'):
         cmd.append('-t%s' % args['trace'])
+    if args.get('trace_backtrace'):
+        cmd.append('--trace-backtrace')
+    if args.get('trace_period'):
+        cmd.append('--trace-period=%s' % args['trace_period'])
+    if args.get('trace_freq'):
+        cmd.append('--trace-freq=%s' % args['trace_freq'])
     if args.get('read_sleep'):
         cmd.append('--read-sleep=%s' % args['read_sleep'])
     if args.get('prog_sleep'):
@@ -747,7 +746,7 @@ class BenchFailure(Exception):
         self.stdout = stdout
         self.assert_ = assert_
 
-def run_stage(name, runner_, ids, output_, **args):
+def run_stage(name, runner_, ids, stdout_, trace_, output_, **args):
     # get expected suite/case/perm counts
     (case_suites,
         expected_suite_perms,
@@ -795,7 +794,6 @@ def run_stage(name, runner_, ids, output_, **args):
         os.close(spty)
         children.add(proc)
         mpty = os.fdopen(mpty, 'r', 1)
-        stdout = None
 
         last_id = None
         last_stdout = []
@@ -812,18 +810,12 @@ def run_stage(name, runner_, ids, output_, **args):
                 if not line:
                     break
                 last_stdout.append(line)
-                if args.get('stdout'):
+                if stdout_:
                     try:
-                        if not stdout:
-                            stdout = openio(args['stdout'], 'a', 1, nb=True)
-                        stdout.write(line)
-                    except OSError as e:
-                        if e.errno not in [
-                                errno.ENXIO,
-                                errno.EPIPE,
-                                errno.EAGAIN]:
-                            raise
-                        stdout = None
+                        stdout_.write(line)
+                        stdout_.flush()
+                    except BrokenPipeError:
+                        pass
                 if args.get('verbose'):
                     sys.stdout.write(line)
 
@@ -1061,6 +1053,8 @@ def run(runner, bench_ids=[], **args):
                 by or 'benches',
                 runner_,
                 [by] if by is not None else bench_ids,
+                stdout,
+                trace,
                 output,
                 **args)
         # collect passes/failures
@@ -1076,9 +1070,15 @@ def run(runner, bench_ids=[], **args):
     stop = time.time()
 
     if stdout:
-        stdout.close()
+        try:
+            stdout.close()
+        except BrokenPipeError:
+            pass
     if trace:
-        trace.close()
+        try:
+            trace.close()
+        except BrokenPipeError:
+            pass
     if output:
         output.close()
 
@@ -1276,6 +1276,16 @@ if __name__ == "__main__":
     bench_parser.add_argument(
         '-t', '--trace',
         help="Direct trace output to this file.")
+    bench_parser.add_argument(
+        '--trace-backtrace',
+        action='store_true',
+        help="Include a backtrace with every trace statement.")
+    bench_parser.add_argument(
+        '--trace-period',
+        help="Sample trace output at this period in cycles.")
+    bench_parser.add_argument(
+        '--trace-freq',
+        help="Sample trace output at this frequency in hz.")
     bench_parser.add_argument(
         '-O', '--stdout',
         help="Direct stdout to this file. Note stderr is already merged here.")
@@ -1353,7 +1363,7 @@ if __name__ == "__main__":
         default=VALGRIND_TOOL,
         help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL)
     bench_parser.add_argument(
-        '--perf',
+        '-p', '--perf',
         help="Run under Linux's perf to sample performance counters, writing "
             "samples to this file.")
     bench_parser.add_argument(

scripts/code.py (+14 -31)

@@ -15,7 +15,6 @@
 import collections as co
 import csv
 import difflib
-import glob
 import itertools as it
 import math as m
 import os
@@ -24,7 +23,6 @@ import shlex
 import subprocess as sp
 
 
-OBJ_PATHS = ['*.o']
 NM_TOOL = ['nm']
 NM_TYPES = 'tTrRdD'
 OBJDUMP_TOOL = ['objdump']
@@ -126,16 +124,16 @@ class CodeResult(co.namedtuple('CodeResult', [
             self.size + other.size)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
-def collect(paths, *,
+def collect(obj_paths, *,
         nm_tool=NM_TOOL,
         nm_types=NM_TYPES,
         objdump_tool=OBJDUMP_TOOL,
@@ -147,17 +145,17 @@ def collect(paths, *,
         ' (?P<type>[%s])' % re.escape(nm_types) +
         ' (?P<func>.+?)$')
     line_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)$')
+        '^\s+(?P<no>[0-9]+)'
+            '(?:\s+(?P<dir>[0-9]+))?'
+            '\s+.*'
+            '\s+(?P<path>[^\s]+)$')
     info_pattern = re.compile(
         '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
+            '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
 
     results = []
-    for path in paths:
+    for path in obj_paths:
         # guess the source, if we have debug-info we'll replace this later
         file = re.sub('(\.o)?$', '.c', path, 1)
 
@@ -520,20 +518,7 @@ def main(obj_paths, *,
         **args):
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in obj_paths:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .o files found in %r?" % obj_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(obj_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -613,9 +598,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'obj_paths',
         nargs='*',
-        default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory "
-            "or a list of paths. Defaults to %r." % OBJ_PATHS)
+        help="Input *.o files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

scripts/cov.py (+6 -24)

@@ -14,7 +14,6 @@
 
 import collections as co
 import csv
-import glob
 import itertools as it
 import json
 import math as m
@@ -26,8 +25,6 @@ import subprocess as sp
 # TODO use explode_asserts to avoid counting assert branches?
 # TODO use dwarf=info to find functions for inline functions?
 
-
-GCDA_PATHS = ['*.gcda']
 GCOV_TOOL = ['gcov']
 
 
@@ -202,14 +199,14 @@ class CovResult(co.namedtuple('CovResult', [
             self.branches + other.branches)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def collect(gcda_paths, *,
         gcov_tool=GCOV_TOOL,
@@ -592,20 +589,7 @@ def main(gcda_paths, *,
 
     # find sizes
     if not args.get('use', None):
-        # find .gcda files
-        paths = []
-        for path in gcda_paths:
-            if os.path.isdir(path):
-                path = path + '/*.gcda'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .gcda files found in %r?" % gcda_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(gcda_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -707,9 +691,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'gcda_paths',
         nargs='*',
-        default=GCDA_PATHS,
-        help="Description of where to find *.gcda files. May be a directory "
-            "or a list of paths. Defaults to %r." % GCDA_PATHS)
+        help="Input *.gcda files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

scripts/data.py (+14 -31)

@@ -15,7 +15,6 @@
 import collections as co
 import csv
 import difflib
-import glob
 import itertools as it
 import math as m
 import os
@@ -24,7 +23,6 @@ import shlex
 import subprocess as sp
 
 
-OBJ_PATHS = ['*.o']
 NM_TOOL = ['nm']
 NM_TYPES = 'dDbB'
 OBJDUMP_TOOL = ['objdump']
@@ -126,16 +124,16 @@ class DataResult(co.namedtuple('DataResult', [
             self.size + other.size)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
-def collect(paths, *,
+def collect(obj_paths, *,
         nm_tool=NM_TOOL,
         nm_types=NM_TYPES,
         objdump_tool=OBJDUMP_TOOL,
@@ -147,17 +145,17 @@ def collect(paths, *,
         ' (?P<type>[%s])' % re.escape(nm_types) +
         ' (?P<func>.+?)$')
     line_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)$')
+        '^\s+(?P<no>[0-9]+)'
+            '(?:\s+(?P<dir>[0-9]+))?'
+            '\s+.*'
+            '\s+(?P<path>[^\s]+)$')
     info_pattern = re.compile(
         '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
+            '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
 
     results = []
-    for path in paths:
+    for path in obj_paths:
         # guess the source, if we have debug-info we'll replace this later
         file = re.sub('(\.o)?$', '.c', path, 1)
 
@@ -520,20 +518,7 @@ def main(obj_paths, *,
         **args):
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in obj_paths:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .o files found in %r?" % obj_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(obj_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -613,9 +598,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'obj_paths',
         nargs='*',
-        default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory "
-            "or a list of paths. Defaults to %r." % OBJ_PATHS)
+        help="Input *.o files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

scripts/perf.py (+313 -297)

@@ -4,7 +4,7 @@
 #
 # Example:
 # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
-# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
+# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
 #
 # Copyright (c) 2022, The littlefs authors.
 # SPDX-License-Identifier: BSD-3-Clause
@@ -16,7 +16,6 @@ import csv
 import errno
 import fcntl
 import functools as ft
-import glob
 import itertools as it
 import math as m
 import multiprocessing as mp
@@ -31,7 +30,6 @@ import zipfile
 # TODO support non-zip perf results?
 
 
-PERF_PATHS = ['*.perf']
 PERF_TOOL = ['perf']
 PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
 PERF_FREQ = 100
@@ -147,14 +145,14 @@ class PerfResult(co.namedtuple('PerfResult', [
             self.children + other.children)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 # run perf as a subprocess, storing measurements into a zip file
 def record(command, *,
@@ -164,14 +162,6 @@ def record(command, *,
         perf_events=PERF_EVENTS,
         perf_tool=PERF_TOOL,
         **args):
-    if not command:
-        print('error: no command specified?')
-        sys.exit(-1)
-
-    if not output:
-        print('error: no output file specified?')
-        sys.exit(-1)
-
     # create a temporary file for perf to write to, as far as I can tell
     # this is strictly needed because perf's pipe-mode only works with stdout
     with tempfile.NamedTemporaryFile('rb') as f:
@@ -214,8 +204,187 @@ def record(command, *,
     return err
 
 
+# try to only process each dso once
+#
+# note this only caches with the non-keyword arguments
+def multiprocessing_cache(f):
+    local_cache = {}
+    manager = mp.Manager()
+    global_cache = manager.dict()
+    lock = mp.Lock()
+
+    def multiprocessing_cache(*args, **kwargs):
+        # check local cache?
+        if args in local_cache:
+            return local_cache[args]
+        # check global cache?
+        with lock:
+            if args in global_cache:
+                v = global_cache[args]
+                local_cache[args] = v
+                return v
+            # fall back to calling the function
+            v = f(*args, **kwargs)
+            global_cache[args] = v
+            local_cache[args] = v
+            return v
+
+    return multiprocessing_cache
+
+@multiprocessing_cache
+def collect_syms_and_lines(obj_path, *,
+        objdump_tool=None,
+        **args):
+    symbol_pattern = re.compile(
+        '^(?P<addr>[0-9a-fA-F]+)'
+            '\s+.*'
+            '\s+(?P<size>[0-9a-fA-F]+)'
+            '\s+(?P<name>[^\s]+)\s*$')
+    line_pattern = re.compile(
+        '^\s+(?:'
+            # matches dir/file table
+            '(?P<no>[0-9]+)'
+                '(?:\s+(?P<dir>[0-9]+))?'
+                '\s+.*'
+                '\s+(?P<path>[^\s]+)'
+            # matches line opcodes
+            '|' '\[[^\]]*\]\s+'
+                '(?:'
+                    '(?P<op_special>Special)'
+                    '|' '(?P<op_copy>Copy)'
+                    '|' '(?P<op_end>End of Sequence)'
+                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
+                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
+                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
+                    '|' '.' ')*'
+            ')$', re.IGNORECASE)
+
+    # figure out symbol addresses and file+line ranges
+    syms = {}
+    sym_at = []
+    cmd = objdump_tool + ['-t', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = symbol_pattern.match(line)
+        if m:
+            name = m.group('name')
+            addr = int(m.group('addr'), 16)
+            size = int(m.group('size'), 16)
+            # ignore zero-sized symbols
+            if not size:
+                continue
+            # note multiple symbols can share a name
+            if name not in syms:
+                syms[name] = set()
+            syms[name].add((addr, size))
+            sym_at.append((addr, name, size))
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep largest/first when duplicates
+    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
+    sym_at_ = []
+    for addr, name, size in sym_at:
+        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
+            sym_at_.append((addr, name, size))
+    sym_at = sym_at_
+
+    # state machine for dwarf line numbers, note that objdump's
+    # decodedline seems to have issues with multiple dir/file
+    # tables, which is why we need this
+    lines = []
+    line_at = []
+    dirs = {}
+    files = {}
+    op_file = 1
+    op_line = 1
+    op_addr = 0
+    cmd = objdump_tool + ['--dwarf=rawline', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = line_pattern.match(line)
+        if m:
+            if m.group('no') and not m.group('dir'):
+                # found a directory entry
+                dirs[int(m.group('no'))] = m.group('path')
+            elif m.group('no'):
+                # found a file entry
+                dir = int(m.group('dir'))
+                if dir in dirs:
+                    files[int(m.group('no'))] = os.path.join(
+                        dirs[dir],
+                        m.group('path'))
+                else:
+                    files[int(m.group('no'))] = m.group('path')
+            else:
+                # found a state machine update
+                if m.group('op_file'):
+                    op_file = int(m.group('op_file'), 0)
+                if m.group('op_line'):
+                    op_line = int(m.group('op_line'), 0)
+                if m.group('op_addr'):
+                    op_addr = int(m.group('op_addr'), 0)
+
+                if (m.group('op_special')
+                        or m.group('op_copy')
+                        or m.group('op_end')):
+                    file = os.path.abspath(files.get(op_file, '?'))
+                    lines.append((file, op_line, op_addr))
+                    line_at.append((op_addr, file, op_line))
+
+                if m.group('op_end'):
+                    op_file = 1
+                    op_line = 1
+                    op_addr = 0
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep first when duplicates
+    lines.sort()
+    lines_ = []
+    for file, line, addr in lines:
+        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
+            lines_.append((file, line, addr))
+    lines = lines_
+
+    # sort and keep first when duplicates
+    line_at.sort()
+    line_at_ = []
+    for addr, file, line in line_at:
+        if len(line_at_) == 0 or line_at_[-1][0] != addr:
+            line_at_.append((addr, file, line))
+    line_at = line_at_
+
+    return syms, sym_at, lines, line_at
+
+
 def collect_decompressed(path, *,
         perf_tool=PERF_TOOL,
+        sources=None,
         everything=False,
         propagate=0,
         depth=1,
@@ -228,7 +397,7 @@ def collect_decompressed(path, *,
         '\s+(?P<event>[^:]+):')
     frame_pattern = re.compile(
         '\s+(?P<addr>\w+)'
-        '\s+(?P<sym>[^\s]+)'
+        '\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
         '\s+\((?P<dso>[^\)]+)\)')
     events = {
         'cycles':           'cycles',
@@ -254,6 +423,9 @@ def collect_decompressed(path, *,
     last_event = ''
     last_period = 0
     last_stack = []
+    deltas = co.defaultdict(lambda: {})
+    syms_ = co.defaultdict(lambda: {})
+    at_cache = {}
     results = {}
 
     def commit():
@@ -276,36 +448,117 @@ def collect_decompressed(path, *,
     for line in proc.stdout:
         # we need to process a lot of data, so wait to use regex as late
         # as possible
-        if not line:
-            continue
         if not line.startswith('\t'):
-            m = sample_pattern.match(line)
-            if m:
-                if last_stack:
-                    commit()
-                last_event = m.group('event')
-                last_filtered = last_event in events
-                last_period = int(m.group('period'), 0)
-                last_stack = []
+            if last_filtered:
+                commit()
+            last_filtered = False
+
+            if line:
+                m = sample_pattern.match(line)
+                if m and m.group('event') in events:
+                    last_filtered = True
+                    last_event = m.group('event')
+                    last_period = int(m.group('period'), 0)
+                    last_stack = []
+
         elif last_filtered:
             m = frame_pattern.match(line)
             if m:
                 # filter out internal/kernel functions
                 if not everything and (
                         m.group('sym').startswith('__')
-                        or m.group('dso').startswith('/usr/lib')
-                        or not m.group('sym')[:1].isalpha()):
+                        or m.group('sym').startswith('0')
+                        or m.group('sym').startswith('-')
+                        or m.group('sym').startswith('[')
+                        or m.group('dso').startswith('/usr/lib')):
                     continue
 
-                last_stack.append((
-                    m.group('dso'),
-                    m.group('sym'),
-                    int(m.group('addr'), 16)))
+                dso = m.group('dso')
+                sym = m.group('sym')
+                off = int(m.group('off'), 0) if m.group('off') else 0
+                addr_ = int(m.group('addr'), 16)
+
+                # get the syms/lines for the dso, this is cached
+                syms, sym_at, lines, line_at = collect_syms_and_lines(
+                    dso,
+                    **args)
+
+                # ASLR is tricky, we have symbols+offsets, but static symbols
+                # means we may have multiple options for each symbol.
+                #
+                # To try to solve this, we use previously seen symbols to build
+                # confidence for the correct ASLR delta. This means we may
+                # guess incorrectly for early symbols, but this will only affect
+                # a few samples.
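+                #
+                # for example (hypothetical addresses), if we sampled foo at
+                # runtime address 0x7f0000001000 and the symbol table puts
+                # foo at 0x1000, the delta -0x7f0000000000 accumulates the
+                # least error across samples and wins below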
+                if sym in syms:
+                    sym_addr_ = addr_ - off
+
+                    # track possible deltas?
+                    for sym_addr, size in syms[sym]:
+                        delta = sym_addr - sym_addr_
+                        if delta not in deltas[dso]:
+                            deltas[dso][delta] = sum(
+                                abs(a_+delta - a)
+                                for s, (a_, _) in syms_[dso].items()
+                                for a, _ in syms[s])
+                    for delta in deltas[dso].keys():
+                        deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
+                    syms_[dso][sym] = sym_addr_, size
+
+                    # guess the best delta
+                    delta, _ = min(deltas[dso].items(),
+                        key=lambda x: (x[1], x[0]))
+                    addr = addr_ + delta
+
+                    # cached?
+                    if (dso,addr) in at_cache:
+                        cached = at_cache[(dso,addr)]
+                        if cached is None:
+                            # cache says to skip
+                            continue
+                        file, line = cached
+                    else:
+                        # find file+line
+                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
+                        if i > 0:
+                            _, file, line = line_at[i-1]
+                        else:
+                            file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
+
+                        # ignore filtered sources
+                        if sources is not None:
+                            if not any(
+                                    os.path.abspath(file) == os.path.abspath(s)
+                                    for s in sources):
+                                at_cache[(dso,addr)] = None
+                                continue
+                        else:
+                            # default to only cwd
+                            if not everything and not os.path.commonpath([
+                                    os.getcwd(),
+                                    os.path.abspath(file)]) == os.getcwd():
+                                at_cache[(dso,addr)] = None
+                                continue
+
+                        # simplify path
+                        if os.path.commonpath([
+                                os.getcwd(),
+                                os.path.abspath(file)]) == os.getcwd():
+                            file = os.path.relpath(file)
+                        else:
+                            file = os.path.abspath(file)
+
+                        at_cache[(dso,addr)] = file, line
+                else:
+                    file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
+
+                last_stack.append((file, sym, line))
 
                 # stop propagating?
                 if propagate and len(last_stack) >= propagate:
+                    commit()
                     last_filtered = False
-    if last_stack:
+    if last_filtered:
         commit()
 
     proc.wait()
@@ -341,35 +594,15 @@ def starapply(args):
     f, args, kwargs = args
     return f(*args, **kwargs)
 
-def collect(paths, *,
+def collect(perf_paths, *,
         jobs=None,
-        objdump_tool=None,
-        sources=None,
-        everything=False,
         **args):
-    symbol_pattern = re.compile(
-        '^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$')
-    line_pattern = re.compile(
-        '^\s+(?:'
-            # matches dir/file table
-            '(?P<no>[0-9]+)\s+'
-                '(?:(?P<dir>[0-9]+)\s+)?'
-                '.*\s+'
-                '(?P<path>[^\s]+)'
-            # matches line opcodes
-            '|' '\[[^\]]*\]\s+'
-                '(?:'
-                    '(?P<op_special>Special)'
-                    '|' '(?P<op_copy>Copy)'
-                    '|' '(?P<op_end>End of Sequence)'
-                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
-                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
-                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
-                    '|' '.' ')*'
-            ')$', re.IGNORECASE)
+    # automatic job detection?
+    if jobs == 0:
+        jobs = len(os.sched_getaffinity(0))
 
     records = []
-    for path in paths:
+    for path in perf_paths:
         # each .perf file is actually a zip file containing perf files from
         # multiple runs
         with zipfile.ZipFile(path) as z:
@@ -377,225 +610,17 @@
 
     # we're dealing with a lot of data but also surprisingly
     # parallelizable
-    dsos = {}
-    results = []
-    with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p:
-        for results_ in p.imap_unordered(
-                starapply,
-                ((collect_job, (path, i), dict(
-                    everything=everything,
-                    **args))
-                    for path, i in records)):
-
-            # organize by dso
-            results__ = {}
-            for r in results_:
-                if r.file not in results__:
-                    results__[r.file] = []
-                results__[r.file].append(r)
-            results_ = results__
-
-            for dso, results_ in results_.items():
-                if dso not in dsos:
-                    # find file+line ranges for dsos
-                    #
-                    # do this here so we only process each dso once
-                    syms = {}
-                    sym_at = []
-                    cmd = objdump_tool + ['-t', dso]
-                    if args.get('verbose'):
-                        print(' '.join(shlex.quote(c) for c in cmd))
-                    proc = sp.Popen(cmd,
-                        stdout=sp.PIPE,
-                        stderr=sp.PIPE if not args.get('verbose') else None,
-                        universal_newlines=True,
-                        errors='replace',
-                        close_fds=False)
-                    for line in proc.stdout:
-                        m = symbol_pattern.match(line)
-                        if m:
-                            name = m.group('name')
-                            addr = int(m.group('addr'), 16)
-                            # note multiple symbols can share a name
-                            if name not in syms:
-                                syms[name] = set()
-                            syms[name].add(addr)
-                            sym_at.append((addr, name))
-                    proc.wait()
-                    if proc.returncode != 0:
-                        if not args.get('verbose'):
-                            for line in proc.stderr:
-                                sys.stdout.write(line)
-                        # assume no debug-info on failure
-                        pass
-
-                    # sort and keep first when duplicates
-                    sym_at.sort()
-                    sym_at_ = []
-                    for addr, name in sym_at:
-                        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
-                            sym_at_.append((addr, name))
-                    sym_at = sym_at_
-
-                    # state machine for dwarf line numbers, note that objdump's
-                    # decodedline seems to have issues with multiple dir/file
-                    # tables, which is why we need this
-                    line_at = []
-                    dirs = {}
-                    files = {}
-                    op_file = 1
-                    op_line = 1
-                    op_addr = 0
-                    cmd = objdump_tool + ['--dwarf=rawline', dso]
-                    if args.get('verbose'):
-                        print(' '.join(shlex.quote(c) for c in cmd))
-                    proc = sp.Popen(cmd,
-                        stdout=sp.PIPE,
-                        stderr=sp.PIPE if not args.get('verbose') else None,
-                        universal_newlines=True,
-                        errors='replace',
-                        close_fds=False)
-                    for line in proc.stdout:
-                        m = line_pattern.match(line)
-                        if m:
-                            if m.group('no') and not m.group('dir'):
-                                # found a directory entry
-                                dirs[int(m.group('no'))] = m.group('path')
-                            elif m.group('no'):
-                                # found a file entry
-                                dir = int(m.group('dir'))
-                                if dir in dirs:
-                                    files[int(m.group('no'))] = os.path.join(
-                                        dirs[dir],
-                                        m.group('path'))
-                                else:
-                                    files[int(m.group('no'))] = m.group('path')
-                            else:
-                                # found a state machine update
-                                if m.group('op_file'):
-                                    op_file = int(m.group('op_file'), 0)
-                                if m.group('op_line'):
-                                    op_line = int(m.group('op_line'), 0)
-                                if m.group('op_addr'):
-                                    op_addr = int(m.group('op_addr'), 0)
-
-                                if (m.group('op_special')
-                                        or m.group('op_copy')
-                                        or m.group('op_end')):
-                                    line_at.append((
-                                        op_addr,
-                                        files.get(op_file, '?'),
-                                        op_line))
-
-                                if m.group('op_end'):
-                                    op_file = 1
-                                    op_line = 1
-                                    op_addr = 0
-                    proc.wait()
-                    if proc.returncode != 0:
-                        if not args.get('verbose'):
-                            for line in proc.stderr:
-                                sys.stdout.write(line)
-                        # assume no debug-info on failure
-                        pass
-
-                    # sort and keep first when duplicates
-                    #
-                    # I think dwarf requires this to be sorted but just in case
-                    line_at.sort()
-                    line_at_ = []
-                    for addr, file, line in line_at:
-                        if len(line_at_) == 0 or line_at_[-1][0] != addr:
-                            line_at_.append((addr, file, line))
-                    line_at = line_at_
-
-                    # discard lines outside of the range of the containing
-                    # function, these are introduced by dwarf for inlined
-                    # functions but don't map to elf-level symbols
-                    sym_at_ = []
-                    for addr, sym in sym_at:
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
-                            sym_at_.append((file, line, sym))
-                    sym_at_.sort()
-
-                    line_at_ = []
-                    for addr, file, line in line_at:
-                        # only keep if sym-at-addr and sym-at-line match
-                        i = bisect.bisect(
-                            sym_at, addr, key=lambda x: x[0])
-                        j = bisect.bisect(
-                            sym_at_, (file, line), key=lambda x: (x[0], x[1]))
-                        if i > 0 and j > 0 and (
-                                sym_at[i-1][1] == sym_at_[j-1][2]):
-                            line_at_.append((addr, file, line))
-                    line_at = line_at_
-
-                    dsos[dso] = (syms, sym_at, line_at)
-
-                syms, _, line_at = dsos[dso]
-
-                # first try to reverse ASLR
-                def deltas(r, d):
-                    if '+' in r.function:
-                        sym, off = r.function.split('+', 1)
-                        off = int(off, 0)
-                    else:
-                        sym, off = r.function, 0
-                    addr = r.line - off + d
-
-                    for addr_ in syms.get(sym, []):
-                        yield addr_ - addr
-
-                delta = min(
-                    it.chain.from_iterable(
-                        deltas(r, 0) for r in results_),
-                    key=lambda d: sum(it.chain.from_iterable(
-                        deltas(r, d) for r in results_)),
-                    default=0)
-
-                # then try to map addrs -> file+line
-                #
-                # note we need to do this recursively
-                def remap(results):
-                    results_ = []
-                    for r in results:
-                        addr = r.line + delta
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
-                        else:
-                            file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
-
-                        # ignore filtered sources
-                        if sources is not None:
-                            if not any(
-                                    os.path.abspath(file) == os.path.abspath(s)
-                                    for s in sources):
-                                continue
-                        else:
-                            # default to only cwd
-                            if not everything and not os.path.commonpath([
-                                    os.getcwd(),
-                                    os.path.abspath(file)]) == os.getcwd():
-                                continue
-
-                        # simplify path
-                        if os.path.commonpath([
-                                os.getcwd(),
-                                os.path.abspath(file)]) == os.getcwd():
-                            file = os.path.relpath(file)
-                        else:
-                            file = os.path.abspath(file)
-
-                        function, *_ = r.function.split('+', 1)
-                        results_.append(r._replace(
-                            file=file, function=function, line=line,
-                            children=remap(r.children)))
-                    return results_
-
-                results.extend(remap(results_))
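+    # note jobs=None runs everything serially in this process, while
+    # jobs=0 was expanded to the number of available cores above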
+    if jobs is not None:
+        results = []
+        with mp.Pool(jobs) as p:
+            for results_ in p.imap_unordered(
+                    starapply,
+                    ((collect_job, (path, i), args) for path, i in records)):
+                results.extend(results_)
+    else:
+        results = []
+        for path, i in records:
+            results.extend(collect_job(path, i, **args))
 
     return results
 
@@ -640,7 +665,7 @@ def fold(Result, results, *,
             Result, r.children,
             by=by,
             defines=defines)))
-    folded = folded_ 
+    folded = folded_
 
     return folded
 
@@ -983,7 +1008,6 @@ def report(perf_paths, *,
         fields=None,
         defines=None,
         sort=None,
-        self=False,
         branches=False,
         caches=False,
         **args):
@@ -1001,20 +1025,7 @@
 
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in perf_paths:
-            if os.path.isdir(path):
-                path = path + '/*.perf'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .perf files found in %r?" % perf_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(perf_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -1124,8 +1135,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'perf_paths',
         nargs=nargs,
-        help="Description of where to find *.perf files. May be a directory "
-            "or a list of paths. Defaults to %r." % PERF_PATHS)
+        help="Input *.perf files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',
@@ -1224,7 +1234,7 @@ if __name__ == "__main__":
         nargs='?',
         type=lambda x: tuple(float(x) for x in x.split(',')),
         const=THRESHOLD,
-        help="Show lines wth samples above this threshold as a percent of "
+        help="Show lines with samples above this threshold as a percent of "
             "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
             "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
     parser.add_argument(
     parser.add_argument(
         '-c', '--context',
         '-c', '--context',
@@ -1295,7 +1305,13 @@ if __name__ == "__main__":
 
     # perf_paths/command overlap, so need to do some munging here
     args.command = args.perf_paths
-    args.perf_paths = args.perf_paths or PERF_PATHS
+    if args.record:
+        if not args.command:
+            print('error: no command specified?')
+            sys.exit(-1)
+        if not args.output:
+            print('error: no output file specified?')
+            sys.exit(-1)
 
     sys.exit(main(**{k: v
         for k, v in vars(args).items()

+ 1252 - 0
scripts/perfbd.py

@@ -0,0 +1,1252 @@
+#!/usr/bin/env python3
+#
+# Aggregate and report call-stack propagated block-device operations
+# from trace output.
+#
+# Example:
+# ./scripts/bench.py -ttrace
+# ./scripts/perfbd.py trace -j -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
+#
+# Copyright (c) 2022, The littlefs authors.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+import bisect
+import collections as co
+import csv
+import functools as ft
+import itertools as it
+import math as m
+import multiprocessing as mp
+import os
+import re
+import shlex
+import subprocess as sp
+import sys
+
+
+OBJDUMP_TOOL = ['objdump']
+THRESHOLD = (0.5, 0.85)
+
+
+# integer fields
+class Int(co.namedtuple('Int', 'x')):
+    __slots__ = ()
+    def __new__(cls, x=0):
+        if isinstance(x, Int):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = m.inf
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = -m.inf
+                else:
+                    raise
+        assert isinstance(x, int) or m.isinf(x), x
+        return super().__new__(cls, x)
+
+    def __str__(self):
+        if self.x == m.inf:
+            return '∞'
+        elif self.x == -m.inf:
+            return '-∞'
+        else:
+            return str(self.x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
+
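+    # table rendering, all fields are right-aligned in 7-char columns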
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == +m.inf:
+            return '%7s' % '+∞'
+        elif diff == -m.inf:
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return +m.inf
+        elif m.isinf(old):
+            return -m.inf
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
+
+    def __add__(self, other):
+        return self.__class__(self.x + other.x)
+
+    def __sub__(self, other):
+        return self.__class__(self.x - other.x)
+
+    def __mul__(self, other):
+        return self.__class__(self.x * other.x)
+
+# perfbd results
+class PerfBdResult(co.namedtuple('PerfBdResult', [
+        'file', 'function', 'line',
+        'readed', 'proged', 'erased',
+        'children'])):
+    _by = ['file', 'function', 'line']
+    _fields = ['readed', 'proged', 'erased']
+    _types = {'readed': Int, 'proged': Int, 'erased': Int}
+
+    __slots__ = ()
+    def __new__(cls, file='', function='', line=0,
+            readed=0, proged=0, erased=0,
+            children=[]):
+        return super().__new__(cls, file, function, int(Int(line)),
+            Int(readed), Int(proged), Int(erased),
+            children)
+
+    def __add__(self, other):
+        return PerfBdResult(self.file, self.function, self.line,
+            self.readed + other.readed,
+            self.proged + other.proged,
+            self.erased + other.erased,
+            self.children + other.children)
+
+
+def openio(path, mode='r', buffering=-1):
+    if path == '-':
+        if mode == 'r':
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
+        else:
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
+    else:
+        return open(path, mode, buffering)
+
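+# run objdump to find symbol addresses/sizes and dwarf line info, we
+# need both to map backtrace addresses back to function/file/line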
+def collect_syms_and_lines(obj_path, *,
+        objdump_tool=None,
+        **args):
+    symbol_pattern = re.compile(
+        '^(?P<addr>[0-9a-fA-F]+)'
+            '\s+.*'
+            '\s+(?P<size>[0-9a-fA-F]+)'
+            '\s+(?P<name>[^\s]+)\s*$')
+    line_pattern = re.compile(
+        '^\s+(?:'
+            # matches dir/file table
+            '(?P<no>[0-9]+)'
+                '(?:\s+(?P<dir>[0-9]+))?'
+                '\s+.*'
+                '\s+(?P<path>[^\s]+)'
+            # matches line opcodes
+            '|' '\[[^\]]*\]\s+'
+                '(?:'
+                    '(?P<op_special>Special)'
+                    '|' '(?P<op_copy>Copy)'
+                    '|' '(?P<op_end>End of Sequence)'
+                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
+                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
+                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
+                    '|' '.' ')*'
+            ')$', re.IGNORECASE)
+
+    # figure out symbol addresses
+    syms = {}
+    sym_at = []
+    cmd = objdump_tool + ['-t', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = symbol_pattern.match(line)
+        if m:
+            name = m.group('name')
+            addr = int(m.group('addr'), 16)
+            size = int(m.group('size'), 16)
+            # ignore zero-sized symbols
+            if not size:
+                continue
+            # note multiple symbols can share a name
+            if name not in syms:
+                syms[name] = set()
+            syms[name].add((addr, size))
+            sym_at.append((addr, name, size))
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep largest/first when duplicates
+    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
+    sym_at_ = []
+    for addr, name, size in sym_at:
+        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
+            sym_at_.append((addr, name, size))
+    sym_at = sym_at_
+
+    # state machine for dwarf line numbers, note that objdump's
+    # decodedline seems to have issues with multiple dir/file
+    # tables, which is why we need this
+    lines = []
+    line_at = []
+    dirs = {}
+    files = {}
+    op_file = 1
+    op_line = 1
+    op_addr = 0
+    cmd = objdump_tool + ['--dwarf=rawline', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = line_pattern.match(line)
+        if m:
+            if m.group('no') and not m.group('dir'):
+                # found a directory entry
+                dirs[int(m.group('no'))] = m.group('path')
+            elif m.group('no'):
+                # found a file entry
+                dir = int(m.group('dir'))
+                if dir in dirs:
+                    files[int(m.group('no'))] = os.path.join(
+                        dirs[dir],
+                        m.group('path'))
+                else:
+                    files[int(m.group('no'))] = m.group('path')
+            else:
+                # found a state machine update
+                if m.group('op_file'):
+                    op_file = int(m.group('op_file'), 0)
+                if m.group('op_line'):
+                    op_line = int(m.group('op_line'), 0)
+                if m.group('op_addr'):
+                    op_addr = int(m.group('op_addr'), 0)
+
+                if (m.group('op_special')
+                        or m.group('op_copy')
+                        or m.group('op_end')):
+                    file = os.path.abspath(files.get(op_file, '?'))
+                    lines.append((file, op_line, op_addr))
+                    line_at.append((op_addr, file, op_line))
+
+                if m.group('op_end'):
+                    op_file = 1
+                    op_line = 1
+                    op_addr = 0
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep first when duplicates
+    lines.sort()
+    lines_ = []
+    for file, line, addr in lines:
+        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
+            lines_.append((file, line, addr))
+    lines = lines_
+
+    # sort and keep first when duplicates
+    line_at.sort()
+    line_at_ = []
+    for addr, file, line in line_at:
+        if len(line_at_) == 0 or line_at_[-1][0] != addr:
+            line_at_.append((addr, file, line))
+    line_at = line_at_
+
+    return syms, sym_at, lines, line_at
+
+
+def collect_job(obj_path, path, start, stop,
+        syms, sym_at, lines, line_at, *,
+        sources=None,
+        everything=False,
+        propagate=0,
+        depth=1,
+        **args):
+    trace_pattern = re.compile(
+        '^(?P<file>[^:]*):(?P<line>[0-9]+):trace:\s*(?P<prefix>[^\s]*?bd_)(?:'
+            '(?P<read>read)\('
+                '\s*(?P<read_ctx>\w+)' '\s*,'
+                '\s*(?P<read_block>\w+)' '\s*,'
+                '\s*(?P<read_off>\w+)' '\s*,'
+                '\s*(?P<read_buffer>\w+)' '\s*,'
+                '\s*(?P<read_size>\w+)' '\s*\)'
+            '|' '(?P<prog>prog)\('
+                '\s*(?P<prog_ctx>\w+)' '\s*,'
+                '\s*(?P<prog_block>\w+)' '\s*,'
+                '\s*(?P<prog_off>\w+)' '\s*,'
+                '\s*(?P<prog_buffer>\w+)' '\s*,'
+                '\s*(?P<prog_size>\w+)' '\s*\)'
+            '|' '(?P<erase>erase)\('
+                '\s*(?P<erase_ctx>\w+)' '\s*,'
+                '\s*(?P<erase_block>\w+)'
+                '\s*\(\s*(?P<erase_size>\w+)\s*\)' '\s*\)' ')\s*$')
+    frame_pattern = re.compile(
+        '^\s+at (?P<addr>\w+)\s*$')
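+
+    # these patterns expect trace output that looks roughly like this
+    # (hypothetical addresses, backtraces must be enabled):
+    #
+    #   lfs.c:549:trace: lfs_bd_read(0x55ca65126e30, 0x1, 24, 0x55ca65127af0, 8)
+    #       at 0x55ca64f60f42
+    #       at 0x55ca64f613dc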
+
+    # parse all of the trace files for read/prog/erase operations
+    last_filtered = False
+    last_file = None
+    last_line = None
+    last_sym = None
+    last_readed = 0
+    last_proged = 0
+    last_erased = 0
+    last_stack = []
+    last_delta = None
+    at_cache = {}
+    results = {}
+
+    def commit():
+        # fallback to just capturing top-level measurements
+        if not last_stack:
+            file = last_file
+            sym = last_sym
+            line = last_line
+
+            # ignore filtered sources
+            if sources is not None:
+                if not any(
+                        os.path.abspath(file)
+                            == os.path.abspath(s)
+                        for s in sources):
+                    return
+            else:
+                # default to only cwd
+                if not everything and not os.path.commonpath([
+                        os.getcwd(),
+                        os.path.abspath(file)]) == os.getcwd():
+                    return
+
+            # simplify path
+            if os.path.commonpath([
+                    os.getcwd(),
+                    os.path.abspath(file)]) == os.getcwd():
+                file = os.path.relpath(file)
+            else:
+                file = os.path.abspath(file)
+
+            results[(file, sym, line)] = (
+                last_readed,
+                last_proged,
+                last_erased,
+                {})
+        else:
+            # tail-recursively propagate measurements
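+            #
+            # each frame in the backtrace is charged the full cost of the
+            # operation, with its callees recorded as children up to depth,
+            # so the cost of a bd operation propagates up to every caller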
+            for i in range(len(last_stack)):
+                results_ = results
+                for j in reversed(range(i+1)):
+                    if i+1-j > depth:
+                        break
+
+                    # propagate
+                    name = last_stack[j]
+                    if name in results_:
+                        r, p, e, children = results_[name]
+                    else:
+                        r, p, e, children = 0, 0, 0, {}
+                    results_[name] = (
+                        r+last_readed,
+                        p+last_proged,
+                        e+last_erased,
+                        children)
+
+                    # recurse
+                    results_ = results_[name][-1]
+
+    with openio(path) as f:
+        # try to jump to middle of file? we need to step out of utf8-safe
+        # mode and resync to the next newline to avoid parsing half a line
+        if start is not None and start > 0:
+            fd = f.fileno()
+            os.lseek(fd, start, os.SEEK_SET)
+            while os.read(fd, 1) not in {b'\n', b'\r', b''}:
+                pass
+            f = os.fdopen(fd)
+
+        for line in f:
+            # we have a lot of data, try to take a few shortcuts,
+            # string search is much faster than regex so try to use
+            # regex as late as possible.
+            if not line.startswith('\t'):
+                if last_filtered:
+                    commit()
+                last_filtered = False
+
+                # done processing our slice?
+                if stop is not None:
+                    if os.lseek(f.fileno(), 0, os.SEEK_CUR) > stop:
+                        break
+
+                if 'trace' in line and 'bd' in line:
+                    m = trace_pattern.match(line)
+                    if m:
+                        last_filtered = True
+                        last_file = os.path.abspath(m.group('file'))
+                        last_line = int(m.group('line'), 0)
+                        last_sym = m.group('prefix')
+                        last_readed = 0
+                        last_proged = 0
+                        last_erased = 0
+                        last_stack = []
+                        last_delta = None
+
+                        if m.group('read'):
+                            last_sym += m.group('read')
+                            last_readed += int(m.group('read_size'))
+                        elif m.group('prog'):
+                            last_sym += m.group('prog')
+                            last_proged += int(m.group('prog_size'))
+                        elif m.group('erase'):
+                            last_sym += m.group('erase')
+                            last_erased += int(m.group('erase_size'))
+
+            elif last_filtered:
+                m = frame_pattern.match(line)
+                if m:
+                    addr_ = int(m.group('addr'), 0)
+
+                    # before we can do anything with addr, we need to
+                    # reverse ASLR, fortunately we know the file+line of
+                    # the first stack frame, so we can use that as a point
+                    # of reference
+                    if last_delta is None:
+                        i = bisect.bisect(lines, (last_file, last_line),
+                            key=lambda x: (x[0], x[1]))
+                        if i > 0:
+                            last_delta = lines[i-1][2] - addr_
+                        else:
+                            # can't reverse ASLR, give up on backtrace
+                            commit()
+                            last_filtered = False
+                            continue
+
+                    addr = addr_ + last_delta
+
+                    # cached?
+                    if addr in at_cache:
+                        cached = at_cache[addr]
+                        if cached is None:
+                            # cache says to skip
+                            continue
+                        file, sym, line = cached
+                    else:
+                        # find sym
+                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
+                        # check that we're actually in the sym's size
+                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
+                            _, sym, _ = sym_at[i-1]
+                        else:
+                            sym = hex(addr)
+
+                        # filter out internal/unknown functions
+                        if not everything and (
+                                sym.startswith('__')
+                                or sym.startswith('0')
+                                or sym.startswith('-')
+                                or sym == '_start'):
+                            at_cache[addr] = None
+                            continue
+
+                        # find file+line
+                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
+                        if i > 0:
+                            _, file, line = line_at[i-1]
+                        elif len(last_stack) == 0:
+                            file, line = last_file, last_line
+                        else:
+                            file, line = re.sub('(\.o)?$', '.c', obj_path, 1), 0
+
+                        # ignore filtered sources
+                        if sources is not None:
+                            if not any(
+                                    os.path.abspath(file)
+                                        == os.path.abspath(s)
+                                    for s in sources):
+                                at_cache[addr] = None
+                                continue
+                        else:
+                            # default to only cwd
+                            if not everything and not os.path.commonpath([
+                                    os.getcwd(),
+                                    os.path.abspath(file)]) == os.getcwd():
+                                at_cache[addr] = None
+                                continue
+
+                        # simplify path
+                        if os.path.commonpath([
+                                os.getcwd(),
+                                os.path.abspath(file)]) == os.getcwd():
+                            file = os.path.relpath(file)
+                        else:
+                            file = os.path.abspath(file)
+
+                        at_cache[addr] = file, sym, line
+
+                    last_stack.append((file, sym, line))
+
+                    # stop propagating?
+                    if propagate and len(last_stack) >= propagate:
+                        commit()
+                        last_filtered = False
+        if last_filtered:
+            commit()
+
+    # rearrange results into result type
+    def to_results(results):
+        results_ = []
+        for name, (r, p, e, children) in results.items():
+            results_.append(PerfBdResult(*name,
+                r, p, e,
+                children=to_results(children)))
+        return results_
+
+    return to_results(results)
+
+def starapply(args):
+    f, args, kwargs = args
+    return f(*args, **kwargs)
+
+def collect(obj_path, trace_paths, *,
+        jobs=None,
+        **args):
+    # automatic job detection?
+    if jobs == 0:
+        jobs = len(os.sched_getaffinity(0))
+
+    # find sym/line info to reverse ASLR
+    syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)
+
+    if jobs is not None:
+        # try to split up files so that even single files can be processed
+        # in parallel
+        #
+        # this looks naive, since we're splitting up text files by bytes, but
+        # we do proper backtrace delimitation in collect_job
+        trace_ranges = []
+        for path in trace_paths:
+            if path == '-':
+                trace_ranges.append([(None, None)])
+                continue
+
+            size = os.path.getsize(path)
+            if size == 0:
+                trace_ranges.append([(None, None)])
+                continue
+
+            perjob = m.ceil(size / jobs)
+            trace_ranges.append([(i, i+perjob) for i in range(0, size, perjob)])
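+            # note these byte offsets are approximate, collect_job seeks
+            # to the next newline before it starts parsing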
+
+        results = []
+        with mp.Pool(jobs) as p:
+            for results_ in p.imap_unordered(
+                    starapply,
+                    ((collect_job, (obj_path, path, start, stop,
+                        syms, sym_at, lines, line_at),
+                        args)
+                        for path, ranges in zip(trace_paths, trace_ranges)
+                        for start, stop in ranges)):
+                results.extend(results_)
+
+    else:
+        results = []
+        for path in trace_paths:
+            results.extend(collect_job(obj_path, path, None, None,
+                syms, sym_at, lines, line_at,
+                **args))
+
+    return results
+
+
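+# fold results with matching 'by' fields together, summing their costs,
+# any children are folded recursively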
+def fold(Result, results, *,
+        by=None,
+        defines=None,
+        **_):
+    if by is None:
+        by = Result._by
+
+    for k in it.chain(by or [], (k for k, _ in defines or [])):
+        if k not in Result._by and k not in Result._fields:
+            print("error: could not find field %r?" % k)
+            sys.exit(-1)
+
+    # filter by matching defines
+    if defines is not None:
+        results_ = []
+        for r in results:
+            if all(getattr(r, k) in vs for k, vs in defines):
+                results_.append(r)
+        results = results_
+
+    # organize results into conflicts
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    # merge conflicts
+    folded = []
+    for name, rs in folding.items():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    # fold recursively
+    folded_ = []
+    for r in folded:
+        folded_.append(r._replace(children=fold(
+            Result, r.children,
+            by=by,
+            defines=defines)))
+    folded = folded_
+
+    return folded
+
+def table(Result, results, diff_results=None, *,
+        by=None,
+        fields=None,
+        sort=None,
+        summary=False,
+        all=False,
+        percent=False,
+        depth=1,
+        **_):
+    all_, all = all, __builtins__.all
+
+    if by is None:
+        by = Result._by
+    if fields is None:
+        fields = Result._fields
+    types = Result._types
+
+    # fold again
+    results = fold(Result, results, by=by)
+    if diff_results is not None:
+        diff_results = fold(Result, diff_results, by=by)
+
+    # organize by name
+    table = {
+        ','.join(str(getattr(r, k) or '') for k in by): r
+        for r in results}
+    diff_table = {
+        ','.join(str(getattr(r, k) or '') for k in by): r
+        for r in diff_results or []}
+    names = list(table.keys() | diff_table.keys())
+
+    # sort again, now with diff info, note that python's sort is stable
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: tuple(
+            types[k].ratio(
+                getattr(table.get(n), k, None),
+                getattr(diff_table.get(n), k, None))
+            for k in fields),
+            reverse=True)
+    if sort:
+        for k, reverse in reversed(sort):
+            names.sort(key=lambda n: (getattr(table[n], k),)
+                if getattr(table.get(n), k, None) is not None else (),
+                reverse=reverse ^ (not k or k in Result._fields))
+
+    # build up our lines
+    lines = []
+
+    # header
+    header = []
+    header.append('%s%s' % (
+        ','.join(by),
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else '')
+    if diff_results is None:
+        for k in fields:
+            header.append(k)
+    elif percent:
+        for k in fields:
+            header.append(k)
+    else:
+        for k in fields:
+            header.append('o'+k)
+        for k in fields:
+            header.append('n'+k)
+        for k in fields:
+            header.append('d'+k)
+    header.append('')
+    lines.append(header)
+
+    def table_entry(name, r, diff_r=None, ratios=[]):
+        entry = []
+        entry.append(name)
+        if diff_results is None:
+            for k in fields:
+                entry.append(getattr(r, k).table()
+                    if getattr(r, k, None) is not None
+                    else types[k].none)
+        elif percent:
+            for k in fields:
+                entry.append(getattr(r, k).diff_table()
+                    if getattr(r, k, None) is not None
+                    else types[k].diff_none)
+        else:
+            for k in fields:
+                entry.append(getattr(diff_r, k).diff_table()
+                    if getattr(diff_r, k, None) is not None
+                    else types[k].diff_none)
+            for k in fields:
+                entry.append(getattr(r, k).diff_table()
+                    if getattr(r, k, None) is not None
+                    else types[k].diff_none)
+            for k in fields:
+                entry.append(types[k].diff_diff(
+                        getattr(r, k, None),
+                        getattr(diff_r, k, None)))
+        if diff_results is None:
+            entry.append('')
+        elif percent:
+            entry.append(' (%s)' % ', '.join(
+                '+∞%' if t == +m.inf
+                else '-∞%' if t == -m.inf
+                else '%+.1f%%' % (100*t)
+                for t in ratios))
+        else:
+            entry.append(' (%s)' % ', '.join(
+                    '+∞%' if t == +m.inf
+                    else '-∞%' if t == -m.inf
+                    else '%+.1f%%' % (100*t)
+                    for t in ratios
+                    if t)
+                if any(ratios) else '')
+        return entry
+
+    # entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is None:
+                diff_r = None
+                ratios = None
+            else:
+                diff_r = diff_table.get(name)
+                ratios = [
+                    types[k].ratio(
+                        getattr(r, k, None),
+                        getattr(diff_r, k, None))
+                    for k in fields]
+                if not all_ and not any(ratios):
+                    continue
+            lines.append(table_entry(name, r, diff_r, ratios))
+
+    # total
+    r = next(iter(fold(Result, results, by=[])), None)
+    if diff_results is None:
+        diff_r = None
+        ratios = None
+    else:
+        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
+        ratios = [
+            types[k].ratio(
+                getattr(r, k, None),
+                getattr(diff_r, k, None))
+            for k in fields]
+    lines.append(table_entry('TOTAL', r, diff_r, ratios))
+
+    # find the best widths, note that column 0 contains the names and column -1
+    # the ratios, so those are handled a bit differently
+    widths = [
+        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
+        for w, i in zip(
+            it.chain([23], it.repeat(7)),
+            range(len(lines[0])-1))]
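+    # note this rounds each width+1 up to a multiple of 4, minus 1 for
+    # the separating space, so columns land on tabstop-ish boundaries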
+
+    # adjust the name width based on the expected call depth, though
+    # note this doesn't really work with unbounded recursion
+    if not summary and not m.isinf(depth):
+        widths[0] += 4*(depth-1)
+
+    # print the tree recursively
+    print('%-*s  %s%s' % (
+        widths[0], lines[0][0],
+        ' '.join('%*s' % (w, x)
+            for w, x in zip(widths[1:], lines[0][1:-1])),
+        lines[0][-1]))
+
+    if not summary:
+        def recurse(results_, depth_, prefixes=('', '', '', '')):
+            # rebuild our tables at each layer
+            table_ = {
+                ','.join(str(getattr(r, k) or '') for k in by): r
+                for r in results_}
+            names_ = list(table_.keys())
+
+            # sort again at each layer, keep in mind the numbers are
+            # changing as we descend
+            names_.sort()
+            if sort:
+                for k, reverse in reversed(sort):
+                    names_.sort(key=lambda n: (getattr(table_[n], k),)
+                        if getattr(table_.get(n), k, None) is not None else (),
+                        reverse=reverse ^ (not k or k in Result._fields))
+
+            for i, name in enumerate(names_):
+                r = table_[name]
+                is_last = (i == len(names_)-1)
+
+                print('%s%-*s  %s' % (
+                    prefixes[0+is_last],
+                    widths[0] - (
+                        len(prefixes[0+is_last])
+                        if not m.isinf(depth) else 0),
+                    name,
+                    ' '.join('%*s' % (w, x)
+                        for w, x in zip(
+                            widths[1:],
+                            table_entry(name, r)[1:]))))
+
+                # recurse?
+                if depth_ > 1:
+                    recurse(
+                        r.children,
+                        depth_-1,
+                        (prefixes[2+is_last] + "|-> ",
+                         prefixes[2+is_last] + "'-> ",
+                         prefixes[2+is_last] + "|   ",
+                         prefixes[2+is_last] + "    "))
+
+        # we have enough going on with diffing to make the top layer
+        # a special case
+        for name, line in zip(names, lines[1:-1]):
+            print('%-*s  %s%s' % (
+                widths[0], line[0],
+                ' '.join('%*s' % (w, x)
+                    for w, x in zip(widths[1:], line[1:-1])),
+                line[-1]))
+
+            if name in table and depth > 1:
+                recurse(
+                    table[name].children,
+                    depth-1,
+                    ("|-> ",
+                     "'-> ",
+                     "|   ",
+                     "    "))
+
+    print('%-*s  %s%s' % (
+        widths[0], lines[-1][0],
+        ' '.join('%*s' % (w, x)
+            for w, x in zip(widths[1:], lines[-1][1:-1])),
+        lines[-1][-1]))
+
+
+def annotate(Result, results, *,
+        annotate=None,
+        threshold=None,
+        read_threshold=None,
+        prog_threshold=None,
+        erase_threshold=None,
+        **args):
+    # figure out the thresholds
+    if threshold is None:
+        threshold = THRESHOLD
+    elif len(threshold) == 1:
+        threshold = threshold[0], threshold[0]
+
+    if read_threshold is None:
+        read_t0, read_t1 = threshold
+    elif len(read_threshold) == 1:
+        read_t0, read_t1 = read_threshold[0], read_threshold[0]
+    else:
+        read_t0, read_t1 = read_threshold
+    read_t0, read_t1 = min(read_t0, read_t1), max(read_t0, read_t1)
+
+    if prog_threshold is None:
+        prog_t0, prog_t1 = threshold
+    elif len(prog_threshold) == 1:
+        prog_t0, prog_t1 = prog_threshold[0], prog_threshold[0]
+    else:
+        prog_t0, prog_t1 = prog_threshold
+    prog_t0, prog_t1 = min(prog_t0, prog_t1), max(prog_t0, prog_t1)
+
+    if erase_threshold is None:
+        erase_t0, erase_t1 = threshold
+    elif len(erase_threshold) == 1:
+        erase_t0, erase_t1 = erase_threshold[0], erase_threshold[0]
+    else:
+        erase_t0, erase_t1 = erase_threshold
+    erase_t0, erase_t1 = min(erase_t0, erase_t1), max(erase_t0, erase_t1)
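+
+    # so with the default THRESHOLD=(0.5, 0.85), lines above 50% of the
+    # max cost get highlighted, and lines above 85% highlighted brightly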
+
+    # find maxs
+    max_readed = max(it.chain((float(r.readed) for r in results), [1]))
+    max_proged = max(it.chain((float(r.proged) for r in results), [1]))
+    max_erased = max(it.chain((float(r.erased) for r in results), [1]))
+
+    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
+        # flatten to line info
+        results = fold(Result, results, by=['file', 'line'])
+        table = {r.line: r for r in results if r.file == path}
+
+        # calculate spans to show
+        if not annotate:
+            spans = []
+            last = None
+            func = None
+            for line, r in sorted(table.items()):
+                if (float(r.readed) / max_readed >= read_t0
+                        or float(r.proged) / max_proged >= prog_t0
+                        or float(r.erased) / max_erased >= erase_t0):
+                    if last is not None and line - last.stop <= args['context']:
+                        last = range(
+                            last.start,
+                            line+1+args['context'])
+                    else:
+                        if last is not None:
+                            spans.append((last, func))
+                        last = range(
+                            line-args['context'],
+                            line+1+args['context'])
+                        func = r.function
+            if last is not None:
+                spans.append((last, func))
+
+        with open(path) as f:
+            skipped = False
+            for i, line in enumerate(f):
+                # skip lines not in spans?
+                if not annotate and not any(i+1 in s for s, _ in spans):
+                    skipped = True
+                    continue
+
+                if skipped:
+                    skipped = False
+                    print('%s@@ %s:%d: %s @@%s' % (
+                        '\x1b[36m' if args['color'] else '',
+                        path,
+                        i+1,
+                        next(f for s, f in spans if i+1 in s),
+                        '\x1b[m' if args['color'] else ''))
+
+                # build line
+                if line.endswith('\n'):
+                    line = line[:-1]
+
+                if i+1 in table:
+                    r = table[i+1]
+                    line = '%-*s // %s readed, %s proged, %s erased' % (
+                        args['width'],
+                        line,
+                        r.readed,
+                        r.proged,
+                        r.erased)
+
+                    if args['color']:
+                        if (float(r.readed) / max_readed >= read_t1
+                                or float(r.proged) / max_proged >= prog_t1
+                                or float(r.erased) / max_erased >= erase_t1):
+                            line = '\x1b[1;31m%s\x1b[m' % line
+                        elif (float(r.readed) / max_readed >= read_t0
+                                or float(r.proged) / max_proged >= prog_t0
+                                or float(r.erased) / max_erased >= erase_t0):
+                            line = '\x1b[35m%s\x1b[m' % line
+
+                print(line)
+
+
+def report(obj_path='', trace_paths=[], *,
+        by=None,
+        fields=None,
+        defines=None,
+        sort=None,
+        **args):
+    # figure out what color should be
+    if args.get('color') == 'auto':
+        args['color'] = sys.stdout.isatty()
+    elif args.get('color') == 'always':
+        args['color'] = True
+    else:
+        args['color'] = False
+
+    # depth of 0 == m.inf
+    if args.get('depth') == 0:
+        args['depth'] = m.inf
+
+    # find sizes
+    if not args.get('use', None):
+        results = collect(obj_path, trace_paths, **args)
+    else:
+        results = []
+        with openio(args['use']) as f:
+            reader = csv.DictReader(f, restval='')
+            for r in reader:
+                try:
+                    results.append(PerfBdResult(
+                        **{k: r[k] for k in PerfBdResult._by
+                            if k in r and r[k].strip()},
+                        **{k: r['perfbd_'+k] for k in PerfBdResult._fields
+                            if 'perfbd_'+k in r and r['perfbd_'+k].strip()}))
+                except TypeError:
+                    pass
+
+    # fold
+    results = fold(PerfBdResult, results, by=by, defines=defines)
+
+    # sort, note that python's sort is stable
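+    # note that by-fields sort ascending while result fields sort
+    # descending by default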
+    results.sort()
+    if sort:
+        for k, reverse in reversed(sort):
+            results.sort(key=lambda r: (getattr(r, k),)
+                if getattr(r, k) is not None else (),
+                reverse=reverse ^ (not k or k in PerfBdResult._fields))
+
+    # write results to CSV
+    if args.get('output'):
+        with openio(args['output'], 'w') as f:
+            writer = csv.DictWriter(f,
+                (by if by is not None else PerfBdResult._by)
+                + ['perfbd_'+k for k in PerfBdResult._fields])
+            writer.writeheader()
+            for r in results:
+                writer.writerow(
+                    {k: getattr(r, k)
+                        for k in (by if by is not None else PerfBdResult._by)}
+                    | {'perfbd_'+k: getattr(r, k)
+                        for k in PerfBdResult._fields})
+
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f, restval='')
+                for r in reader:
+                    try:
+                        diff_results.append(PerfBdResult(
+                            **{k: r[k] for k in PerfBdResult._by
+                                if k in r and r[k].strip()},
+                            **{k: r['perfbd_'+k] for k in PerfBdResult._fields
+                                if 'perfbd_'+k in r
+                                    and r['perfbd_'+k].strip()}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+        # fold
+        diff_results = fold(PerfBdResult, diff_results, by=by, defines=defines)
+
+    # print table
+    if not args.get('quiet'):
+        if (args.get('annotate')
+                or args.get('threshold')
+                or args.get('read_threshold')
+                or args.get('prog_threshold')
+                or args.get('erase_threshold')):
+            # annotate sources
+            annotate(PerfBdResult, results, **args)
+        else:
+            # print table
+            table(PerfBdResult, results,
+                diff_results if args.get('diff') else None,
+                by=by if by is not None else ['function'],
+                fields=fields,
+                sort=sort,
+                **args)
+
+
+def main(**args):
+    if args.get('record'):
+        return record(**args)
+    else:
+        return report(**args)
+
+
+if __name__ == "__main__":
+    import argparse
+    import sys
+    parser = argparse.ArgumentParser(
+        description="Aggregate and report call-stack propagated "
+            "block-device operations from trace output.",
+        allow_abbrev=False)
+    parser.add_argument(
+        'obj_path',
+        nargs='?',
+        help="Input executable for mapping addresses to symbols.")
+    parser.add_argument(
+        'trace_paths',
+        nargs='*',
+        help="Input *.trace files.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help="Output commands that run behind the scenes.")
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
+        help="Don't show anything, useful with -o.")
+    parser.add_argument(
+        '-o', '--output',
+        help="Specify CSV file to store results.")
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by',
+        action='append',
+        choices=PerfBdResult._by,
+        help="Group by this field.")
+    parser.add_argument(
+        '-f', '--field',
+        dest='fields',
+        action='append',
+        choices=PerfBdResult._fields,
+        help="Show this field.")
+    parser.add_argument(
+        '-D', '--define',
+        dest='defines',
+        action='append',
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
+        help="Only include results where this field is this value.")
+    class AppendSort(argparse.Action):
+        def __call__(self, parser, namespace, value, option):
+            if namespace.sort is None:
+                namespace.sort = []
+            namespace.sort.append((value, True if option == '-S' else False))
+    parser.add_argument(
+        '-s', '--sort',
+        action=AppendSort,
+        help="Sort by this fields.")
+    parser.add_argument(
+        '-S', '--reverse-sort',
+        action=AppendSort,
+        help="Sort by this fields, but backwards.")
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total.")
+    parser.add_argument(
+        '-F', '--source',
+        dest='sources',
+        action='append',
+        help="Only consider definitions in this file. Defaults to anything "
+            "in the current directory.")
+    parser.add_argument(
+        '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '-P', '--propagate',
+        type=lambda x: int(x, 0),
+        help="Depth to propagate samples up the call-stack. 0 propagates up "
+            "to the entry point, 1 does no propagation. Defaults to 0.")
+    parser.add_argument(
+        '-Z', '--depth',
+        nargs='?',
+        type=lambda x: int(x, 0),
+        const=0,
+        help="Depth of function calls to show. 0 shows all calls but may not "
+            "terminate!")
+    parser.add_argument(
+        '-A', '--annotate',
+        action='store_true',
+        help="Show source files annotated with coverage info.")
+    parser.add_argument(
+        '-T', '--threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with any ops above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '--read-threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with reads above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '--prog-threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with progs above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '--erase-threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with erases above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '-c', '--context',
+        type=lambda x: int(x, 0),
+        default=3,
+        help="Show n additional lines of context. Defaults to 3.")
+    parser.add_argument(
+        '-W', '--width',
+        type=lambda x: int(x, 0),
+        default=80,
+        help="Assume source is styled with this many columns. Defaults to 80.")
+    parser.add_argument(
+        '--color',
+        choices=['never', 'always', 'auto'],
+        default='auto',
+        help="When to use terminal colors. Defaults to 'auto'.")
+    parser.add_argument(
+        '-j', '--jobs',
+        nargs='?',
+        type=lambda x: int(x, 0),
+        const=0,
+        help="Number of processes to use. 0 spawns one process per core.")
+    parser.add_argument(
+        '--objdump-tool',
+        type=lambda x: x.split(),
+        default=OBJDUMP_TOOL,
+        help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
+    sys.exit(main(**{k: v
+        for k, v in vars(parser.parse_intermixed_args()).items()
+        if v is not None}))
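
A sketch of typical usage, with illustrative file names: given a trace
recorded with backtraces, aggregate the block-device costs by function, most
reads first:

    ./scripts/perfbd.py runners/bench_runner bench.trace -bfunction -sreaded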

+ 6 - 18
scripts/plot.py

@@ -11,7 +11,6 @@
 
 import collections as co
 import csv
-import glob
 import io
 import itertools as it
 import math as m
@@ -19,7 +18,6 @@ import os
 import shutil
 import time
 
-CSV_PATHS = ['*.csv']
 COLORS = [
     '1;34', # bold blue
     '1;31', # bold red
@@ -80,14 +78,14 @@ def si(x, w=4):
         s = s.rstrip('.')
     return '%s%s%s' % ('-' if x < 0 else '', s, SI_PREFIXES[p])
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 class LinesIO:
     def __init__(self, maxlen=None):
@@ -356,16 +354,8 @@ class Plot:
 
 def collect(csv_paths, renames=[]):
     # collect results from CSV files
-    paths = []
-    for path in csv_paths:
-        if os.path.isdir(path):
-            path = path + '/*.csv'
-
-        for path in glob.glob(path):
-            paths.append(path)
-
     results = []
-    for path in paths:
+    for path in csv_paths:
         try:
             with openio(path) as f:
                 reader = csv.DictReader(f, restval='')
@@ -732,9 +722,7 @@ if __name__ == "__main__":
    parser.add_argument(
        'csv_paths',
        nargs='*',
-        default=CSV_PATHS,
-        help="Description of where to find *.csv files. May be a directory "
-            "or list of paths. Defaults to %r." % CSV_PATHS)
+        help="Input *.csv files.")
    parser.add_argument(
        '-b', '--by',
        action='append',

+ 4 - 4
scripts/prettyasserts.py

@@ -42,14 +42,14 @@ LEXEMES = {
 }
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def write_header(f, limit=LIMIT):
     f.writeln("// Generated by %s:" % sys.argv[0])

+ 8 - 26
scripts/stack.py

@@ -12,15 +12,12 @@
 
 import collections as co
 import csv
-import glob
 import itertools as it
 import math as m
 import os
 import re
 
 
-CI_PATHS = ['*.ci']
-
 
 # integer fields
 class Int(co.namedtuple('Int', 'x')):
@@ -121,16 +118,16 @@ class StackResult(co.namedtuple('StackResult', [
             self.children | other.children)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
-def collect(paths, *,
+def collect(ci_paths, *,
         sources=None,
         everything=False,
         **args):
@@ -167,7 +164,7 @@
     callgraph = co.defaultdict(lambda: (None, None, 0, set()))
     f_pattern = re.compile(
         r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)')
-    for path in paths:
+    for path in ci_paths:
         with open(path) as f:
             vcg = parse_vcg(f.read())
         for k, graph in vcg:
@@ -546,20 +543,7 @@ def main(ci_paths,
 
     # find sizes
     if not args.get('use', None):
-        # find .ci files
-        paths = []
-        for path in ci_paths:
-            if os.path.isdir(path):
-                path = path + '/*.ci'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .ci files found in %r?" % ci_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(ci_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -644,9 +628,7 @@
     parser.add_argument(
         'ci_paths',
         nargs='*',
-        default=CI_PATHS,
-        help="Description of where to find *.ci files. May be a directory "
-            "or a list of paths. Defaults to %r." % CI_PATHS)
+        help="Input *.ci files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

+ 13 - 30
scripts/struct_.py

@@ -12,7 +12,6 @@
 import collections as co
 import csv
 import difflib
-import glob
 import itertools as it
 import math as m
 import os
@@ -21,7 +20,6 @@ import shlex
 import subprocess as sp
 
 
-OBJ_PATHS = ['*.o']
 OBJDUMP_TOOL = ['objdump']
 
 
@@ -120,14 +118,14 @@ class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
             self.size + other.size)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def collect(obj_paths, *,
         objdump_tool=OBJDUMP_TOOL,
@@ -136,15 +134,15 @@ def collect(obj_paths, *,
         internal=False,
         **args):
     line_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)$')
+        '^\s+(?P<no>[0-9]+)'
+            '(?:\s+(?P<dir>[0-9]+))?'
+            '\s+.*'
+            '\s+(?P<path>[^\s]+)$')
     info_pattern = re.compile(
         '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
-            '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
+            '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
+            '|.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
 
     results = []
     for path in obj_paths:
@@ -468,20 +466,7 @@ def main(obj_paths, *,
         **args):
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in obj_paths:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .o files found in %r?" % obj_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(obj_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -565,9 +550,7 @@
     parser.add_argument(
         'obj_paths',
         nargs='*',
-        default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory "
-            "or a list of paths. Defaults to %r." % OBJ_PATHS)
+        help="Input *.o files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

+ 6 - 23
scripts/summary.py

@@ -15,15 +15,12 @@
 import collections as co
 import csv
 import functools as ft
-import glob
 import itertools as it
 import math as m
 import os
 import re
 
 
-CSV_PATHS = ['*.csv']
-
 # supported merge operations
 #
 # this is a terrible way to express these
@@ -548,14 +545,14 @@ def table(Result, results, diff_results=None, *,
             line[-1]))
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def main(csv_paths, *,
         by=None,
@@ -605,20 +602,8 @@ def main(csv_paths, *,
         ops.update(ops_)
 
     # find CSV files
-    paths = []
-    for path in csv_paths:
-        if os.path.isdir(path):
-            path = path + '/*.csv'
-
-        for path in glob.glob(path):
-            paths.append(path)
-
-    if not paths:
-        print("error: no .csv files found in %r?" % csv_paths)
-        sys.exit(-1)
-
     results = []
-    for path in paths:
+    for path in csv_paths:
         try:
             with openio(path) as f:
                 reader = csv.DictReader(f, restval='')
@@ -721,9 +706,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'csv_paths',
         nargs='*',
-        default=CSV_PATHS,
-        help="Description of where to find *.csv files. May be a directory "
-            "or list of paths. Defaults to %r." % CSV_PATHS)
+        help="Input *.csv files.")
     parser.add_argument(
         '-q', '--quiet',
         action='store_true',

+ 7 - 4
scripts/tailpipe.py

@@ -17,14 +17,14 @@ import sys
 import time
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 class LinesIO:
     def __init__(self, maxlen=None):
@@ -104,6 +104,9 @@ def main(path='-', *, lines=5, cat=False, sleep=0.01, keep_open=False):
                 break
             # don't just flood open calls
             time.sleep(sleep or 0.1)
+    except FileNotFoundError as e:
+        print("error: file not found %r" % path)
+        sys.exit(-1)
     except KeyboardInterrupt:
         pass
 

+ 36 - 26
scripts/test.py

@@ -35,19 +35,12 @@ VALGRIND_TOOL = ['valgrind']
 PERF_SCRIPT = ['./scripts/perf.py']
 
 
-def openio(path, mode='r', buffering=-1, nb=False):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r', buffering)
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w', buffering)
-    elif nb and 'a' in mode:
-        return os.fdopen(os.open(
-                path,
-                os.O_WRONLY | os.O_CREAT | os.O_APPEND | os.O_NONBLOCK,
-                0o666),
-            mode,
-            buffering)
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
         return open(path, mode, buffering)
 
@@ -549,6 +542,12 @@ def find_runner(runner, **args):
         cmd.append('-d%s' % args['disk'])
     if args.get('trace'):
         cmd.append('-t%s' % args['trace'])
+    if args.get('trace_backtrace'):
+        cmd.append('--trace-backtrace')
+    if args.get('trace_period'):
+        cmd.append('--trace-period=%s' % args['trace_period'])
+    if args.get('trace_freq'):
+        cmd.append('--trace-freq=%s' % args['trace_freq'])
     if args.get('read_sleep'):
         cmd.append('--read-sleep=%s' % args['read_sleep'])
     if args.get('prog_sleep'):
@@ -764,7 +763,7 @@
         self.stdout = stdout
         self.assert_ = assert_
 
-def run_stage(name, runner_, ids, output_, **args):
+def run_stage(name, runner_, ids, stdout_, trace_, output_, **args):
     # get expected suite/case/perm counts
     (case_suites,
         expected_suite_perms,
@@ -805,7 +804,6 @@
         os.close(spty)
         children.add(proc)
         mpty = os.fdopen(mpty, 'r', 1)
-        stdout = None
 
         last_id = None
         last_stdout = []
@@ -822,18 +820,12 @@
                 if not line:
                     break
                 last_stdout.append(line)
-                if args.get('stdout'):
+                if stdout_:
                     try:
-                        if not stdout:
-                            stdout = openio(args['stdout'], 'a', 1, nb=True)
-                        stdout.write(line)
-                    except OSError as e:
-                        if e.errno not in [
-                                errno.ENXIO,
-                                errno.EPIPE,
-                                errno.EAGAIN]:
-                            raise
-                        stdout = None
+                        stdout_.write(line)
+                        stdout_.flush()
+                    except BrokenPipeError:
+                        pass
                 if args.get('verbose'):
                     sys.stdout.write(line)
 
@@ -1063,6 +1055,8 @@
                 by or 'tests',
                 runner_,
                 [by] if by is not None else test_ids,
+                stdout,
+                trace,
                 output,
                 **args)
         # collect passes/failures
@@ -1076,9 +1070,15 @@
     stop = time.time()
 
     if stdout:
-        stdout.close()
+        try:
+            stdout.close()
+        except BrokenPipeError:
+            pass
     if trace:
-        trace.close()
+        try:
+            trace.close()
+        except BrokenPipeError:
+            pass
     if output:
         output.close()
 
@@ -1284,6 +1284,16 @@
     test_parser.add_argument(
         '-t', '--trace',
         help="Direct trace output to this file.")
+    test_parser.add_argument(
+        '--trace-backtrace',
+        action='store_true',
+        help="Include a backtrace with every trace statement.")
+    test_parser.add_argument(
+        '--trace-period',
+        help="Sample trace output at this period in cycles.")
+    test_parser.add_argument(
+        '--trace-freq',
+        help="Sample trace output at this frequency in hz.")
     test_parser.add_argument(
         '-O', '--stdout',
         help="Direct stdout to this file. Note stderr is already merged here.")
@@ -1361,7 +1371,7 @@
         default=VALGRIND_TOOL,
         help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL)
     test_parser.add_argument(
-        '--perf',
+        '-p', '--perf',
         help="Run under Linux's perf to sample performance counters, writing "
             "samples to this file.")
     test_parser.add_argument(
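
To sketch how these new flags fit together, with illustrative file names, a
test run can stream sampled trace output with backtraces straight into
perfbd.py:

    ./scripts/test.py runners/test_runner -ttest.trace --trace-backtrace --trace-period=1000
    ./scripts/perfbd.py runners/test_runner test.trace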

+ 28 - 20
scripts/tracebd.py

@@ -41,14 +41,14 @@ CHARS_BRAILLE = (
     '⠋⢋⡋⣋⠫⢫⡫⣫⠏⢏⡏⣏⠯⢯⡯⣯' '⠛⢛⡛⣛⠻⢻⡻⣻⠟⢟⡟⣟⠿⢿⡿⣿')
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 class LinesIO:
     def __init__(self, maxlen=None):
@@ -663,29 +663,30 @@ def main(path='-', *,
 
     # parse a line of trace output
     pattern = re.compile(
-        'trace.*?bd_(?:'
+        '^(?P<file>[^:]*):(?P<line>[0-9]+):trace:.*?bd_(?:'
             '(?P<create>create\w*)\('
                 '(?:'
                     'block_size=(?P<block_size>\w+)'
                    '|' 'block_count=(?P<block_count>\w+)'
                    '|' '.*?' ')*' '\)'
             '|' '(?P<read>read)\('
-                '\s*(?P<read_ctx>\w+)\s*' ','
-                '\s*(?P<read_block>\w+)\s*' ','
-                '\s*(?P<read_off>\w+)\s*' ','
-                '\s*(?P<read_buffer>\w+)\s*' ','
-                '\s*(?P<read_size>\w+)\s*' '\)'
+                '\s*(?P<read_ctx>\w+)' '\s*,'
+                '\s*(?P<read_block>\w+)' '\s*,'
+                '\s*(?P<read_off>\w+)' '\s*,'
+                '\s*(?P<read_buffer>\w+)' '\s*,'
+                '\s*(?P<read_size>\w+)' '\s*\)'
             '|' '(?P<prog>prog)\('
-                '\s*(?P<prog_ctx>\w+)\s*' ','
-                '\s*(?P<prog_block>\w+)\s*' ','
-                '\s*(?P<prog_off>\w+)\s*' ','
-                '\s*(?P<prog_buffer>\w+)\s*' ','
-                '\s*(?P<prog_size>\w+)\s*' '\)'
+                '\s*(?P<prog_ctx>\w+)' '\s*,'
+                '\s*(?P<prog_block>\w+)' '\s*,'
+                '\s*(?P<prog_off>\w+)' '\s*,'
+                '\s*(?P<prog_buffer>\w+)' '\s*,'
+                '\s*(?P<prog_size>\w+)' '\s*\)'
             '|' '(?P<erase>erase)\('
-                '\s*(?P<erase_ctx>\w+)\s*' ','
-                '\s*(?P<erase_block>\w+)\s*' '\)'
+                '\s*(?P<erase_ctx>\w+)' '\s*,'
+                '\s*(?P<erase_block>\w+)'
+                '\s*\(\s*(?P<erase_size>\w+)\s*\)' '\s*\)'
             '|' '(?P<sync>sync)\('
-                '\s*(?P<sync_ctx>\w+)\s*' '\)' ')')
+                '\s*(?P<sync_ctx>\w+)' '\s*\)' ')\s*$')
     def parse(line):
         nonlocal bd
 
@@ -694,7 +695,7 @@
         # through here
         if 'trace' not in line or 'bd' not in line:
             return False
-        m = pattern.search(line)
+        m = pattern.match(line)
         if not m:
             return False
 
@@ -748,12 +749,16 @@
 
         elif m.group('erase') and (erase or wear):
             block = int(m.group('erase_block'), 0)
+            size = int(m.group('erase_size'), 0)
 
             if block_stop is not None and block >= block_stop:
                 return False
             block -= block_start
+            if off_stop is not None:
+                size = min(size, off_stop)
+            off = -off_start
 
-            bd.erase(block)
+            bd.erase(block, off, size)
             return True
 
         else:
@@ -818,6 +823,9 @@
                 break
             # don't just flood open calls
             time.sleep(sleep or 0.1)
+    except FileNotFoundError as e:
+        print("error: file not found %r" % path)
+        sys.exit(-1)
     except KeyboardInterrupt:
         pass
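
For reference, the anchored pattern above expects trace lines of roughly this
shape, an illustrative line with the erase size now in parentheses:

    bd/lfs_emubd.c:543:trace: lfs_emubd_erase(0x55d1e9a0, 0x3 (4096))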