
Added perfbd.py and block device performance sampling in bench-runner

Based loosely on Linux's perf tool, perfbd.py uses trace output with
backtraces to aggregate and show the block device usage of all functions
in a program, propagating block device operation cost up the backtrace
for each operation.
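
As a rough sketch of the idea (not the actual perfbd.py implementation;
the trace patterns here are illustrative, and the real script also maps
addresses back to functions and files via objdump debug info), cost
propagation over trace backtraces might look like:

    import collections
    import re

    # illustrative patterns: erase traces carry the block size, e.g.
    # "lfs_emubd_erase(0x..., 0x12 (4096))", and with --trace-backtrace
    # each trace statement is followed by "\tat <addr>" frames
    ERASE = re.compile(r'lfs_\w+_erase\(.*\((?P<size>\d+)\)\)')
    FRAME = re.compile(r'^\tat (?P<addr>0x[0-9a-fA-F]+)')

    def aggregate(trace_lines):
        erased = collections.defaultdict(int)  # frame addr -> erased bytes
        pending = None  # cost of the current trace event
        for line in trace_lines:
            f = FRAME.match(line)
            if f and pending is not None:
                # charge every frame in the backtrace, propagating the
                # operation's cost up to all callers
                erased[f.group('addr')] += pending
            else:
                m = ERASE.search(line)
                pending = int(m.group('size')) if m else None
        return erased

The real script aggregates erased, proged, and readed bytes per function
(the -Serased -Sproged -Sreaded fields used by make perfbd below).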

Combined with --trace-period and --trace-freq for sampling/filtering
trace events, this allows the bench-runner to record the general cost of
block device operations with very little overhead.
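
For reference, the sampling rules the runners implement are sketched
here in Python (the actual logic is C, in bench_runner.c/test_runner.c):
--trace-period keeps every Nth trace call, while --trace-freq
rate-limits samples against a monotonic clock:

    import time

    class TraceSampler:
        def __init__(self, period=0, freq=0):
            self.period = period  # sample every Nth trace call
            self.freq = freq      # max samples per second
            self.cycles = 0
            self.last_ns = 0

        def keep(self):
            # sample at a specific period?
            if self.period:
                nth = self.cycles % self.period == 0
                self.cycles += 1
                if not nth:
                    return False
            # sample at a specific frequency?
            if self.freq:
                now = time.monotonic_ns()
                if now - self.last_ns < 1_000_000_000 // self.freq:
                    return False
                self.last_ns = now
            return True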

Adopted this as the default side-effect of make bench, replacing
cycle-based performance measurements, which are less important for
littlefs.
Christopher Haster, 3 years ago
commit 3a33c3795b
20 changed files with 2022 additions and 606 deletions
  1. Makefile (+67 -22)
  2. bd/lfs_emubd.c (+2 -1)
  3. bd/lfs_filebd.c (+4 -2)
  4. bd/lfs_rambd.c (+2 -1)
  5. runners/bench_runner.c (+102 -10)
  6. runners/test_runner.c (+102 -10)
  7. scripts/bench.py (+36 -26)
  8. scripts/code.py (+14 -31)
  9. scripts/cov.py (+6 -24)
  10. scripts/data.py (+14 -31)
  11. scripts/perf.py (+313 -297)
  12. scripts/perfbd.py (+1252 -0)
  13. scripts/plot.py (+6 -18)
  14. scripts/prettyasserts.py (+4 -4)
  15. scripts/stack.py (+8 -26)
  16. scripts/struct_.py (+13 -30)
  17. scripts/summary.py (+6 -23)
  18. scripts/tailpipe.py (+7 -4)
  19. scripts/test.py (+36 -26)
  20. scripts/tracebd.py (+28 -20)

+ 67 - 22
Makefile

@@ -41,30 +41,32 @@ TEST_SRC ?= $(SRC) \
 		$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
 		runners/test_runner.c
 TEST_RUNNER ?= $(BUILDDIR)runners/test_runner
-TEST_TC   := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
+TEST_TC    := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
 		$(TEST_SRC:%.c=$(BUILDDIR)%.t.c)
-TEST_TAC  := $(TEST_TC:%.t.c=%.t.a.c)
-TEST_OBJ  := $(TEST_TAC:%.t.a.c=%.t.a.o)
-TEST_DEP  := $(TEST_TAC:%.t.a.c=%.t.a.d)
-TEST_CI	  := $(TEST_TAC:%.t.a.c=%.t.a.ci)
-TEST_GCNO := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
-TEST_GCDA := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
-TEST_PERF := $(TEST_RUNNER:%=%.perf)
+TEST_TAC   := $(TEST_TC:%.t.c=%.t.a.c)
+TEST_OBJ   := $(TEST_TAC:%.t.a.c=%.t.a.o)
+TEST_DEP   := $(TEST_TAC:%.t.a.c=%.t.a.d)
+TEST_CI	   := $(TEST_TAC:%.t.a.c=%.t.a.ci)
+TEST_GCNO  := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
+TEST_GCDA  := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
+TEST_PERF  := $(TEST_RUNNER:%=%.perf)
+TEST_TRACE := $(TEST_RUNNER:%=%.trace)
 
 BENCHES ?= $(wildcard benches/*.toml)
 BENCH_SRC ?= $(SRC) \
 		$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
 		runners/bench_runner.c
 BENCH_RUNNER ?= $(BUILDDIR)runners/bench_runner
-BENCH_BC   := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
+BENCH_BC    := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
 		$(BENCH_SRC:%.c=$(BUILDDIR)%.b.c)
-BENCH_BAC  := $(BENCH_BC:%.b.c=%.b.a.c)
-BENCH_OBJ  := $(BENCH_BAC:%.b.a.c=%.b.a.o)
-BENCH_DEP  := $(BENCH_BAC:%.b.a.c=%.b.a.d)
-BENCH_CI   := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
-BENCH_GCNO := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
-BENCH_GCDA := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
-BENCH_PERF := $(BENCH_RUNNER:%=%.perf)
+BENCH_BAC   := $(BENCH_BC:%.b.c=%.b.a.c)
+BENCH_OBJ   := $(BENCH_BAC:%.b.a.c=%.b.a.o)
+BENCH_DEP   := $(BENCH_BAC:%.b.a.c=%.b.a.d)
+BENCH_CI    := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
+BENCH_GCNO  := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
+BENCH_GCDA  := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
+BENCH_PERF  := $(BENCH_RUNNER:%=%.perf)
+BENCH_TRACE := $(BENCH_RUNNER:%=%.trace)
 
 ifdef DEBUG
 override CFLAGS += -O0
@@ -85,6 +87,9 @@ endif
 ifdef YES_PERF
 override CFLAGS += -fno-omit-frame-pointer
 endif
+ifdef YES_PERFBD
+override CFLAGS += -fno-omit-frame-pointer
+endif
 
 ifdef VERBOSE
 override CODEFLAGS   += -v
@@ -93,7 +98,11 @@ override STACKFLAGS  += -v
 override STRUCTFLAGS += -v
 override COVFLAGS    += -v
 override PERFFLAGS   += -v
+override PERFBDFLAGS += -v
 endif
+# forward -j flag
+override PERFFLAGS   += $(filter -j%,$(MAKEFLAGS))
+override PERFBDFLAGS += $(filter -j%,$(MAKEFLAGS))
 ifneq ($(NM),nm)
 override CODEFLAGS += --nm-tool="$(NM)"
 override DATAFLAGS += --nm-tool="$(NM)"
@@ -103,6 +112,7 @@ override CODEFLAGS   += --objdump-tool="$(OBJDUMP)"
 override DATAFLAGS   += --objdump-tool="$(OBJDUMP)"
 override STRUCTFLAGS += --objdump-tool="$(OBJDUMP)"
 override PERFFLAGS   += --objdump-tool="$(OBJDUMP)"
+override PERFBDFLAGS += --objdump-tool="$(OBJDUMP)"
 endif
 ifneq ($(PERF),perf)
 override PERFFLAGS += --perf-tool="$(PERF)"
@@ -114,10 +124,14 @@ override BENCHFLAGS += -b
 override TESTFLAGS  += $(filter -j%,$(MAKEFLAGS))
 override BENCHFLAGS += $(filter -j%,$(MAKEFLAGS))
 ifdef YES_PERF
-override TESTFLAGS += --perf=$(TEST_PERF)
+override TESTFLAGS  += -p$(TEST_PERF)
+override BENCHFLAGS += -p$(BENCH_PERF)
+endif
+ifdef YES_PERFBD
+override TESTFLAGS  += -t$(TEST_TRACE) --trace-backtrace --trace-freq=100
 endif
-ifndef NO_PERF
-override BENCHFLAGS += --perf=$(BENCH_PERF)
+ifndef NO_PERFBD
+override BENCHFLAGS  += -t$(BENCH_TRACE) --trace-backtrace --trace-freq=100
 endif
 ifdef VERBOSE
 override TESTFLAGS   += -v
@@ -165,6 +179,11 @@ endif
 ifdef YES_PERF
 bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
 endif
+ifdef YES_PERFBD
+bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
+endif
+# note we remove some binary-dependent files during compilation,
+# otherwise it's way too easy to end up with outdated results
 test-runner build-test: $(TEST_RUNNER)
 ifndef NO_COV
 	rm -f $(TEST_GCDA)
@@ -172,6 +191,9 @@ endif
 ifdef YES_PERF
 	rm -f $(TEST_PERF)
 endif
+ifdef YES_PERFBD
+	rm -f $(TEST_TRACE)
+endif
 
 .PHONY: test
 test: test-runner
@@ -185,16 +207,24 @@ test-list: test-runner
 ifdef YES_COV
 bench-runner build-bench: override CFLAGS+=--coverage
 endif
-ifndef NO_PERF
+ifdef YES_PERF
+bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
+endif
+ifndef NO_PERFBD
 bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
 endif
+# note we remove some binary-dependent files during compilation,
+# otherwise it's way too easy to end up with outdated results
 bench-runner build-bench: $(BENCH_RUNNER)
 ifdef YES_COV 
 	rm -f $(BENCH_GCDA)
 endif
-ifndef NO_PERF
+ifdef YES_PERF
 	rm -f $(BENCH_PERF)
 endif
+ifndef NO_PERFBD
+	rm -f $(BENCH_TRACE)
+endif
 
 .PHONY: bench
 bench: bench-runner
@@ -234,6 +264,13 @@ perf: $(BENCH_PERF)
 		-Scycles \
 		$(PERFFLAGS))
 
+.PHONY: perfbd
+perfbd: $(BENCH_TRACE)
+	$(strip ./scripts/perfbd.py \
+		$(BENCH_RUNNER) $^ $(patsubst %,-F%,$(SRC)) \
+		-Serased -Sproged -Sreaded \
+		$(PERFBDFLAGS))
+
 .PHONY: summary sizes
 summary sizes: $(BUILDDIR)lfs.csv
 	$(strip ./scripts/summary.py -Y $^ \
@@ -275,6 +312,11 @@ $(BUILDDIR)lfs.cov.csv: $(GCDA)
 $(BUILDDIR)lfs.perf.csv: $(BENCH_PERF)
 	./scripts/perf.py $^ $(patsubst %,-F%,$(SRC)) -q $(PERFFLAGS) -o $@
 
+$(BUILDDIR)lfs.perfbd.csv: $(BENCH_TRACE)
+	$(strip ./scripts/perfbd.py \
+		$(BENCH_RUNNER) $^ $(patsubst %,-F%,$(SRC)) \
+		-q $(PERFBDFLAGS) -o $@)
+
 $(BUILDDIR)lfs.csv: \
 		$(BUILDDIR)lfs.code.csv \
 		$(BUILDDIR)lfs.data.csv \
@@ -326,7 +368,8 @@ clean:
 		$(BUILDDIR)lfs.stack.csv \
 		$(BUILDDIR)lfs.struct.csv \
 		$(BUILDDIR)lfs.cov.csv \
-		$(BUILDDIR)lfs.perf.csv)
+		$(BUILDDIR)lfs.perf.csv \
+		$(BUILDDIR)lfs.perfbd.csv)
 	rm -f $(OBJ)
 	rm -f $(DEP)
 	rm -f $(ASM)
@@ -340,6 +383,7 @@ clean:
 	rm -f $(TEST_GCNO)
 	rm -f $(TEST_GCDA)
 	rm -f $(TEST_PERF)
+	rm -f $(TEST_TRACE)
 	rm -f $(BENCH_RUNNER)
 	rm -f $(BENCH_BC)
 	rm -f $(BENCH_BAC)
@@ -349,3 +393,4 @@ clean:
 	rm -f $(BENCH_GCNO)
 	rm -f $(BENCH_GCDA)
 	rm -f $(BENCH_PERF)
+	rm -f $(BENCH_TRACE)

+ 2 - 1
bd/lfs_emubd.c

@@ -358,7 +358,8 @@ int lfs_emubd_prog(const struct lfs_config *cfg, lfs_block_t block,
 }
 
 int lfs_emubd_erase(const struct lfs_config *cfg, lfs_block_t block) {
-    LFS_EMUBD_TRACE("lfs_emubd_erase(%p, 0x%"PRIx32")", (void*)cfg, block);
+    LFS_EMUBD_TRACE("lfs_emubd_erase(%p, 0x%"PRIx32" (%"PRIu32"))",
+            (void*)cfg, block, cfg->block_size);
     lfs_emubd_t *bd = cfg->context;
 
     // check if erase is valid

+ 4 - 2
bd/lfs_filebd.c

@@ -96,7 +96,8 @@ int lfs_filebd_read(const struct lfs_config *cfg, lfs_block_t block,
 
 int lfs_filebd_prog(const struct lfs_config *cfg, lfs_block_t block,
         lfs_off_t off, const void *buffer, lfs_size_t size) {
-    LFS_FILEBD_TRACE("lfs_filebd_prog(%p, 0x%"PRIx32", %"PRIu32", %p, %"PRIu32")",
+    LFS_FILEBD_TRACE("lfs_filebd_prog(%p, "
+                "0x%"PRIx32", %"PRIu32", %p, %"PRIu32")",
             (void*)cfg, block, off, buffer, size);
     lfs_filebd_t *bd = cfg->context;
 
@@ -127,7 +128,8 @@ int lfs_filebd_prog(const struct lfs_config *cfg, lfs_block_t block,
 }
 
 int lfs_filebd_erase(const struct lfs_config *cfg, lfs_block_t block) {
-    LFS_FILEBD_TRACE("lfs_filebd_erase(%p, 0x%"PRIx32")", (void*)cfg, block);
+    LFS_FILEBD_TRACE("lfs_filebd_erase(%p, 0x%"PRIx32" (%"PRIu32"))",
+            (void*)cfg, block, cfg->block_size);
 
     // check if erase is valid
     LFS_ASSERT(block < cfg->block_count);

+ 2 - 1
bd/lfs_rambd.c

@@ -107,7 +107,8 @@ int lfs_rambd_prog(const struct lfs_config *cfg, lfs_block_t block,
 }
 
 int lfs_rambd_erase(const struct lfs_config *cfg, lfs_block_t block) {
-    LFS_RAMBD_TRACE("lfs_rambd_erase(%p, 0x%"PRIx32")", (void*)cfg, block);
+    LFS_RAMBD_TRACE("lfs_rambd_erase(%p, 0x%"PRIx32" (%"PRIu32"))",
+            (void*)cfg, block, cfg->block_size);
 
     // check if erase is valid
     LFS_ASSERT(block < cfg->block_count);

+ 102 - 10
runners/bench_runner.c

@@ -14,6 +14,8 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <execinfo.h>
+#include <time.h>
 
 
 // some helpers
@@ -405,26 +407,63 @@ size_t bench_step_step = 1;
 
 const char *bench_disk_path = NULL;
 const char *bench_trace_path = NULL;
+bool bench_trace_backtrace = false;
+uint32_t bench_trace_period = 0;
+uint32_t bench_trace_freq = 0;
 FILE *bench_trace_file = NULL;
 uint32_t bench_trace_cycles = 0;
+uint64_t bench_trace_time = 0;
+uint64_t bench_trace_open_time = 0;
 lfs_emubd_sleep_t bench_read_sleep = 0.0;
 lfs_emubd_sleep_t bench_prog_sleep = 0.0;
 lfs_emubd_sleep_t bench_erase_sleep = 0.0;
 
+// this determines both the backtrace buffer and the trace printf buffer; if
+// trace ends up interleaved or truncated this may need to be increased
+#ifndef BENCH_TRACE_BACKTRACE_BUFFER_SIZE
+#define BENCH_TRACE_BACKTRACE_BUFFER_SIZE 8192
+#endif
+void *bench_trace_backtrace_buffer[
+    BENCH_TRACE_BACKTRACE_BUFFER_SIZE / sizeof(void*)];
 
 // trace printing
 void bench_trace(const char *fmt, ...) {
     if (bench_trace_path) {
+        // sample at a specific period?
+        if (bench_trace_period) {
+            if (bench_trace_cycles % bench_trace_period != 0) {
+                bench_trace_cycles += 1;
+                return;
+            }
+            bench_trace_cycles += 1;
+        }
+
+        // sample at a specific frequency?
+        if (bench_trace_freq) {
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - bench_trace_time < (1000*1000*1000) / bench_trace_freq) {
+                return;
+            }
+            bench_trace_time = now;
+        }
+
         if (!bench_trace_file) {
             // Tracing output is heavy and trying to open every trace
             // call is slow, so we only try to open the trace file every
             // so often. Note this doesn't affect successfully opened files
-            if (bench_trace_cycles % 128 != 0) {
-                bench_trace_cycles += 1;
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - bench_trace_open_time < 100*1000*1000) {
                 return;
             }
-            bench_trace_cycles += 1;
+            bench_trace_open_time = now;
 
+            // try to open the trace file
             int fd;
             if (strcmp(bench_trace_path, "-") == 0) {
                 fd = dup(1);
@@ -445,19 +484,42 @@ void bench_trace(const char *fmt, ...) {
 
             FILE *f = fdopen(fd, "a");
             assert(f);
-            int err = setvbuf(f, NULL, _IOLBF, BUFSIZ);
+            int err = setvbuf(f, NULL, _IOFBF,
+                    BENCH_TRACE_BACKTRACE_BUFFER_SIZE);
             assert(!err);
             bench_trace_file = f;
         }
 
+        // print trace
         va_list va;
         va_start(va, fmt);
         int res = vfprintf(bench_trace_file, fmt, va);
+        va_end(va);
         if (res < 0) {
             fclose(bench_trace_file);
             bench_trace_file = NULL;
+            return;
         }
-        va_end(va);
+
+        if (bench_trace_backtrace) {
+            // print backtrace
+            size_t count = backtrace(
+                    bench_trace_backtrace_buffer,
+                    BENCH_TRACE_BACKTRACE_BUFFER_SIZE);
+            // note we skip our own stack frame
+            for (size_t i = 1; i < count; i++) {
+                res = fprintf(bench_trace_file, "\tat %p\n",
+                        bench_trace_backtrace_buffer[i]);
+                if (res < 0) {
+                    fclose(bench_trace_file);
+                    bench_trace_file = NULL;
+                    return;
+                }
+            }
+        }
+
+        // flush immediately
+        fflush(bench_trace_file);
     }
 }
 
@@ -1312,9 +1374,12 @@ enum opt_flags {
     OPT_STEP                     = 's',
     OPT_DISK                     = 'd',
     OPT_TRACE                    = 't',
-    OPT_READ_SLEEP               = 7,
-    OPT_PROG_SLEEP               = 8,
-    OPT_ERASE_SLEEP              = 9,
+    OPT_TRACE_BACKTRACE          = 7,
+    OPT_TRACE_PERIOD             = 8,
+    OPT_TRACE_FREQ               = 9,
+    OPT_READ_SLEEP               = 10,
+    OPT_PROG_SLEEP               = 11,
+    OPT_ERASE_SLEEP              = 12,
 };
 
 const char *short_opts = "hYlLD:G:s:d:t:";
@@ -1337,6 +1402,9 @@ const struct option long_opts[] = {
     {"step",             required_argument, NULL, OPT_STEP},
     {"disk",             required_argument, NULL, OPT_DISK},
     {"trace",            required_argument, NULL, OPT_TRACE},
+    {"trace-backtrace",  no_argument,       NULL, OPT_TRACE_BACKTRACE},
+    {"trace-period",     required_argument, NULL, OPT_TRACE_PERIOD},
+    {"trace-freq",       required_argument, NULL, OPT_TRACE_FREQ},
     {"read-sleep",       required_argument, NULL, OPT_READ_SLEEP},
     {"prog-sleep",       required_argument, NULL, OPT_PROG_SLEEP},
     {"erase-sleep",      required_argument, NULL, OPT_ERASE_SLEEP},
@@ -1357,8 +1425,11 @@ const char *const help_text[] = {
     "Override a bench define.",
     "Comma-separated list of disk geometries to bench.",
     "Comma-separated range of bench permutations to run (start,stop,step).",
-    "Redirect block device operations to this file.",
-    "Redirect trace output to this file.",
+    "Direct block device operations to this file.",
+    "Direct trace output to this file.",
+    "Include a backtrace with every trace statement.",
+    "Sample trace output at this period in cycles.",
+    "Sample trace output at this frequency in hz.",
     "Artificial read delay in seconds.",
     "Artificial prog delay in seconds.",
     "Artificial erase delay in seconds.",
@@ -1790,6 +1861,27 @@ step_unknown:
             case OPT_TRACE:
                 bench_trace_path = optarg;
                 break;
+            case OPT_TRACE_BACKTRACE:
+                bench_trace_backtrace = true;
+                break;
+            case OPT_TRACE_PERIOD: {
+                char *parsed = NULL;
+                bench_trace_period = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-period: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
+            case OPT_TRACE_FREQ: {
+                char *parsed = NULL;
+                bench_trace_freq = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-freq: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
             case OPT_READ_SLEEP: {
                 char *parsed = NULL;
                 double read_sleep = strtod(optarg, &parsed);

+ 102 - 10
runners/test_runner.c

@@ -14,6 +14,8 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <time.h>
+#include <execinfo.h>
 
 
 // some helpers
@@ -421,26 +423,63 @@ size_t test_step_step = 1;
 
 const char *test_disk_path = NULL;
 const char *test_trace_path = NULL;
+bool test_trace_backtrace = false;
+uint32_t test_trace_period = 0;
+uint32_t test_trace_freq = 0;
 FILE *test_trace_file = NULL;
 uint32_t test_trace_cycles = 0;
+uint64_t test_trace_time = 0;
+uint64_t test_trace_open_time = 0;
 lfs_emubd_sleep_t test_read_sleep = 0.0;
 lfs_emubd_sleep_t test_prog_sleep = 0.0;
 lfs_emubd_sleep_t test_erase_sleep = 0.0;
 
+// this determines both the backtrace buffer and the trace printf buffer; if
+// trace ends up interleaved or truncated this may need to be increased
+#ifndef TEST_TRACE_BACKTRACE_BUFFER_SIZE
+#define TEST_TRACE_BACKTRACE_BUFFER_SIZE 8192
+#endif
+void *test_trace_backtrace_buffer[
+    TEST_TRACE_BACKTRACE_BUFFER_SIZE / sizeof(void*)];
 
 // trace printing
 void test_trace(const char *fmt, ...) {
     if (test_trace_path) {
+        // sample at a specific period?
+        if (test_trace_period) {
+            if (test_trace_cycles % test_trace_period != 0) {
+                test_trace_cycles += 1;
+                return;
+            }
+            test_trace_cycles += 1;
+        }
+
+        // sample at a specific frequency?
+        if (test_trace_freq) {
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - test_trace_time < (1000*1000*1000) / test_trace_freq) {
+                return;
+            }
+            test_trace_time = now;
+        }
+
         if (!test_trace_file) {
             // Tracing output is heavy and trying to open every trace
             // call is slow, so we only try to open the trace file every
             // so often. Note this doesn't affect successfully opened files
-            if (test_trace_cycles % 128 != 0) {
-                test_trace_cycles += 1;
+            struct timespec t;
+            clock_gettime(CLOCK_MONOTONIC, &t);
+            uint64_t now = (uint64_t)t.tv_sec*1000*1000*1000
+                    + (uint64_t)t.tv_nsec;
+            if (now - test_trace_open_time < 100*1000*1000) {
                 return;
             }
-            test_trace_cycles += 1;
+            test_trace_open_time = now;
 
+            // try to open the trace file
             int fd;
             if (strcmp(test_trace_path, "-") == 0) {
                 fd = dup(1);
@@ -461,19 +500,42 @@ void test_trace(const char *fmt, ...) {
 
             FILE *f = fdopen(fd, "a");
             assert(f);
-            int err = setvbuf(f, NULL, _IOLBF, BUFSIZ);
+            int err = setvbuf(f, NULL, _IOFBF,
+                    TEST_TRACE_BACKTRACE_BUFFER_SIZE);
             assert(!err);
             test_trace_file = f;
         }
 
+        // print trace
         va_list va;
         va_start(va, fmt);
         int res = vfprintf(test_trace_file, fmt, va);
+        va_end(va);
         if (res < 0) {
             fclose(test_trace_file);
             test_trace_file = NULL;
+            return;
         }
-        va_end(va);
+
+        if (test_trace_backtrace) {
+            // print backtrace
+            size_t count = backtrace(
+                    test_trace_backtrace_buffer,
+                    TEST_TRACE_BACKTRACE_BUFFER_SIZE);
+            // note we skip our own stack frame
+            for (size_t i = 1; i < count; i++) {
+                res = fprintf(test_trace_file, "\tat %p\n",
+                        test_trace_backtrace_buffer[i]);
+                if (res < 0) {
+                    fclose(test_trace_file);
+                    test_trace_file = NULL;
+                    return;
+                }
+            }
+        }
+
+        // flush immediately
+        fflush(test_trace_file);
     }
 }
 
@@ -1838,9 +1900,12 @@ enum opt_flags {
     OPT_STEP                     = 's',
     OPT_DISK                     = 'd',
     OPT_TRACE                    = 't',
-    OPT_READ_SLEEP               = 8,
-    OPT_PROG_SLEEP               = 9,
-    OPT_ERASE_SLEEP              = 10,
+    OPT_TRACE_BACKTRACE          = 8,
+    OPT_TRACE_PERIOD             = 9,
+    OPT_TRACE_FREQ               = 10,
+    OPT_READ_SLEEP               = 11,
+    OPT_PROG_SLEEP               = 12,
+    OPT_ERASE_SLEEP              = 13,
 };
 
 const char *short_opts = "hYlLD:G:P:s:d:t:";
@@ -1865,6 +1930,9 @@ const struct option long_opts[] = {
     {"step",             required_argument, NULL, OPT_STEP},
     {"disk",             required_argument, NULL, OPT_DISK},
     {"trace",            required_argument, NULL, OPT_TRACE},
+    {"trace-backtrace",  no_argument,       NULL, OPT_TRACE_BACKTRACE},
+    {"trace-period",     required_argument, NULL, OPT_TRACE_PERIOD},
+    {"trace-freq",       required_argument, NULL, OPT_TRACE_FREQ},
     {"read-sleep",       required_argument, NULL, OPT_READ_SLEEP},
     {"prog-sleep",       required_argument, NULL, OPT_PROG_SLEEP},
     {"erase-sleep",      required_argument, NULL, OPT_ERASE_SLEEP},
@@ -1887,8 +1955,11 @@ const char *const help_text[] = {
     "Comma-separated list of disk geometries to test.",
     "Comma-separated list of power-loss scenarios to test.",
     "Comma-separated range of test permutations to run (start,stop,step).",
-    "Redirect block device operations to this file.",
-    "Redirect trace output to this file.",
+    "Direct block device operations to this file.",
+    "Direct trace output to this file.",
+    "Include a backtrace with every trace statement.",
+    "Sample trace output at this period in cycles.",
+    "Sample trace output at this frequency in hz.",
     "Artificial read delay in seconds.",
     "Artificial prog delay in seconds.",
     "Artificial erase delay in seconds.",
@@ -2460,6 +2531,27 @@ step_unknown:
             case OPT_TRACE:
                 test_trace_path = optarg;
                 break;
+            case OPT_TRACE_BACKTRACE:
+                test_trace_backtrace = true;
+                break;
+            case OPT_TRACE_PERIOD: {
+                char *parsed = NULL;
+                test_trace_period = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-period: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
+            case OPT_TRACE_FREQ: {
+                char *parsed = NULL;
+                test_trace_freq = strtoumax(optarg, &parsed, 0);
+                if (parsed == optarg) {
+                    fprintf(stderr, "error: invalid trace-freq: %s\n", optarg);
+                    exit(-1);
+                }
+                break;
+            }
             case OPT_READ_SLEEP: {
                 char *parsed = NULL;
                 double read_sleep = strtod(optarg, &parsed);

+ 36 - 26
scripts/bench.py

@@ -35,19 +35,12 @@ VALGRIND_TOOL = ['valgrind']
 PERF_SCRIPT = ['./scripts/perf.py']
 
 
-def openio(path, mode='r', buffering=-1, nb=False):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r', buffering)
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w', buffering)
-    elif nb and 'a' in mode:
-        return os.fdopen(os.open(
-                path,
-                os.O_WRONLY | os.O_CREAT | os.O_APPEND | os.O_NONBLOCK,
-                0o666),
-            mode,
-            buffering)
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
         return open(path, mode, buffering)
 
@@ -533,6 +526,12 @@ def find_runner(runner, **args):
         cmd.append('-d%s' % args['disk'])
     if args.get('trace'):
         cmd.append('-t%s' % args['trace'])
+    if args.get('trace_backtrace'):
+        cmd.append('--trace-backtrace')
+    if args.get('trace_period'):
+        cmd.append('--trace-period=%s' % args['trace_period'])
+    if args.get('trace_freq'):
+        cmd.append('--trace-freq=%s' % args['trace_freq'])
     if args.get('read_sleep'):
         cmd.append('--read-sleep=%s' % args['read_sleep'])
     if args.get('prog_sleep'):
@@ -747,7 +746,7 @@ class BenchFailure(Exception):
         self.stdout = stdout
         self.assert_ = assert_
 
-def run_stage(name, runner_, ids, output_, **args):
+def run_stage(name, runner_, ids, stdout_, trace_, output_, **args):
     # get expected suite/case/perm counts
     (case_suites,
         expected_suite_perms,
@@ -795,7 +794,6 @@ def run_stage(name, runner_, ids, output_, **args):
         os.close(spty)
         children.add(proc)
         mpty = os.fdopen(mpty, 'r', 1)
-        stdout = None
 
         last_id = None
         last_stdout = []
@@ -812,18 +810,12 @@ def run_stage(name, runner_, ids, output_, **args):
                 if not line:
                     break
                 last_stdout.append(line)
-                if args.get('stdout'):
+                if stdout_:
                     try:
-                        if not stdout:
-                            stdout = openio(args['stdout'], 'a', 1, nb=True)
-                        stdout.write(line)
-                    except OSError as e:
-                        if e.errno not in [
-                                errno.ENXIO,
-                                errno.EPIPE,
-                                errno.EAGAIN]:
-                            raise
-                        stdout = None
+                        stdout_.write(line)
+                        stdout_.flush()
+                    except BrokenPipeError:
+                        pass
                 if args.get('verbose'):
                     sys.stdout.write(line)
 
@@ -1061,6 +1053,8 @@ def run(runner, bench_ids=[], **args):
                 by or 'benches',
                 runner_,
                 [by] if by is not None else bench_ids,
+                stdout,
+                trace,
                 output,
                 **args)
         # collect passes/failures
@@ -1076,9 +1070,15 @@ def run(runner, bench_ids=[], **args):
     stop = time.time()
 
     if stdout:
-        stdout.close()
+        try:
+            stdout.close()
+        except BrokenPipeError:
+            pass
     if trace:
-        trace.close()
+        try:
+            trace.close()
+        except BrokenPipeError:
+            pass
     if output:
         output.close()
 
@@ -1276,6 +1276,16 @@ if __name__ == "__main__":
     bench_parser.add_argument(
         '-t', '--trace',
         help="Direct trace output to this file.")
+    bench_parser.add_argument(
+        '--trace-backtrace',
+        action='store_true',
+        help="Include a backtrace with every trace statement.")
+    bench_parser.add_argument(
+        '--trace-period',
+        help="Sample trace output at this period in cycles.")
+    bench_parser.add_argument(
+        '--trace-freq',
+        help="Sample trace output at this frequency in hz.")
     bench_parser.add_argument(
         '-O', '--stdout',
         help="Direct stdout to this file. Note stderr is already merged here.")
@@ -1353,7 +1363,7 @@ if __name__ == "__main__":
         default=VALGRIND_TOOL,
         help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL)
     bench_parser.add_argument(
-        '--perf',
+        '-p', '--perf',
         help="Run under Linux's perf to sample performance counters, writing "
             "samples to this file.")
     bench_parser.add_argument(

+ 14 - 31
scripts/code.py

@@ -15,7 +15,6 @@
 import collections as co
 import csv
 import difflib
-import glob
 import itertools as it
 import math as m
 import os
@@ -24,7 +23,6 @@ import shlex
 import subprocess as sp
 
 
-OBJ_PATHS = ['*.o']
 NM_TOOL = ['nm']
 NM_TYPES = 'tTrRdD'
 OBJDUMP_TOOL = ['objdump']
@@ -126,16 +124,16 @@ class CodeResult(co.namedtuple('CodeResult', [
             self.size + other.size)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
-def collect(paths, *,
+def collect(obj_paths, *,
         nm_tool=NM_TOOL,
         nm_types=NM_TYPES,
         objdump_tool=OBJDUMP_TOOL,
@@ -147,17 +145,17 @@ def collect(paths, *,
         ' (?P<type>[%s])' % re.escape(nm_types) +
         ' (?P<func>.+?)$')
     line_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)$')
+        '^\s+(?P<no>[0-9]+)'
+            '(?:\s+(?P<dir>[0-9]+))?'
+            '\s+.*'
+            '\s+(?P<path>[^\s]+)$')
     info_pattern = re.compile(
         '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
+            '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
 
     results = []
-    for path in paths:
+    for path in obj_paths:
         # guess the source, if we have debug-info we'll replace this later
         file = re.sub('(\.o)?$', '.c', path, 1)
 
@@ -520,20 +518,7 @@ def main(obj_paths, *,
         **args):
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in obj_paths:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .o files found in %r?" % obj_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(obj_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -613,9 +598,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'obj_paths',
         nargs='*',
-        default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory "
-            "or a list of paths. Defaults to %r." % OBJ_PATHS)
+        help="Input *.o files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

+ 6 - 24
scripts/cov.py

@@ -14,7 +14,6 @@
 
 import collections as co
 import csv
-import glob
 import itertools as it
 import json
 import math as m
@@ -26,8 +25,6 @@ import subprocess as sp
 # TODO use explode_asserts to avoid counting assert branches?
 # TODO use dwarf=info to find functions for inline functions?
 
-
-GCDA_PATHS = ['*.gcda']
 GCOV_TOOL = ['gcov']
 
 
@@ -202,14 +199,14 @@ class CovResult(co.namedtuple('CovResult', [
             self.branches + other.branches)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def collect(gcda_paths, *,
         gcov_tool=GCOV_TOOL,
@@ -592,20 +589,7 @@ def main(gcda_paths, *,
 
     # find sizes
     if not args.get('use', None):
-        # find .gcda files
-        paths = []
-        for path in gcda_paths:
-            if os.path.isdir(path):
-                path = path + '/*.gcda'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .gcda files found in %r?" % gcda_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(gcda_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -707,9 +691,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'gcda_paths',
         nargs='*',
-        default=GCDA_PATHS,
-        help="Description of where to find *.gcda files. May be a directory "
-            "or a list of paths. Defaults to %r." % GCDA_PATHS)
+        help="Input *.gcda files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

+ 14 - 31
scripts/data.py

@@ -15,7 +15,6 @@
 import collections as co
 import csv
 import difflib
-import glob
 import itertools as it
 import math as m
 import os
@@ -24,7 +23,6 @@ import shlex
 import subprocess as sp
 
 
-OBJ_PATHS = ['*.o']
 NM_TOOL = ['nm']
 NM_TYPES = 'dDbB'
 OBJDUMP_TOOL = ['objdump']
@@ -126,16 +124,16 @@ class DataResult(co.namedtuple('DataResult', [
             self.size + other.size)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
-def collect(paths, *,
+def collect(obj_paths, *,
         nm_tool=NM_TOOL,
         nm_types=NM_TYPES,
         objdump_tool=OBJDUMP_TOOL,
@@ -147,17 +145,17 @@ def collect(paths, *,
         ' (?P<type>[%s])' % re.escape(nm_types) +
         ' (?P<func>.+?)$')
     line_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)$')
+        '^\s+(?P<no>[0-9]+)'
+            '(?:\s+(?P<dir>[0-9]+))?'
+            '\s+.*'
+            '\s+(?P<path>[^\s]+)$')
     info_pattern = re.compile(
         '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
+            '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
 
     results = []
-    for path in paths:
+    for path in obj_paths:
         # guess the source, if we have debug-info we'll replace this later
         file = re.sub('(\.o)?$', '.c', path, 1)
 
@@ -520,20 +518,7 @@ def main(obj_paths, *,
         **args):
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in obj_paths:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .o files found in %r?" % obj_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(obj_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -613,9 +598,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'obj_paths',
         nargs='*',
-        default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory "
-            "or a list of paths. Defaults to %r." % OBJ_PATHS)
+        help="Input *.o files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

+ 313 - 297
scripts/perf.py

@@ -4,7 +4,7 @@
 #
 # Example:
 # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
-# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
+# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
 #
 # Copyright (c) 2022, The littlefs authors.
 # SPDX-License-Identifier: BSD-3-Clause
@@ -16,7 +16,6 @@ import csv
 import errno
 import fcntl
 import functools as ft
-import glob
 import itertools as it
 import math as m
 import multiprocessing as mp
@@ -31,7 +30,6 @@ import zipfile
 # TODO support non-zip perf results?
 
 
-PERF_PATHS = ['*.perf']
 PERF_TOOL = ['perf']
 PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
 PERF_FREQ = 100
@@ -147,14 +145,14 @@ class PerfResult(co.namedtuple('PerfResult', [
             self.children + other.children)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 # run perf as a subprocess, storing measurements into a zip file
 def record(command, *,
@@ -164,14 +162,6 @@ def record(command, *,
         perf_events=PERF_EVENTS,
         perf_tool=PERF_TOOL,
         **args):
-    if not command:
-        print('error: no command specified?')
-        sys.exit(-1)
-
-    if not output:
-        print('error: no output file specified?')
-        sys.exit(-1)
-
     # create a temporary file for perf to write to, as far as I can tell
     # this is strictly needed because perf's pipe-mode only works with stdout
     with tempfile.NamedTemporaryFile('rb') as f:
@@ -214,8 +204,187 @@ def record(command, *,
     return err
 
 
+# try to only process each dso once
+#
+# note this only caches with the non-keyword arguments
+def multiprocessing_cache(f):
+    local_cache = {}
+    manager = mp.Manager()
+    global_cache = manager.dict()
+    lock = mp.Lock()
+
+    def multiprocessing_cache(*args, **kwargs):
+        # check local cache?
+        if args in local_cache:
+            return local_cache[args]
+        # check global cache?
+        with lock:
+            if args in global_cache:
+                v = global_cache[args]
+                local_cache[args] = v
+                return v
+            # fall back to calling the function
+            v = f(*args, **kwargs)
+            global_cache[args] = v
+            local_cache[args] = v
+            return v
+
+    return multiprocessing_cache
+
+@multiprocessing_cache
+def collect_syms_and_lines(obj_path, *,
+        objdump_tool=None,
+        **args):
+    symbol_pattern = re.compile(
+        '^(?P<addr>[0-9a-fA-F]+)'
+            '\s+.*'
+            '\s+(?P<size>[0-9a-fA-F]+)'
+            '\s+(?P<name>[^\s]+)\s*$')
+    line_pattern = re.compile(
+        '^\s+(?:'
+            # matches dir/file table
+            '(?P<no>[0-9]+)'
+                '(?:\s+(?P<dir>[0-9]+))?'
+                '\s+.*'
+                '\s+(?P<path>[^\s]+)'
+            # matches line opcodes
+            '|' '\[[^\]]*\]\s+'
+                '(?:'
+                    '(?P<op_special>Special)'
+                    '|' '(?P<op_copy>Copy)'
+                    '|' '(?P<op_end>End of Sequence)'
+                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
+                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
+                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
+                    '|' '.' ')*'
+            ')$', re.IGNORECASE)
+
+    # figure out symbol addresses and file+line ranges
+    syms = {}
+    sym_at = []
+    cmd = objdump_tool + ['-t', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = symbol_pattern.match(line)
+        if m:
+            name = m.group('name')
+            addr = int(m.group('addr'), 16)
+            size = int(m.group('size'), 16)
+            # ignore zero-sized symbols
+            if not size:
+                continue
+            # note multiple symbols can share a name
+            if name not in syms:
+                syms[name] = set()
+            syms[name].add((addr, size))
+            sym_at.append((addr, name, size))
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep largest/first when duplicates
+    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
+    sym_at_ = []
+    for addr, name, size in sym_at:
+        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
+            sym_at_.append((addr, name, size))
+    sym_at = sym_at_
+
+    # state machine for dwarf line numbers, note that objdump's
+    # decodedline seems to have issues with multiple dir/file
+    # tables, which is why we need this
+    lines = []
+    line_at = []
+    dirs = {}
+    files = {}
+    op_file = 1
+    op_line = 1
+    op_addr = 0
+    cmd = objdump_tool + ['--dwarf=rawline', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = line_pattern.match(line)
+        if m:
+            if m.group('no') and not m.group('dir'):
+                # found a directory entry
+                dirs[int(m.group('no'))] = m.group('path')
+            elif m.group('no'):
+                # found a file entry
+                dir = int(m.group('dir'))
+                if dir in dirs:
+                    files[int(m.group('no'))] = os.path.join(
+                        dirs[dir],
+                        m.group('path'))
+                else:
+                    files[int(m.group('no'))] = m.group('path')
+            else:
+                # found a state machine update
+                if m.group('op_file'):
+                    op_file = int(m.group('op_file'), 0)
+                if m.group('op_line'):
+                    op_line = int(m.group('op_line'), 0)
+                if m.group('op_addr'):
+                    op_addr = int(m.group('op_addr'), 0)
+
+                if (m.group('op_special')
+                        or m.group('op_copy')
+                        or m.group('op_end')):
+                    file = os.path.abspath(files.get(op_file, '?'))
+                    lines.append((file, op_line, op_addr))
+                    line_at.append((op_addr, file, op_line))
+
+                if m.group('op_end'):
+                    op_file = 1
+                    op_line = 1
+                    op_addr = 0
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep first when duplicates
+    lines.sort()
+    lines_ = []
+    for file, line, addr in lines:
+        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
+            lines_.append((file, line, addr))
+    lines = lines_
+
+    # sort and keep first when duplicates
+    line_at.sort()
+    line_at_ = []
+    for addr, file, line in line_at:
+        if len(line_at_) == 0 or line_at_[-1][0] != addr:
+            line_at_.append((addr, file, line))
+    line_at = line_at_
+
+    return syms, sym_at, lines, line_at
+
+
 def collect_decompressed(path, *,
         perf_tool=PERF_TOOL,
+        sources=None,
         everything=False,
         propagate=0,
         depth=1,
@@ -228,7 +397,7 @@ def collect_decompressed(path, *,
         '\s+(?P<event>[^:]+):')
     frame_pattern = re.compile(
         '\s+(?P<addr>\w+)'
-        '\s+(?P<sym>[^\s]+)'
+        '\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
         '\s+\((?P<dso>[^\)]+)\)')
     events = {
         'cycles':           'cycles',
@@ -254,6 +423,9 @@ def collect_decompressed(path, *,
     last_event = ''
     last_period = 0
     last_stack = []
+    deltas = co.defaultdict(lambda: {})
+    syms_ = co.defaultdict(lambda: {})
+    at_cache = {}
     results = {}
 
     def commit():
@@ -276,36 +448,117 @@ def collect_decompressed(path, *,
     for line in proc.stdout:
         # we need to process a lot of data, so wait to use regex as late
         # as possible
-        if not line:
-            continue
         if not line.startswith('\t'):
-            m = sample_pattern.match(line)
-            if m:
-                if last_stack:
-                    commit()
-                last_event = m.group('event')
-                last_filtered = last_event in events
-                last_period = int(m.group('period'), 0)
-                last_stack = []
+            if last_filtered:
+                commit()
+            last_filtered = False
+
+            if line:
+                m = sample_pattern.match(line)
+                if m and m.group('event') in events:
+                    last_filtered = True
+                    last_event = m.group('event')
+                    last_period = int(m.group('period'), 0)
+                    last_stack = []
+
         elif last_filtered:
             m = frame_pattern.match(line)
             if m:
                 # filter out internal/kernel functions
                 if not everything and (
                         m.group('sym').startswith('__')
-                        or m.group('dso').startswith('/usr/lib')
-                        or not m.group('sym')[:1].isalpha()):
+                        or m.group('sym').startswith('0')
+                        or m.group('sym').startswith('-')
+                        or m.group('sym').startswith('[')
+                        or m.group('dso').startswith('/usr/lib')):
                     continue
 
-                last_stack.append((
-                    m.group('dso'),
-                    m.group('sym'),
-                    int(m.group('addr'), 16)))
+                dso = m.group('dso')
+                sym = m.group('sym')
+                off = int(m.group('off'), 0) if m.group('off') else 0
+                addr_ = int(m.group('addr'), 16)
+
+                # get the syms/lines for the dso, this is cached
+                syms, sym_at, lines, line_at = collect_syms_and_lines(
+                    dso,
+                    **args)
+
+                # ASLR is tricky: we have symbols+offsets, but static
+                # symbols mean we may have multiple options for each symbol.
+                #
+                # To try to solve this, we use previously seen symbols to
+                # build confidence for the correct ASLR delta. This means we
+                # may guess incorrectly for early symbols, but this will only
+                # affect a few samples.
+                if sym in syms:
+                    sym_addr_ = addr_ - off
+
+                    # track possible deltas?
+                    for sym_addr, size in syms[sym]:
+                        delta = sym_addr - sym_addr_
+                        if delta not in deltas[dso]:
+                            deltas[dso][delta] = sum(
+                                abs(a_+delta - a)
+                                for s, (a_, _) in syms_[dso].items()
+                                for a, _ in syms[s])
+                    for delta in deltas[dso].keys():
+                        deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
+                    syms_[dso][sym] = sym_addr_, size
+
+                    # guess the best delta
+                    delta, _ = min(deltas[dso].items(),
+                        key=lambda x: (x[1], x[0]))
+                    addr = addr_ + delta
+
+                    # cached?
+                    if (dso,addr) in at_cache:
+                        cached = at_cache[(dso,addr)]
+                        if cached is None:
+                            # cache says to skip
+                            continue
+                        file, line = cached
+                    else:
+                        # find file+line
+                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
+                        if i > 0:
+                            _, file, line = line_at[i-1]
+                        else:
+                            file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
+
+                        # ignore filtered sources
+                        if sources is not None:
+                            if not any(
+                                    os.path.abspath(file) == os.path.abspath(s)
+                                    for s in sources):
+                                at_cache[(dso,addr)] = None
+                                continue
+                        else:
+                            # default to only cwd
+                            if not everything and not os.path.commonpath([
+                                    os.getcwd(),
+                                    os.path.abspath(file)]) == os.getcwd():
+                                at_cache[(dso,addr)] = None
+                                continue
+
+                        # simplify path
+                        if os.path.commonpath([
+                                os.getcwd(),
+                                os.path.abspath(file)]) == os.getcwd():
+                            file = os.path.relpath(file)
+                        else:
+                            file = os.path.abspath(file)
+
+                        at_cache[(dso,addr)] = file, line
+                else:
+                    file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
+
+                last_stack.append((file, sym, line))
 
                # stop propagating?
                 if propagate and len(last_stack) >= propagate:
+                    commit()
                     last_filtered = False
-    if last_stack:
+    if last_filtered:
         commit()
 
     proc.wait()
@@ -341,35 +594,15 @@ def starapply(args):
     f, args, kwargs = args
     return f(*args, **kwargs)
 
-def collect(paths, *,
+def collect(perf_paths, *,
         jobs=None,
-        objdump_tool=None,
-        sources=None,
-        everything=False,
         **args):
-    symbol_pattern = re.compile(
-        '^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$')
-    line_pattern = re.compile(
-        '^\s+(?:'
-            # matches dir/file table
-            '(?P<no>[0-9]+)\s+'
-                '(?:(?P<dir>[0-9]+)\s+)?'
-                '.*\s+'
-                '(?P<path>[^\s]+)'
-            # matches line opcodes
-            '|' '\[[^\]]*\]\s+'
-                '(?:'
-                    '(?P<op_special>Special)'
-                    '|' '(?P<op_copy>Copy)'
-                    '|' '(?P<op_end>End of Sequence)'
-                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
-                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
-                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
-                    '|' '.' ')*'
-            ')$', re.IGNORECASE)
+    # automatic job detection?
+    if jobs == 0:
+        jobs = len(os.sched_getaffinity(0))
 
     records = []
-    for path in paths:
+    for path in perf_paths:
         # each .perf file is actually a zip file containing perf files from
         # multiple runs
         with zipfile.ZipFile(path) as z:
@@ -377,225 +610,17 @@ def collect(paths, *,
 
     # we're dealing with a lot of data but also surprisingly
     # parallelizable
-    dsos = {}
-    results = []
-    with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p:
-        for results_ in p.imap_unordered(
-                starapply,
-                ((collect_job, (path, i), dict(
-                    everything=everything,
-                    **args))
-                    for path, i in records)):
-
-            # organize by dso
-            results__ = {}
-            for r in results_:
-                if r.file not in results__:
-                    results__[r.file] = []
-                results__[r.file].append(r)
-            results_ = results__
-
-            for dso, results_ in results_.items():
-                if dso not in dsos:
-                    # find file+line ranges for dsos
-                    #
-                    # do this here so we only process each dso once
-                    syms = {}
-                    sym_at = []
-                    cmd = objdump_tool + ['-t', dso]
-                    if args.get('verbose'):
-                        print(' '.join(shlex.quote(c) for c in cmd))
-                    proc = sp.Popen(cmd,
-                        stdout=sp.PIPE,
-                        stderr=sp.PIPE if not args.get('verbose') else None,
-                        universal_newlines=True,
-                        errors='replace',
-                        close_fds=False)
-                    for line in proc.stdout:
-                        m = symbol_pattern.match(line)
-                        if m:
-                            name = m.group('name')
-                            addr = int(m.group('addr'), 16)
-                            # note multiple symbols can share a name
-                            if name not in syms:
-                                syms[name] = set()
-                            syms[name].add(addr)
-                            sym_at.append((addr, name))
-                    proc.wait()
-                    if proc.returncode != 0:
-                        if not args.get('verbose'):
-                            for line in proc.stderr:
-                                sys.stdout.write(line)
-                        # assume no debug-info on failure
-                        pass
-
-                    # sort and keep first when duplicates
-                    sym_at.sort()
-                    sym_at_ = []
-                    for addr, name in sym_at:
-                        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
-                            sym_at_.append((addr, name))
-                    sym_at = sym_at_
-
-                    # state machine for dwarf line numbers, note that objdump's
-                    # decodedline seems to have issues with multiple dir/file
-                    # tables, which is why we need this
-                    line_at = []
-                    dirs = {}
-                    files = {}
-                    op_file = 1
-                    op_line = 1
-                    op_addr = 0
-                    cmd = objdump_tool + ['--dwarf=rawline', dso]
-                    if args.get('verbose'):
-                        print(' '.join(shlex.quote(c) for c in cmd))
-                    proc = sp.Popen(cmd,
-                        stdout=sp.PIPE,
-                        stderr=sp.PIPE if not args.get('verbose') else None,
-                        universal_newlines=True,
-                        errors='replace',
-                        close_fds=False)
-                    for line in proc.stdout:
-                        m = line_pattern.match(line)
-                        if m:
-                            if m.group('no') and not m.group('dir'):
-                                # found a directory entry
-                                dirs[int(m.group('no'))] = m.group('path')
-                            elif m.group('no'):
-                                # found a file entry
-                                dir = int(m.group('dir'))
-                                if dir in dirs:
-                                    files[int(m.group('no'))] = os.path.join(
-                                        dirs[dir],
-                                        m.group('path'))
-                                else:
-                                    files[int(m.group('no'))] = m.group('path')
-                            else:
-                                # found a state machine update
-                                if m.group('op_file'):
-                                    op_file = int(m.group('op_file'), 0)
-                                if m.group('op_line'):
-                                    op_line = int(m.group('op_line'), 0)
-                                if m.group('op_addr'):
-                                    op_addr = int(m.group('op_addr'), 0)
-
-                                if (m.group('op_special')
-                                        or m.group('op_copy')
-                                        or m.group('op_end')):
-                                    line_at.append((
-                                        op_addr,
-                                        files.get(op_file, '?'),
-                                        op_line))
-
-                                if m.group('op_end'):
-                                    op_file = 1
-                                    op_line = 1
-                                    op_addr = 0
-                    proc.wait()
-                    if proc.returncode != 0:
-                        if not args.get('verbose'):
-                            for line in proc.stderr:
-                                sys.stdout.write(line)
-                        # assume no debug-info on failure
-                        pass
-
-                    # sort and keep first when duplicates
-                    #
-                    # I think dwarf requires this to be sorted but just in case
-                    line_at.sort()
-                    line_at_ = []
-                    for addr, file, line in line_at:
-                        if len(line_at_) == 0 or line_at_[-1][0] != addr:
-                            line_at_.append((addr, file, line))
-                    line_at = line_at_
-
-                    # discard lines outside of the range of the containing
-                    # function, these are introduced by dwarf for inlined
-                    # functions but don't map to elf-level symbols
-                    sym_at_ = []
-                    for addr, sym in sym_at:
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
-                            sym_at_.append((file, line, sym))
-                    sym_at_.sort()
-
-                    line_at_ = []
-                    for addr, file, line in line_at:
-                        # only keep if sym-at-addr and sym-at-line match
-                        i = bisect.bisect(
-                            sym_at, addr, key=lambda x: x[0])
-                        j = bisect.bisect(
-                            sym_at_, (file, line), key=lambda x: (x[0], x[1]))
-                        if i > 0 and j > 0 and (
-                                sym_at[i-1][1] == sym_at_[j-1][2]):
-                            line_at_.append((addr, file, line))
-                    line_at = line_at_
-
-                    dsos[dso] = (syms, sym_at, line_at)
-
-                syms, _, line_at = dsos[dso]
-
-                # first try to reverse ASLR
-                def deltas(r, d):
-                    if '+' in r.function:
-                        sym, off = r.function.split('+', 1)
-                        off = int(off, 0)
-                    else:
-                        sym, off = r.function, 0
-                    addr = r.line - off + d
-
-                    for addr_ in syms.get(sym, []):
-                        yield addr_ - addr
-
-                delta = min(
-                    it.chain.from_iterable(
-                        deltas(r, 0) for r in results_),
-                    key=lambda d: sum(it.chain.from_iterable(
-                        deltas(r, d) for r in results_)),
-                    default=0)
-
-                # then try to map addrs -> file+line
-                #
-                # note we need to do this recursively
-                def remap(results):
-                    results_ = []
-                    for r in results:
-                        addr = r.line + delta
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
-                        else:
-                            file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
-
-                        # ignore filtered sources
-                        if sources is not None:
-                            if not any(
-                                    os.path.abspath(file) == os.path.abspath(s)
-                                    for s in sources):
-                                continue
-                        else:
-                            # default to only cwd
-                            if not everything and not os.path.commonpath([
-                                    os.getcwd(),
-                                    os.path.abspath(file)]) == os.getcwd():
-                                continue
-
-                        # simplify path
-                        if os.path.commonpath([
-                                os.getcwd(),
-                                os.path.abspath(file)]) == os.getcwd():
-                            file = os.path.relpath(file)
-                        else:
-                            file = os.path.abspath(file)
-
-                        function, *_ = r.function.split('+', 1)
-                        results_.append(r._replace(
-                            file=file, function=function, line=line,
-                            children=remap(r.children)))
-                    return results_
-
-                results.extend(remap(results_))
+    if jobs is not None:
+        results = []
+        with mp.Pool(jobs) as p:
+            for results_ in p.imap_unordered(
+                    starapply,
+                    ((collect_job, (path, i), args) for path, i in records)):
+                results.extend(results_)
+    else:
+        results = []
+        for path, i in records:
+            results.extend(collect_job(path, i, **args))
 
     return results
 
@@ -640,7 +665,7 @@ def fold(Result, results, *,
             Result, r.children,
             by=by,
             defines=defines)))
-    folded = folded_ 
+    folded = folded_
 
     return folded
 
@@ -983,7 +1008,6 @@ def report(perf_paths, *,
         fields=None,
         defines=None,
         sort=None,
-        self=False,
         branches=False,
         caches=False,
         **args):
@@ -1001,20 +1025,7 @@ def report(perf_paths, *,
 
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in perf_paths:
-            if os.path.isdir(path):
-                path = path + '/*.perf'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .perf files found in %r?" % perf_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(perf_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -1124,8 +1135,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'perf_paths',
         nargs=nargs,
-        help="Description of where to find *.perf files. May be a directory "
-            "or a list of paths. Defaults to %r." % PERF_PATHS)
+        help="Input *.perf files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',
@@ -1224,7 +1234,7 @@ if __name__ == "__main__":
         nargs='?',
         type=lambda x: tuple(float(x) for x in x.split(',')),
         const=THRESHOLD,
-        help="Show lines wth samples above this threshold as a percent of "
+        help="Show lines with samples above this threshold as a percent of "
             "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
     parser.add_argument(
         '-c', '--context',
@@ -1295,7 +1305,13 @@ if __name__ == "__main__":
 
     # perf_paths/command overlap, so need to do some munging here
     args.command = args.perf_paths
-    args.perf_paths = args.perf_paths or PERF_PATHS
+    if args.record:
+        if not args.command:
+            print('error: no command specified?')
+            sys.exit(-1)
+        if not args.output:
+            print('error: no output file specified?')
+            sys.exit(-1)
 
     sys.exit(main(**{k: v
         for k, v in vars(args).items()

+ 1252 - 0
scripts/perfbd.py

@@ -0,0 +1,1252 @@
+#!/usr/bin/env python3
+#
+# Aggregate and report call-stack propagated block-device operations
+# from trace output.
+#
+# Example:
+# ./scripts/bench.py -ttrace
+# ./scripts/perfbd.py trace -j -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
+#
+# Copyright (c) 2022, The littlefs authors.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+import bisect
+import collections as co
+import csv
+import functools as ft
+import itertools as it
+import math as m
+import multiprocessing as mp
+import os
+import re
+import shlex
+import subprocess as sp
+
+
+OBJDUMP_TOOL = ['objdump']
+THRESHOLD = (0.5, 0.85)
+
+
+# integer fields
+class Int(co.namedtuple('Int', 'x')):
+    __slots__ = ()
+    def __new__(cls, x=0):
+        if isinstance(x, Int):
+            return x
+        if isinstance(x, str):
+            try:
+                x = int(x, 0)
+            except ValueError:
+                # also accept +-∞ and +-inf
+                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
+                    x = m.inf
+                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
+                    x = -m.inf
+                else:
+                    raise
+        assert isinstance(x, int) or m.isinf(x), x
+        return super().__new__(cls, x)
+
+    def __str__(self):
+        if self.x == m.inf:
+            return '∞'
+        elif self.x == -m.inf:
+            return '-∞'
+        else:
+            return str(self.x)
+
+    def __int__(self):
+        assert not m.isinf(self.x)
+        return self.x
+
+    def __float__(self):
+        return float(self.x)
+
+    none = '%7s' % '-'
+    def table(self):
+        return '%7s' % (self,)
+
+    diff_none = '%7s' % '-'
+    diff_table = table
+
+    def diff_diff(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        diff = new - old
+        if diff == +m.inf:
+            return '%7s' % '+∞'
+        elif diff == -m.inf:
+            return '%7s' % '-∞'
+        else:
+            return '%+7d' % diff
+
+    def ratio(self, other):
+        new = self.x if self else 0
+        old = other.x if other else 0
+        if m.isinf(new) and m.isinf(old):
+            return 0.0
+        elif m.isinf(new):
+            return +m.inf
+        elif m.isinf(old):
+            return -m.inf
+        elif not old and not new:
+            return 0.0
+        elif not old:
+            return 1.0
+        else:
+            return (new-old) / old
+
+    def __add__(self, other):
+        return self.__class__(self.x + other.x)
+
+    def __sub__(self, other):
+        return self.__class__(self.x - other.x)
+
+    def __mul__(self, other):
+        return self.__class__(self.x * other.x)
+
+# perf results
+class PerfBdResult(co.namedtuple('PerfBdResult', [
+        'file', 'function', 'line',
+        'readed', 'proged', 'erased',
+        'children'])):
+    _by = ['file', 'function', 'line']
+    _fields = ['readed', 'proged', 'erased']
+    _types = {'readed': Int, 'proged': Int, 'erased': Int}
+
+    __slots__ = ()
+    def __new__(cls, file='', function='', line=0,
+            readed=0, proged=0, erased=0,
+            children=[]):
+        return super().__new__(cls, file, function, int(Int(line)),
+            Int(readed), Int(proged), Int(erased),
+            children)
+
+    def __add__(self, other):
+        return PerfBdResult(self.file, self.function, self.line,
+            self.readed + other.readed,
+            self.proged + other.proged,
+            self.erased + other.erased,
+            self.children + other.children)
+
+
+def openio(path, mode='r', buffering=-1):
+    if path == '-':
+        if mode == 'r':
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
+        else:
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
+    else:
+        return open(path, mode, buffering)
+
+def collect_syms_and_lines(obj_path, *,
+        objdump_tool=None,
+        **args):
+    symbol_pattern = re.compile(
+        '^(?P<addr>[0-9a-fA-F]+)'
+            '\s+.*'
+            '\s+(?P<size>[0-9a-fA-F]+)'
+            '\s+(?P<name>[^\s]+)\s*$')
+    line_pattern = re.compile(
+        '^\s+(?:'
+            # matches dir/file table
+            '(?P<no>[0-9]+)'
+                '(?:\s+(?P<dir>[0-9]+))?'
+                '\s+.*'
+                '\s+(?P<path>[^\s]+)'
+            # matches line opcodes
+            '|' '\[[^\]]*\]\s+'
+                '(?:'
+                    '(?P<op_special>Special)'
+                    '|' '(?P<op_copy>Copy)'
+                    '|' '(?P<op_end>End of Sequence)'
+                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
+                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
+                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
+                    '|' '.' ')*'
+            ')$', re.IGNORECASE)
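+
+    # for reference, these patterns are meant to match objdump output
+    # along these lines, though exact formatting may vary with the
+    # binutils version (addresses/sizes here are illustrative):
+    #
+    #   00000000000017e4 g     F .text  000000000000009c lfs_bd_read
+    #
+    #   [0x014d] Special opcode 61: advance Address by 4 to 0x17e8
+    #       and Line by 2 to 38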
+
+    # figure out symbol addresses
+    syms = {}
+    sym_at = []
+    cmd = objdump_tool + ['-t', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = symbol_pattern.match(line)
+        if m:
+            name = m.group('name')
+            addr = int(m.group('addr'), 16)
+            size = int(m.group('size'), 16)
+            # ignore zero-sized symbols
+            if not size:
+                continue
+            # note multiple symbols can share a name
+            if name not in syms:
+                syms[name] = set()
+            syms[name].add((addr, size))
+            sym_at.append((addr, name, size))
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep largest/first when duplicates
+    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
+    sym_at_ = []
+    for addr, name, size in sym_at:
+        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
+            sym_at_.append((addr, name, size))
+    sym_at = sym_at_
+
+    # state machine for dwarf line numbers, note that objdump's
+    # decodedline seems to have issues with multiple dir/file
+    # tables, which is why we need this
+    lines = []
+    line_at = []
+    dirs = {}
+    files = {}
+    op_file = 1
+    op_line = 1
+    op_addr = 0
+    cmd = objdump_tool + ['--dwarf=rawline', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = line_pattern.match(line)
+        if m:
+            if m.group('no') and not m.group('dir'):
+                # found a directory entry
+                dirs[int(m.group('no'))] = m.group('path')
+            elif m.group('no'):
+                # found a file entry
+                dir = int(m.group('dir'))
+                if dir in dirs:
+                    files[int(m.group('no'))] = os.path.join(
+                        dirs[dir],
+                        m.group('path'))
+                else:
+                    files[int(m.group('no'))] = m.group('path')
+            else:
+                # found a state machine update
+                if m.group('op_file'):
+                    op_file = int(m.group('op_file'), 0)
+                if m.group('op_line'):
+                    op_line = int(m.group('op_line'), 0)
+                if m.group('op_addr'):
+                    op_addr = int(m.group('op_addr'), 0)
+
+                if (m.group('op_special')
+                        or m.group('op_copy')
+                        or m.group('op_end')):
+                    file = os.path.abspath(files.get(op_file, '?'))
+                    lines.append((file, op_line, op_addr))
+                    line_at.append((op_addr, file, op_line))
+
+                if m.group('op_end'):
+                    op_file = 1
+                    op_line = 1
+                    op_addr = 0
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep first when duplicates
+    lines.sort()
+    lines_ = []
+    for file, line, addr in lines:
+        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
+            lines_.append((file, line, addr))
+    lines = lines_
+
+    # sort and keep first when duplicates
+    line_at.sort()
+    line_at_ = []
+    for addr, file, line in line_at:
+        if len(line_at_) == 0 or line_at_[-1][0] != addr:
+            line_at_.append((addr, file, line))
+    line_at = line_at_
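+
+    # at this point:
+    # - syms maps name -> set of (addr, size) tuples
+    # - sym_at is a list of (addr, name, size) sorted by addr
+    # - lines is sorted by (file, line), for file+line -> addr lookups
+    # - line_at is sorted by addr, for addr -> file+line lookups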
+
+    return syms, sym_at, lines, line_at
+
+
+def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
+        sources=None,
+        everything=False,
+        propagate=0,
+        depth=1,
+        **args):
+    trace_pattern = re.compile(
+        '^(?P<file>[^:]*):(?P<line>[0-9]+):trace:\s*(?P<prefix>[^\s]*?bd_)(?:'
+            '(?P<read>read)\('
+                '\s*(?P<read_ctx>\w+)' '\s*,'
+                '\s*(?P<read_block>\w+)' '\s*,'
+                '\s*(?P<read_off>\w+)' '\s*,'
+                '\s*(?P<read_buffer>\w+)' '\s*,'
+                '\s*(?P<read_size>\w+)' '\s*\)'
+            '|' '(?P<prog>prog)\('
+                '\s*(?P<prog_ctx>\w+)' '\s*,'
+                '\s*(?P<prog_block>\w+)' '\s*,'
+                '\s*(?P<prog_off>\w+)' '\s*,'
+                '\s*(?P<prog_buffer>\w+)' '\s*,'
+                '\s*(?P<prog_size>\w+)' '\s*\)'
+            '|' '(?P<erase>erase)\('
+                '\s*(?P<erase_ctx>\w+)' '\s*,'
+                '\s*(?P<erase_block>\w+)'
+                '\s*\(\s*(?P<erase_size>\w+)\s*\)' '\s*\)' ')\s*$')
+    frame_pattern = re.compile(
+        '^\s+at (?P<addr>\w+)\s*$')
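+
+    # these match trace lines and their tab-indented backtrace frames,
+    # which look roughly like this (paths/addresses illustrative):
+    #
+    #   lfs.c:83:trace: lfs_emubd_read(0xa7f8, 0x0, 16, 0x55f3, 16)
+    #   \tat 0x55f3a71a17e4
+    #   \tat 0x55f3a71a2f01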
+
+    # parse all of the trace files for read/prog/erase operations
+    last_filtered = False
+    last_file = None
+    last_line = None
+    last_sym = None
+    last_readed = 0
+    last_proged = 0
+    last_erased = 0
+    last_stack = []
+    last_delta = None
+    at_cache = {}
+    results = {}
+
+    def commit():
+        # fallback to just capturing top-level measurements
+        if not last_stack:
+            file = last_file
+            sym = last_sym
+            line = last_line
+
+            # ignore filtered sources
+            if sources is not None:
+                if not any(
+                        os.path.abspath(file)
+                            == os.path.abspath(s)
+                        for s in sources):
+                    return
+            else:
+                # default to only cwd
+                if not everything and not os.path.commonpath([
+                        os.getcwd(),
+                        os.path.abspath(file)]) == os.getcwd():
+                    return
+
+            # simplify path
+            if os.path.commonpath([
+                    os.getcwd(),
+                    os.path.abspath(file)]) == os.getcwd():
+                file = os.path.relpath(file)
+            else:
+                file = os.path.abspath(file)
+
+            # accumulate, the same call site can commit more than once
+            r, p, e, children = results.get((file, sym, line), (0, 0, 0, {}))
+            results[(file, sym, line)] = (
+                r+last_readed,
+                p+last_proged,
+                e+last_erased,
+                children)
+        else:
+            # tail-recursively propagate measurements
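+            #
+            # for example (with depth >= 3), a backtrace A -> B -> C,
+            # with C closest to the bd operation, adds the operation's
+            # cost to A, B, and C at the top level, and to the nested
+            # chains A{B{C}} and B{C}, which is what lets table()
+            # render the call tree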
+            for i in range(len(last_stack)):
+                results_ = results
+                for j in reversed(range(i+1)):
+                    if i+1-j > depth:
+                        break
+
+                    # propagate
+                    name = last_stack[j]
+                    if name in results_:
+                        r, p, e, children = results_[name]
+                    else:
+                        r, p, e, children = 0, 0, 0, {}
+                    results_[name] = (
+                        r+last_readed,
+                        p+last_proged,
+                        e+last_erased,
+                        children)
+
+                    # recurse
+                    results_ = results_[name][-1]
+
+    with openio(path) as f:
+        # try to jump to the middle of the file? we need to step out of
+        # utf8-safe mode and then resync with the next newline to avoid
+        # parsing half a line
+        if start is not None and start > 0:
+            fd = f.fileno()
+            os.lseek(fd, start, os.SEEK_SET)
+            while os.read(fd, 1) not in {b'\n', b'\r', b''}:
+                pass
+            f = os.fdopen(fd)
+
+        for line in f:
+            # we have a lot of data, try to take a few shortcuts,
+            # string search is much faster than regex so try to use
+            # regex as late as possible.
+            if not line.startswith('\t'):
+                if last_filtered:
+                    commit()
+                last_filtered = False
+
+                # done processing our slice?
+                if stop is not None:
+                    if os.lseek(f.fileno(), 0, os.SEEK_CUR) > stop:
+                        break
+
+                if 'trace' in line and 'bd' in line:
+                    m = trace_pattern.match(line)
+                    if m:
+                        last_filtered = True
+                        last_file = os.path.abspath(m.group('file'))
+                        last_line = int(m.group('line'), 0)
+                        last_sym = m.group('prefix')
+                        last_readed = 0
+                        last_proged = 0
+                        last_erased = 0
+                        last_stack = []
+                        last_delta = None
+
+                        if m.group('read'):
+                            last_sym += m.group('read')
+                            last_readed += int(m.group('read_size'))
+                        elif m.group('prog'):
+                            last_sym += m.group('prog')
+                            last_proged += int(m.group('prog_size'))
+                        elif m.group('erase'):
+                            last_sym += m.group('erase')
+                            last_erased += int(m.group('erase_size'))
+
+            elif last_filtered:
+                m = frame_pattern.match(line)
+                if m:
+                    addr_ = int(m.group('addr'), 0)
+
+                    # before we can do anything with addr, we need to
+                    # reverse ASLR, fortunately we know the file+line of
+                    # the first stack frame, so we can use that as a point
+                    # of reference
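+                    #
+                    # e.g. if lfs.c:83 maps to 0x17e4 at link time, but
+                    # the first frame reports 0x55f3a71a17e4, the delta
+                    # recovers the ASLR offset and lets us map every
+                    # later frame back to link-time addresses (addresses
+                    # illustrative)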
+                    if last_delta is None:
+                        i = bisect.bisect(lines, (last_file, last_line),
+                            key=lambda x: (x[0], x[1]))
+                        if i > 0:
+                            last_delta = lines[i-1][2] - addr_
+                        else:
+                            # can't reverse ASLR, give up on backtrace
+                            commit()
+                            last_filtered = False
+                            continue
+
+                    addr = addr_ + last_delta
+
+                    # cached?
+                    if addr in at_cache:
+                        cached = at_cache[addr]
+                        if cached is None:
+                            # cache says to skip
+                            continue
+                        file, sym, line = cached
+                    else:
+                        # find sym
+                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
+                        # check that we're actually in the sym's size
+                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
+                            _, sym, _ = sym_at[i-1]
+                        else:
+                            sym = hex(addr)
+
+                        # filter out internal/unknown functions
+                        if not everything and (
+                                sym.startswith('__')
+                                or sym.startswith('0')
+                                or sym.startswith('-')
+                                or sym == '_start'):
+                            at_cache[addr] = None
+                            continue
+
+                        # find file+line
+                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
+                        if i > 0:
+                            _, file, line = line_at[i-1]
+                        elif len(last_stack) == 0:
+                            file, line = last_file, last_line
+                        else:
+                            # no line info for this frame
+                            file, line = '?', 0
+
+                        # ignore filtered sources
+                        if sources is not None:
+                            if not any(
+                                    os.path.abspath(file)
+                                        == os.path.abspath(s)
+                                    for s in sources):
+                                at_cache[addr] = None
+                                continue
+                        else:
+                            # default to only cwd
+                            if not everything and not os.path.commonpath([
+                                    os.getcwd(),
+                                    os.path.abspath(file)]) == os.getcwd():
+                                at_cache[addr] = None
+                                continue
+
+                        # simplify path
+                        if os.path.commonpath([
+                                os.getcwd(),
+                                os.path.abspath(file)]) == os.getcwd():
+                            file = os.path.relpath(file)
+                        else:
+                            file = os.path.abspath(file)
+
+                        at_cache[addr] = file, sym, line
+
+                    last_stack.append((file, sym, line))
+
+                    # stop propagating?
+                    if propagate and len(last_stack) >= propagate:
+                        commit()
+                        last_filtered = False
+        if last_filtered:
+            commit()
+
+    # rearrange results into result type
+    def to_results(results):
+        results_ = []
+        for name, (r, p, e, children) in results.items():
+            results_.append(PerfBdResult(*name,
+                r, p, e,
+                children=to_results(children)))
+        return results_
+
+    return to_results(results)
+
+def starapply(args):
+    f, args, kwargs = args
+    return f(*args, **kwargs)
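+
+# note starapply exists because Pool.imap_unordered can only pass a
+# single picklable argument to each job, so we pack up (f, args, kwargs)
+# tuples instead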
+
+def collect(obj_path, trace_paths, *,
+        jobs=None,
+        **args):
+    # automatic job detection?
+    if jobs == 0:
+        jobs = len(os.sched_getaffinity(0))
+
+    # find sym/line info to reverse ASLR
+    syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)
+
+    if jobs is not None:
+        # try to split up files so that even single files can be processed
+        # in parallel
+        #
+        # this looks naive, since we're splitting up text files by bytes, but
+        # we do proper backtrace delimitation in collect_job
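+        #
+        # e.g. a 64MiB trace with -j4 becomes four ~16MiB slices; each
+        # worker seeks to its slice, resyncs to the next newline, and
+        # parses past its end until a record boundary, so records that
+        # straddle a boundary are counted exactly once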
+        trace_ranges = []
+        for path in trace_paths:
+            if path == '-':
+                trace_ranges.append([(None, None)])
+                continue
+
+            size = os.path.getsize(path)
+            if size == 0:
+                trace_ranges.append([(None, None)])
+                continue
+
+            perjob = m.ceil(size / jobs)
+            trace_ranges.append([(i, i+perjob) for i in range(0, size, perjob)])
+
+        results = []
+        with mp.Pool(jobs) as p:
+            for results_ in p.imap_unordered(
+                    starapply,
+                    ((collect_job, (path, start, stop,
+                        syms, sym_at, lines, line_at),
+                        args)
+                        for path, ranges in zip(trace_paths, trace_ranges)
+                        for start, stop in ranges)):
+                results.extend(results_)
+
+    else:
+        results = []
+        for path in trace_paths:
+            results.extend(collect_job(path, None, None,
+                syms, sym_at, lines, line_at,
+                **args))
+
+    return results
+
+
+def fold(Result, results, *,
+        by=None,
+        defines=None,
+        **_):
+    if by is None:
+        by = Result._by
+
+    for k in it.chain(by or [], (k for k, _ in defines or [])):
+        if k not in Result._by and k not in Result._fields:
+            print("error: could not find field %r?" % k)
+            sys.exit(-1)
+
+    # filter by matching defines
+    if defines is not None:
+        results_ = []
+        for r in results:
+            if all(getattr(r, k) in vs for k, vs in defines):
+                results_.append(r)
+        results = results_
+
+    # organize results into conflicts
+    folding = co.OrderedDict()
+    for r in results:
+        name = tuple(getattr(r, k) for k in by)
+        if name not in folding:
+            folding[name] = []
+        folding[name].append(r)
+
+    # merge conflicts
+    folded = []
+    for name, rs in folding.items():
+        folded.append(sum(rs[1:], start=rs[0]))
+
+    # fold recursively
+    folded_ = []
+    for r in folded:
+        folded_.append(r._replace(children=fold(
+            Result, r.children,
+            by=by,
+            defines=defines)))
+    folded = folded_
+
+    return folded
+
+def table(Result, results, diff_results=None, *,
+        by=None,
+        fields=None,
+        sort=None,
+        summary=False,
+        all=False,
+        percent=False,
+        depth=1,
+        **_):
+    all_, all = all, __builtins__.all
+
+    if by is None:
+        by = Result._by
+    if fields is None:
+        fields = Result._fields
+    types = Result._types
+
+    # fold again
+    results = fold(Result, results, by=by)
+    if diff_results is not None:
+        diff_results = fold(Result, diff_results, by=by)
+
+    # organize by name
+    table = {
+        ','.join(str(getattr(r, k) or '') for k in by): r
+        for r in results}
+    diff_table = {
+        ','.join(str(getattr(r, k) or '') for k in by): r
+        for r in diff_results or []}
+    names = list(table.keys() | diff_table.keys())
+
+    # sort again, now with diff info, note that python's sort is stable
+    names.sort()
+    if diff_results is not None:
+        names.sort(key=lambda n: tuple(
+            types[k].ratio(
+                getattr(table.get(n), k, None),
+                getattr(diff_table.get(n), k, None))
+            for k in fields),
+            reverse=True)
+    if sort:
+        for k, reverse in reversed(sort):
+            names.sort(key=lambda n: (getattr(table[n], k),)
+                if getattr(table.get(n), k, None) is not None else (),
+                reverse=reverse ^ (not k or k in Result._fields))
+
+    # build up our lines
+    lines = []
+
+    # header
+    header = []
+    header.append('%s%s' % (
+        ','.join(by),
+        ' (%d added, %d removed)' % (
+            sum(1 for n in table if n not in diff_table),
+            sum(1 for n in diff_table if n not in table))
+            if diff_results is not None and not percent else '')
+        if not summary else '')
+    if diff_results is None:
+        for k in fields:
+            header.append(k)
+    elif percent:
+        for k in fields:
+            header.append(k)
+    else:
+        for k in fields:
+            header.append('o'+k)
+        for k in fields:
+            header.append('n'+k)
+        for k in fields:
+            header.append('d'+k)
+    header.append('')
+    lines.append(header)
+
+    def table_entry(name, r, diff_r=None, ratios=[]):
+        entry = []
+        entry.append(name)
+        if diff_results is None:
+            for k in fields:
+                entry.append(getattr(r, k).table()
+                    if getattr(r, k, None) is not None
+                    else types[k].none)
+        elif percent:
+            for k in fields:
+                entry.append(getattr(r, k).diff_table()
+                    if getattr(r, k, None) is not None
+                    else types[k].diff_none)
+        else:
+            for k in fields:
+                entry.append(getattr(diff_r, k).diff_table()
+                    if getattr(diff_r, k, None) is not None
+                    else types[k].diff_none)
+            for k in fields:
+                entry.append(getattr(r, k).diff_table()
+                    if getattr(r, k, None) is not None
+                    else types[k].diff_none)
+            for k in fields:
+                entry.append(types[k].diff_diff(
+                        getattr(r, k, None),
+                        getattr(diff_r, k, None)))
+        if diff_results is None:
+            entry.append('')
+        elif percent:
+            entry.append(' (%s)' % ', '.join(
+                '+∞%' if t == +m.inf
+                else '-∞%' if t == -m.inf
+                else '%+.1f%%' % (100*t)
+                for t in ratios))
+        else:
+            entry.append(' (%s)' % ', '.join(
+                    '+∞%' if t == +m.inf
+                    else '-∞%' if t == -m.inf
+                    else '%+.1f%%' % (100*t)
+                    for t in ratios
+                    if t)
+                if any(ratios) else '')
+        return entry
+
+    # entries
+    if not summary:
+        for name in names:
+            r = table.get(name)
+            if diff_results is None:
+                diff_r = None
+                ratios = None
+            else:
+                diff_r = diff_table.get(name)
+                ratios = [
+                    types[k].ratio(
+                        getattr(r, k, None),
+                        getattr(diff_r, k, None))
+                    for k in fields]
+                if not all_ and not any(ratios):
+                    continue
+            lines.append(table_entry(name, r, diff_r, ratios))
+
+    # total
+    r = next(iter(fold(Result, results, by=[])), None)
+    if diff_results is None:
+        diff_r = None
+        ratios = None
+    else:
+        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
+        ratios = [
+            types[k].ratio(
+                getattr(r, k, None),
+                getattr(diff_r, k, None))
+            for k in fields]
+    lines.append(table_entry('TOTAL', r, diff_r, ratios))
+
+    # find the best widths, note that column 0 contains the names and column -1
+    # the ratios, so those are handled a bit differently
+    widths = [
+        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
+        for w, i in zip(
+            it.chain([23], it.repeat(7)),
+            range(len(lines[0])-1))]
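+
+    # note this rounds each column width up to a multiple of 4 (minus
+    # one for the separating space), so columns grow in even steps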
+
+    # adjust the name width based on the expected call depth, though
+    # note this doesn't really work with unbounded recursion
+    if not summary and not m.isinf(depth):
+        widths[0] += 4*(depth-1)
+
+    # print the tree recursively
+    print('%-*s  %s%s' % (
+        widths[0], lines[0][0],
+        ' '.join('%*s' % (w, x)
+            for w, x in zip(widths[1:], lines[0][1:-1])),
+        lines[0][-1]))
+
+    if not summary:
+        def recurse(results_, depth_, prefixes=('', '', '', '')):
+            # rebuild our tables at each layer
+            table_ = {
+                ','.join(str(getattr(r, k) or '') for k in by): r
+                for r in results_}
+            names_ = list(table_.keys())
+
+            # sort again at each layer, keep in mind the numbers are
+            # changing as we descend
+            names_.sort()
+            if sort:
+                for k, reverse in reversed(sort):
+                    names_.sort(key=lambda n: (getattr(table_[n], k),)
+                        if getattr(table_.get(n), k, None) is not None else (),
+                        reverse=reverse ^ (not k or k in Result._fields))
+
+            for i, name in enumerate(names_):
+                r = table_[name]
+                is_last = (i == len(names_)-1)
+
+                print('%s%-*s  %s' % (
+                    prefixes[0+is_last],
+                    widths[0] - (
+                        len(prefixes[0+is_last])
+                        if not m.isinf(depth) else 0),
+                    name,
+                    ' '.join('%*s' % (w, x)
+                        for w, x in zip(
+                            widths[1:],
+                            table_entry(name, r)[1:]))))
+
+                # recurse?
+                if depth_ > 1:
+                    recurse(
+                        r.children,
+                        depth_-1,
+                        (prefixes[2+is_last] + "|-> ",
+                         prefixes[2+is_last] + "'-> ",
+                         prefixes[2+is_last] + "|   ",
+                         prefixes[2+is_last] + "    "))
+
+        # we have enough going on with diffing to make the top layer
+        # a special case
+        for name, line in zip(names, lines[1:-1]):
+            print('%-*s  %s%s' % (
+                widths[0], line[0],
+                ' '.join('%*s' % (w, x)
+                    for w, x in zip(widths[1:], line[1:-1])),
+                line[-1]))
+
+            if name in table and depth > 1:
+                recurse(
+                    table[name].children,
+                    depth-1,
+                    ("|-> ",
+                     "'-> ",
+                     "|   ",
+                     "    "))
+
+    print('%-*s  %s%s' % (
+        widths[0], lines[-1][0],
+        ' '.join('%*s' % (w, x)
+            for w, x in zip(widths[1:], lines[-1][1:-1])),
+        lines[-1][-1]))
+
+
+def annotate(Result, results, *,
+        annotate=None,
+        threshold=None,
+        read_threshold=None,
+        prog_threshold=None,
+        erase_threshold=None,
+        **args):
+    # figure out the thresholds
+    if threshold is None:
+        threshold = THRESHOLD
+    elif len(threshold) == 1:
+        threshold = threshold[0], threshold[0]
+
+    if read_threshold is None:
+        read_t0, read_t1 = threshold
+    elif len(read_threshold) == 1:
+        read_t0, read_t1 = read_threshold[0], read_threshold[0]
+    else:
+        read_t0, read_t1 = read_threshold
+    read_t0, read_t1 = min(read_t0, read_t1), max(read_t0, read_t1)
+
+    if prog_threshold is None:
+        prog_t0, prog_t1 = threshold
+    elif len(prog_threshold) == 1:
+        prog_t0, prog_t1 = prog_threshold[0], prog_threshold[0]
+    else:
+        prog_t0, prog_t1 = prog_threshold
+    prog_t0, prog_t1 = min(prog_t0, prog_t1), max(prog_t0, prog_t1)
+
+    if erase_threshold is None:
+        erase_t0, erase_t1 = threshold
+    elif len(erase_threshold) == 1:
+        erase_t0, erase_t1 = erase_threshold[0], erase_threshold[0]
+    else:
+        erase_t0, erase_t1 = erase_threshold
+    erase_t0, erase_t1 = min(erase_t0, erase_t1), max(erase_t0, erase_t1)
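+
+    # so, for example, the default -T0.5,0.85 shows any line with ops
+    # above 50% of that field's maximum, and highlights lines above 85%
+    # in red when color is enabled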
+
+    # find maxs
+    max_readed = max(it.chain((float(r.readed) for r in results), [1]))
+    max_proged = max(it.chain((float(r.proged) for r in results), [1]))
+    max_erased = max(it.chain((float(r.erased) for r in results), [1]))
+
+    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
+        # flatten to line info
+        results = fold(Result, results, by=['file', 'line'])
+        table = {r.line: r for r in results if r.file == path}
+
+        # calculate spans to show
+        if not annotate:
+            spans = []
+            last = None
+            func = None
+            for line, r in sorted(table.items()):
+                if (float(r.readed) / max_readed >= read_t0
+                        or float(r.proged) / max_proged >= prog_t0
+                        or float(r.erased) / max_erased >= erase_t0):
+                    if last is not None and line - last.stop <= args['context']:
+                        last = range(
+                            last.start,
+                            line+1+args['context'])
+                    else:
+                        if last is not None:
+                            spans.append((last, func))
+                        last = range(
+                            line-args['context'],
+                            line+1+args['context'])
+                        func = r.function
+            if last is not None:
+                spans.append((last, func))
+
+        with open(path) as f:
+            skipped = False
+            for i, line in enumerate(f):
+                # skip lines not in spans?
+                if not annotate and not any(i+1 in s for s, _ in spans):
+                    skipped = True
+                    continue
+
+                if skipped:
+                    skipped = False
+                    print('%s@@ %s:%d: %s @@%s' % (
+                        '\x1b[36m' if args['color'] else '',
+                        path,
+                        i+1,
+                        next((f for s, f in spans if i+1 in s), '?'),
+                        '\x1b[m' if args['color'] else ''))
+
+                # build line
+                if line.endswith('\n'):
+                    line = line[:-1]
+
+                if i+1 in table:
+                    r = table[i+1]
+                    line = '%-*s // %s readed, %s proged, %s erased' % (
+                        args['width'],
+                        line,
+                        r.readed,
+                        r.proged,
+                        r.erased)
+
+                    if args['color']:
+                        if (float(r.readed) / max_readed >= read_t1
+                                or float(r.proged) / max_proged >= prog_t1
+                                or float(r.erased) / max_erased >= erase_t1):
+                            line = '\x1b[1;31m%s\x1b[m' % line
+                        elif (float(r.readed) / max_readed >= read_t0
+                                or float(r.proged) / max_proged >= prog_t0
+                                or float(r.erased) / max_erased >= erase_t0):
+                            line = '\x1b[35m%s\x1b[m' % line
+
+                print(line)
+
+
+def report(obj_path='', trace_paths=[], *,
+        by=None,
+        fields=None,
+        defines=None,
+        sort=None,
+        **args):
+    # figure out what color should be
+    if args.get('color') == 'auto':
+        args['color'] = sys.stdout.isatty()
+    elif args.get('color') == 'always':
+        args['color'] = True
+    else:
+        args['color'] = False
+
+    # depth of 0 == m.inf
+    if args.get('depth') == 0:
+        args['depth'] = m.inf
+
+    # find sizes
+    if not args.get('use', None):
+        results = collect(obj_path, trace_paths, **args)
+    else:
+        results = []
+        with openio(args['use']) as f:
+            reader = csv.DictReader(f, restval='')
+            for r in reader:
+                try:
+                    results.append(PerfBdResult(
+                        **{k: r[k] for k in PerfBdResult._by
+                            if k in r and r[k].strip()},
+                        **{k: r['perfbd_'+k] for k in PerfBdResult._fields
+                            if 'perfbd_'+k in r and r['perfbd_'+k].strip()}))
+                except TypeError:
+                    pass
+
+    # fold
+    results = fold(PerfBdResult, results, by=by, defines=defines)
+
+    # sort, note that python's sort is stable
+    results.sort()
+    if sort:
+        for k, reverse in reversed(sort):
+            results.sort(key=lambda r: (getattr(r, k),)
+                if getattr(r, k) is not None else (),
+                reverse=reverse ^ (not k or k in PerfBdResult._fields))
+
+    # write results to CSV
+    if args.get('output'):
+        with openio(args['output'], 'w') as f:
+            writer = csv.DictWriter(f,
+                (by if by is not None else PerfBdResult._by)
+                + ['perfbd_'+k for k in PerfBdResult._fields])
+            writer.writeheader()
+            for r in results:
+                writer.writerow(
+                    {k: getattr(r, k)
+                        for k in (by if by is not None else PerfBdResult._by)}
+                    | {'perfbd_'+k: getattr(r, k)
+                        for k in PerfBdResult._fields})
+
+    # find previous results?
+    if args.get('diff'):
+        diff_results = []
+        try:
+            with openio(args['diff']) as f:
+                reader = csv.DictReader(f, restval='')
+                for r in reader:
+                    try:
+                        diff_results.append(PerfBdResult(
+                            **{k: r[k] for k in PerfBdResult._by
+                                if k in r and r[k].strip()},
+                            **{k: r['perfbd_'+k] for k in PerfBdResult._fields
+                                if 'perfbd_'+k in r
+                                    and r['perfbd_'+k].strip()}))
+                    except TypeError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+        # fold
+        diff_results = fold(PerfBdResult, diff_results, by=by, defines=defines)
+
+    # print table
+    if not args.get('quiet'):
+        if (args.get('annotate')
+                or args.get('threshold')
+                or args.get('read_threshold')
+                or args.get('prog_threshold')
+                or args.get('erase_threshold')):
+            # annotate sources
+            annotate(PerfBdResult, results, **args)
+        else:
+            # print table
+            table(PerfBdResult, results,
+                diff_results if args.get('diff') else None,
+                by=by if by is not None else ['function'],
+                fields=fields,
+                sort=sort,
+                **args)
+
+
+def main(**args):
+    # unlike perf.py, perfbd.py has no record mode, recording traces is
+    # left to the runners themselves, so we always report
+    return report(**args)
+
+
+if __name__ == "__main__":
+    import argparse
+    import sys
+    parser = argparse.ArgumentParser(
+        description="Aggregate and report call-stack propagated "
+            "block-device operations from trace output.",
+        allow_abbrev=False)
+    parser.add_argument(
+        'obj_path',
+        nargs='?',
+        help="Input executable for mapping addresses to symbols.")
+    parser.add_argument(
+        'trace_paths',
+        nargs='*',
+        help="Input *.trace files.")
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help="Output commands that run behind the scenes.")
+    parser.add_argument(
+        '-q', '--quiet',
+        action='store_true',
+        help="Don't show anything, useful with -o.")
+    parser.add_argument(
+        '-o', '--output',
+        help="Specify CSV file to store results.")
+    parser.add_argument(
+        '-u', '--use',
+        help="Don't parse anything, use this CSV file.")
+    parser.add_argument(
+        '-d', '--diff',
+        help="Specify CSV file to diff against.")
+    parser.add_argument(
+        '-a', '--all',
+        action='store_true',
+        help="Show all, not just the ones that changed.")
+    parser.add_argument(
+        '-p', '--percent',
+        action='store_true',
+        help="Only show percentage change, not a full diff.")
+    parser.add_argument(
+        '-b', '--by',
+        action='append',
+        choices=PerfBdResult._by,
+        help="Group by this field.")
+    parser.add_argument(
+        '-f', '--field',
+        dest='fields',
+        action='append',
+        choices=PerfBdResult._fields,
+        help="Show this field.")
+    parser.add_argument(
+        '-D', '--define',
+        dest='defines',
+        action='append',
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
+        help="Only include results where this field is this value.")
+    class AppendSort(argparse.Action):
+        def __call__(self, parser, namespace, value, option):
+            if namespace.sort is None:
+                namespace.sort = []
+            namespace.sort.append((value, True if option == '-S' else False))
+    parser.add_argument(
+        '-s', '--sort',
+        action=AppendSort,
+        help="Sort by this fields.")
+    parser.add_argument(
+        '-S', '--reverse-sort',
+        action=AppendSort,
+        help="Sort by this fields, but backwards.")
+    parser.add_argument(
+        '-Y', '--summary',
+        action='store_true',
+        help="Only show the total.")
+    parser.add_argument(
+        '-F', '--source',
+        dest='sources',
+        action='append',
+        help="Only consider definitions in this file. Defaults to anything "
+            "in the current directory.")
+    parser.add_argument(
+        '--everything',
+        action='store_true',
+        help="Include builtin and libc specific symbols.")
+    parser.add_argument(
+        '-P', '--propagate',
+        type=lambda x: int(x, 0),
+        help="Depth to propagate samples up the call-stack. 0 propagates up "
+            "to the entry point, 1 does no propagation. Defaults to 0.")
+    parser.add_argument(
+        '-Z', '--depth',
+        nargs='?',
+        type=lambda x: int(x, 0),
+        const=0,
+        help="Depth of function calls to show. 0 shows all calls but may not "
+            "terminate!")
+    parser.add_argument(
+        '-A', '--annotate',
+        action='store_true',
+        help="Show source files annotated with coverage info.")
+    parser.add_argument(
+        '-T', '--threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with any ops above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '--read-threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with reads above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '--prog-threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with progs above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '--erase-threshold',
+        nargs='?',
+        type=lambda x: tuple(float(x) for x in x.split(',')),
+        const=THRESHOLD,
+        help="Show lines with erases above this threshold as a percent of "
+            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
+    parser.add_argument(
+        '-c', '--context',
+        type=lambda x: int(x, 0),
+        default=3,
+        help="Show n additional lines of context. Defaults to 3.")
+    parser.add_argument(
+        '-W', '--width',
+        type=lambda x: int(x, 0),
+        default=80,
+        help="Assume source is styled with this many columns. Defaults to 80.")
+    parser.add_argument(
+        '--color',
+        choices=['never', 'always', 'auto'],
+        default='auto',
+        help="When to use terminal colors. Defaults to 'auto'.")
+    parser.add_argument(
+        '-j', '--jobs',
+        nargs='?',
+        type=lambda x: int(x, 0),
+        const=0,
+        help="Number of processes to use. 0 spawns one process per core.")
+    parser.add_argument(
+        '--objdump-tool',
+        type=lambda x: x.split(),
+        default=OBJDUMP_TOOL,
+        help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
+    sys.exit(main(**{k: v
+        for k, v in vars(parser.parse_intermixed_args()).items()
+        if v is not None}))

+ 6 - 18
scripts/plot.py

@@ -11,7 +11,6 @@
 
 import collections as co
 import csv
-import glob
 import io
 import itertools as it
 import math as m
@@ -19,7 +18,6 @@ import os
 import shutil
 import time
 
-CSV_PATHS = ['*.csv']
 COLORS = [
     '1;34', # bold blue
     '1;31', # bold red
@@ -80,14 +78,14 @@ def si(x, w=4):
         s = s.rstrip('.')
     return '%s%s%s' % ('-' if x < 0 else '', s, SI_PREFIXES[p])
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 class LinesIO:
     def __init__(self, maxlen=None):
@@ -356,16 +354,8 @@ class Plot:
 
 def collect(csv_paths, renames=[]):
     # collect results from CSV files
-    paths = []
-    for path in csv_paths:
-        if os.path.isdir(path):
-            path = path + '/*.csv'
-
-        for path in glob.glob(path):
-            paths.append(path)
-
     results = []
-    for path in paths:
+    for path in csv_paths:
         try:
             with openio(path) as f:
                 reader = csv.DictReader(f, restval='')
@@ -732,9 +722,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'csv_paths',
         nargs='*',
-        default=CSV_PATHS,
-        help="Description of where to find *.csv files. May be a directory "
-            "or list of paths. Defaults to %r." % CSV_PATHS)
+        help="Input *.csv files.")
     parser.add_argument(
         '-b', '--by',
         action='append',

+ 4 - 4
scripts/prettyasserts.py

@@ -42,14 +42,14 @@ LEXEMES = {
 }
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def write_header(f, limit=LIMIT):
     f.writeln("// Generated by %s:" % sys.argv[0])

+ 8 - 26
scripts/stack.py

@@ -12,15 +12,12 @@
 
 import collections as co
 import csv
-import glob
 import itertools as it
 import math as m
 import os
 import re
 
 
-CI_PATHS = ['*.ci']
-
 
 # integer fields
 class Int(co.namedtuple('Int', 'x')):
@@ -121,16 +118,16 @@ class StackResult(co.namedtuple('StackResult', [
             self.children | other.children)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
-def collect(paths, *,
+def collect(ci_paths, *,
         sources=None,
         everything=False,
         **args):
@@ -167,7 +164,7 @@ def collect(paths, *,
     callgraph = co.defaultdict(lambda: (None, None, 0, set()))
     f_pattern = re.compile(
         r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)')
-    for path in paths:
+    for path in ci_paths:
         with open(path) as f:
             vcg = parse_vcg(f.read())
         for k, graph in vcg:
@@ -546,20 +543,7 @@ def main(ci_paths,
 
     # find sizes
     if not args.get('use', None):
-        # find .ci files
-        paths = []
-        for path in ci_paths:
-            if os.path.isdir(path):
-                path = path + '/*.ci'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .ci files found in %r?" % ci_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(ci_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -644,9 +628,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'ci_paths',
         nargs='*',
-        default=CI_PATHS,
-        help="Description of where to find *.ci files. May be a directory "
-            "or a list of paths. Defaults to %r." % CI_PATHS)
+        help="Input *.ci files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

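With the glob-expansion and default-path logic dropped, collect consumes the .ci paths exactly as handed over on the command line. For reference, the f_pattern it applies picks apart GCC callgraph-info node labels of the shape function\nfile:line\nNNN bytes (qualifier), where the \n are literal backslash-n sequences in the .ci file (hence the \\n escapes). A small sketch against a hypothetical label:

    import re

    f_pattern = re.compile(
        r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)')

    # hypothetical node label from a -fcallgraph-info=su .ci file
    label = r'lfs_file_write\nlfs.c:2915:5\n48 bytes (static)'
    m = f_pattern.match(label)
    if m:
        function, file, size, qualifier = m.groups()
        print(function, file, size, qualifier)
        # lfs_file_write lfs.c 48 static
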
+ 13 - 30
scripts/struct_.py

@@ -12,7 +12,6 @@
 import collections as co
 import csv
 import difflib
-import glob
 import itertools as it
 import math as m
 import os
@@ -21,7 +20,6 @@ import shlex
 import subprocess as sp
 
 
-OBJ_PATHS = ['*.o']
 OBJDUMP_TOOL = ['objdump']
 
 
@@ -120,14 +118,14 @@ class StructResult(co.namedtuple('StructResult', ['file', 'struct', 'size'])):
             self.size + other.size)
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def collect(obj_paths, *,
         objdump_tool=OBJDUMP_TOOL,
@@ -136,15 +134,15 @@ def collect(obj_paths, *,
         internal=False,
         **args):
     line_pattern = re.compile(
-        '^\s+(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)$')
+        '^\s+(?P<no>[0-9]+)'
+            '(?:\s+(?P<dir>[0-9]+))?'
+            '\s+.*'
+            '\s+(?P<path>[^\s]+)$')
     info_pattern = re.compile(
         '^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
-            '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
-            '|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
-            '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
+            '|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
+            '|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
+            '|.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
 
     results = []
     for path in obj_paths:
@@ -468,20 +466,7 @@ def main(obj_paths, *,
         **args):
     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in obj_paths:
-            if os.path.isdir(path):
-                path = path + '/*.o'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .o files found in %r?" % obj_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(obj_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -565,9 +550,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'obj_paths',
         nargs='*',
-        default=OBJ_PATHS,
-        help="Description of where to find *.o files. May be a directory "
-            "or a list of paths. Defaults to %r." % OBJ_PATHS)
+        help="Input *.o files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',

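The regex reshuffling in struct_.py is behavior-preserving: the redundant per-alternative ^ anchors are dropped (the single leading ^ covers the whole group), and whitespace now binds to the token that follows it. What info_pattern extracts from objdump --dwarf=info output, sketched with hypothetical lines (the struct name and size are illustrative):

    import re

    info_pattern = re.compile(
        r'^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
            r'|.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
            r'|.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
            r'|.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')

    # hypothetical objdump --dwarf=info output for a struct definition
    lines = [
        ' <1><30>: Abbrev Number: 5 (DW_TAG_structure_type)',
        '    <31>   DW_AT_name        : lfs_config',
        '    <35>   DW_AT_byte_size   : 92',
    ]
    for line in lines:
        m = info_pattern.match(line)
        if m:
            print({k: v for k, v in m.groupdict().items() if v})
    # {'tag': 'DW_TAG_structure_type'}
    # {'name': 'lfs_config'}
    # {'size': '92'}
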
+ 6 - 23
scripts/summary.py

@@ -15,15 +15,12 @@
 import collections as co
 import csv
 import functools as ft
-import glob
 import itertools as it
 import math as m
 import os
 import re
 
 
-CSV_PATHS = ['*.csv']
-
 # supported merge operations
 #
 # this is a terrible way to express these
@@ -548,14 +545,14 @@ def table(Result, results, diff_results=None, *,
             line[-1]))
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 def main(csv_paths, *,
         by=None,
@@ -605,20 +602,8 @@ def main(csv_paths, *,
         ops.update(ops_)
 
     # find CSV files
-    paths = []
-    for path in csv_paths:
-        if os.path.isdir(path):
-            path = path + '/*.csv'
-
-        for path in glob.glob(path):
-            paths.append(path)
-
-    if not paths:
-        print("error: no .csv files found in %r?" % csv_paths)
-        sys.exit(-1)
-
     results = []
-    for path in paths:
+    for path in csv_paths:
         try:
             with openio(path) as f:
                 reader = csv.DictReader(f, restval='')
@@ -721,9 +706,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'csv_paths',
         nargs='*',
-        default=CSV_PATHS,
-        help="Description of where to find *.csv files. May be a directory "
-            "or list of paths. Defaults to %r." % CSV_PATHS)
+        help="Input *.csv files.")
     parser.add_argument(
         '-q', '--quiet',
         action='store_true',

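summary.py follows the same pattern: no directory scanning, no implicit *.csv default, just the paths the shell already expanded, which keeps the scripts predictable and composable. The collection loop itself stays a plain DictReader pass; a minimal sketch, assuming a hypothetical results.csv:

    import csv

    results = []
    for path in ['results.csv']:
        try:
            with open(path) as f:
                # restval='' fills in missing columns so later merges
                # don't trip over absent keys
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    results.append(r)
        except FileNotFoundError:
            pass
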
+ 7 - 4
scripts/tailpipe.py

@@ -17,14 +17,14 @@ import sys
 import time
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 class LinesIO:
     def __init__(self, maxlen=None):
@@ -104,6 +104,9 @@ def main(path='-', *, lines=5, cat=False, sleep=0.01, keep_open=False):
                 break
             # don't just flood open calls
             time.sleep(sleep or 0.1)
+    except FileNotFoundError as e:
+        print("error: file not found %r" % path)
+        sys.exit(-1)
     except KeyboardInterrupt:
         pass
 

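tailpipe.py now reports a missing file instead of surfacing a raw traceback. The follow behavior it wraps reduces to read-to-EOF, then sleep; a minimal sketch of that idea (the real script renders through a LinesIO ring buffer, and trace.txt here is hypothetical):

    import sys
    import time

    def follow(path, sleep=0.01):
        try:
            with open(path) as f:
                while True:
                    line = f.readline()
                    if line:
                        sys.stdout.write(line)
                        sys.stdout.flush()
                    else:
                        # at EOF; wait for the writer rather than
                        # spinning on reads
                        time.sleep(sleep or 0.1)
        except FileNotFoundError:
            print("error: file not found %r" % path)
            sys.exit(-1)
        except KeyboardInterrupt:
            pass

    follow('trace.txt')
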
+ 36 - 26
scripts/test.py

@@ -35,19 +35,12 @@ VALGRIND_TOOL = ['valgrind']
 PERF_SCRIPT = ['./scripts/perf.py']
 
 
-def openio(path, mode='r', buffering=-1, nb=False):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r', buffering)
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w', buffering)
-    elif nb and 'a' in mode:
-        return os.fdopen(os.open(
-                path,
-                os.O_WRONLY | os.O_CREAT | os.O_APPEND | os.O_NONBLOCK,
-                0o666),
-            mode,
-            buffering)
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
         return open(path, mode, buffering)
 
@@ -549,6 +542,12 @@ def find_runner(runner, **args):
         cmd.append('-d%s' % args['disk'])
     if args.get('trace'):
         cmd.append('-t%s' % args['trace'])
+    if args.get('trace_backtrace'):
+        cmd.append('--trace-backtrace')
+    if args.get('trace_period'):
+        cmd.append('--trace-period=%s' % args['trace_period'])
+    if args.get('trace_freq'):
+        cmd.append('--trace-freq=%s' % args['trace_freq'])
     if args.get('read_sleep'):
         cmd.append('--read-sleep=%s' % args['read_sleep'])
     if args.get('prog_sleep'):
@@ -764,7 +763,7 @@ class TestFailure(Exception):
         self.stdout = stdout
         self.assert_ = assert_
 
-def run_stage(name, runner_, ids, output_, **args):
+def run_stage(name, runner_, ids, stdout_, trace_, output_, **args):
     # get expected suite/case/perm counts
     (case_suites,
         expected_suite_perms,
@@ -805,7 +804,6 @@ def run_stage(name, runner_, ids, output_, **args):
         os.close(spty)
         children.add(proc)
         mpty = os.fdopen(mpty, 'r', 1)
-        stdout = None
 
         last_id = None
         last_stdout = []
@@ -822,18 +820,12 @@ def run_stage(name, runner_, ids, output_, **args):
                 if not line:
                     break
                 last_stdout.append(line)
-                if args.get('stdout'):
+                if stdout_:
                     try:
-                        if not stdout:
-                            stdout = openio(args['stdout'], 'a', 1, nb=True)
-                        stdout.write(line)
-                    except OSError as e:
-                        if e.errno not in [
-                                errno.ENXIO,
-                                errno.EPIPE,
-                                errno.EAGAIN]:
-                            raise
-                        stdout = None
+                        stdout_.write(line)
+                        stdout_.flush()
+                    except BrokenPipeError:
+                        pass
                 if args.get('verbose'):
                     sys.stdout.write(line)
 
@@ -1063,6 +1055,8 @@ def run(runner, test_ids=[], **args):
                 by or 'tests',
                 runner_,
                 [by] if by is not None else test_ids,
+                stdout,
+                trace,
                 output,
                 **args)
         # collect passes/failures
@@ -1076,9 +1070,15 @@ def run(runner, test_ids=[], **args):
     stop = time.time()
 
     if stdout:
-        stdout.close()
+        try:
+            stdout.close()
+        except BrokenPipeError:
+            pass
     if trace:
-        trace.close()
+        try:
+            trace.close()
+        except BrokenPipeError:
+            pass
     if output:
         output.close()
 
@@ -1284,6 +1284,16 @@ if __name__ == "__main__":
     test_parser.add_argument(
         '-t', '--trace',
         help="Direct trace output to this file.")
+    test_parser.add_argument(
+        '--trace-backtrace',
+        action='store_true',
+        help="Include a backtrace with every trace statement.")
+    test_parser.add_argument(
+        '--trace-period',
+        help="Sample trace output at this period in cycles.")
+    test_parser.add_argument(
+        '--trace-freq',
+        help="Sample trace output at this frequency in hz.")
     test_parser.add_argument(
         '-O', '--stdout',
         help="Direct stdout to this file. Note stderr is already merged here.")
@@ -1361,7 +1371,7 @@ if __name__ == "__main__":
         default=VALGRIND_TOOL,
         help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL)
     test_parser.add_argument(
-        '--perf',
+        '-p', '--perf',
         help="Run under Linux's perf to sample performance counters, writing "
             "samples to this file.")
     test_parser.add_argument(

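The new trace knobs are pure plumbing in test.py; they're appended to the runner invocation and interpreted runner-side. A sketch of how find_runner translates them, with hypothetical argument values:

    args = {'trace': 'trace.txt', 'trace_backtrace': True, 'trace_freq': '100'}

    cmd = ['./runners/test_runner']
    if args.get('trace'):
        cmd.append('-t%s' % args['trace'])
    if args.get('trace_backtrace'):
        cmd.append('--trace-backtrace')
    if args.get('trace_period'):
        cmd.append('--trace-period=%s' % args['trace_period'])
    if args.get('trace_freq'):
        cmd.append('--trace-freq=%s' % args['trace_freq'])
    print(' '.join(cmd))
    # ./runners/test_runner -ttrace.txt --trace-backtrace --trace-freq=100
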
+ 28 - 20
scripts/tracebd.py

@@ -41,14 +41,14 @@ CHARS_BRAILLE = (
     '⠋⢋⡋⣋⠫⢫⡫⣫⠏⢏⡏⣏⠯⢯⡯⣯' '⠛⢛⡛⣛⠻⢻⡻⣻⠟⢟⡟⣟⠿⢿⡿⣿')
 
 
-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)
 
 class LinesIO:
     def __init__(self, maxlen=None):
@@ -663,29 +663,30 @@ def main(path='-', *,
 
     # parse a line of trace output
     pattern = re.compile(
-        'trace.*?bd_(?:'
+        '^(?P<file>[^:]*):(?P<line>[0-9]+):trace:.*?bd_(?:'
             '(?P<create>create\w*)\('
                 '(?:'
                     'block_size=(?P<block_size>\w+)'
                     '|' 'block_count=(?P<block_count>\w+)'
                     '|' '.*?' ')*' '\)'
             '|' '(?P<read>read)\('
-                '\s*(?P<read_ctx>\w+)\s*' ','
-                '\s*(?P<read_block>\w+)\s*' ','
-                '\s*(?P<read_off>\w+)\s*' ','
-                '\s*(?P<read_buffer>\w+)\s*' ','
-                '\s*(?P<read_size>\w+)\s*' '\)'
+                '\s*(?P<read_ctx>\w+)' '\s*,'
+                '\s*(?P<read_block>\w+)' '\s*,'
+                '\s*(?P<read_off>\w+)' '\s*,'
+                '\s*(?P<read_buffer>\w+)' '\s*,'
+                '\s*(?P<read_size>\w+)' '\s*\)'
             '|' '(?P<prog>prog)\('
-                '\s*(?P<prog_ctx>\w+)\s*' ','
-                '\s*(?P<prog_block>\w+)\s*' ','
-                '\s*(?P<prog_off>\w+)\s*' ','
-                '\s*(?P<prog_buffer>\w+)\s*' ','
-                '\s*(?P<prog_size>\w+)\s*' '\)'
+                '\s*(?P<prog_ctx>\w+)' '\s*,'
+                '\s*(?P<prog_block>\w+)' '\s*,'
+                '\s*(?P<prog_off>\w+)' '\s*,'
+                '\s*(?P<prog_buffer>\w+)' '\s*,'
+                '\s*(?P<prog_size>\w+)' '\s*\)'
             '|' '(?P<erase>erase)\('
-                '\s*(?P<erase_ctx>\w+)\s*' ','
-                '\s*(?P<erase_block>\w+)\s*' '\)'
+                '\s*(?P<erase_ctx>\w+)' '\s*,'
+                '\s*(?P<erase_block>\w+)'
+                '\s*\(\s*(?P<erase_size>\w+)\s*\)' '\s*\)'
             '|' '(?P<sync>sync)\('
-                '\s*(?P<sync_ctx>\w+)\s*' '\)' ')')
+                '\s*(?P<sync_ctx>\w+)' '\s*\)' ')\s*$')
     def parse(line):
         nonlocal bd
 
@@ -694,7 +695,7 @@ def main(path='-', *,
         # through here
         if 'trace' not in line or 'bd' not in line:
             return False
-        m = pattern.search(line)
+        m = pattern.match(line)
         if not m:
             return False
 
@@ -748,12 +749,16 @@ def main(path='-', *,
 
         elif m.group('erase') and (erase or wear):
             block = int(m.group('erase_block'), 0)
+            size = int(m.group('erase_size'), 0)
 
             if block_stop is not None and block >= block_stop:
                 return False
             block -= block_start
+            if off_stop is not None:
+                size = min(size, off_stop)
+            off = -off_start
 
-            bd.erase(block)
+            bd.erase(block, off, size)
             return True
 
         else:
@@ -818,6 +823,9 @@ def main(path='-', *,
                 break
             # don't just flood open calls
             time.sleep(sleep or 0.1)
+    except FileNotFoundError as e:
+        print("error: file not found %r" % path)
+        sys.exit(-1)
     except KeyboardInterrupt:
         pass
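
The anchored pattern means tracebd.py only accepts lines carrying the file:line:trace: prefix, and erase events are now expected to report an explicit size in parentheses rather than an implied block_size. A trimmed-down sketch of just the erase branch, against a hypothetical emubd trace line:

    import re

    pattern = re.compile(
        r'^(?P<file>[^:]*):(?P<line>[0-9]+):trace:.*?bd_'
        r'(?P<erase>erase)\('
            r'\s*(?P<erase_ctx>\w+)\s*,'
            r'\s*(?P<erase_block>\w+)'
            r'\s*\(\s*(?P<erase_size>\w+)\s*\)\s*\)\s*$')

    line = 'bd/lfs_emubd.c:433:trace: lfs_emubd_erase(0x55d0, 0x4 (4096))'
    m = pattern.match(line)
    if m:
        print(m.group('file'), m.group('line'),
            m.group('erase_block'), m.group('erase_size'))
        # bd/lfs_emubd.c 433 0x4 4096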