Browse Source

Fixed hidden orphans by separating deorphan search into two passes

This happens in rare situations where there is a failed mdir relocation,
interrupted by a power-loss, containing the destination of a directory
rename operation, where the directory being renamed preceded the
relocating mdir in the mdir tail-list. This requires at some point for a
previous directory rename to create a cycle.

If this happens, it's possible for the half-orphan to contain the only
reference to the renamed directory. Since half-orphans contain outdated
state when viewed through the mdir tail-list, the renamed directory
appears to be a full-orphan until we fix the relocating half-orphan.
This causes littlefs to incorrectly remove the renamed directory from
the mdir tail-list, causes catastrophic problems down the line.

The source of the problem is that the two different types of orphans
really operate on two different levels of abstraction: half-orphans fix
failed mdir commits, while full-orphans fix directory removes/renames.
Conflating the two leads to situations where we attempt to fix assumed
problems about the directory tree before we have fixed problems with the
mdir state.

The fix here is to separate out the deorphan search into two passes: one
to fix half-orphans and correct any mdir-commits, restoring the mdirs
and gstate to a known good state, then two to fix failed
removes/renames.

---

This was found with the -Plinear heuristic powerloss testing, which now
runs on more geometries. The failing case was:

  test_relocations_reentrant_renames:112gg261dk1e3f3:123456789abcdefg1h1i1j1k1
  l1m1n1o1p1q1r1s1t1u1v1g2h2i2j2k2l2m2n2o2p2q2r2s2t2

Also fixed/tweaked some parts of the test framework as a part of finding
this bug:

- Fixed off-by-one in exhaustive powerloss state encoding.

- Added --gdb-powerloss-before and --gdb-powerloss-after to help debug
  state changes through a failing powerloss, maybe this should be
  expanded to any arbitrary powerloss number in the future.

- Added lfs_emubd_crc and lfs_emubd_bdcrc to get block/bd crcs for quick
  state comparisons while debugging.

- Fixed bd read/prog/erase counts not being copied during exhaustive
  powerloss testing.

- Fixed small typo in lfs_emubd trace.
Christopher Haster 3 năm trước cách đây
mục cha
commit
eba5553314
6 tập tin đã thay đổi với 154 bổ sung39 xóa
  1. 58 1
      bd/lfs_emubd.c
  2. 7 0
      bd/lfs_emubd.h
  3. 46 37
      lfs.c
  4. 1 1
      runners/test_runner.c
  5. 4 0
      scripts/bench.py
  6. 38 0
      scripts/test.py

+ 58 - 1
bd/lfs_emubd.c

@@ -452,7 +452,7 @@ int lfs_emubd_erase(const struct lfs_config *cfg, lfs_block_t block) {
         }
     }
 
-    LFS_EMUBD_TRACE("lfs_emubd_prog -> %d", 0);
+    LFS_EMUBD_TRACE("lfs_emubd_erase -> %d", 0);
     return 0;
 }
 
@@ -468,6 +468,60 @@ int lfs_emubd_sync(const struct lfs_config *cfg) {
 
 /// Additional extended API for driving test features ///
 
+static int lfs_emubd_rawcrc(const struct lfs_config *cfg,
+        lfs_block_t block, uint32_t *crc) {
+    lfs_emubd_t *bd = cfg->context;
+
+    // check if crc is valid
+    LFS_ASSERT(block < cfg->block_count);
+
+    // crc the block
+    uint32_t crc_ = 0xffffffff;
+    const lfs_emubd_block_t *b = bd->blocks[block];
+    if (b) {
+        crc_ = lfs_crc(crc_, b->data, cfg->block_size);
+    } else {
+        uint8_t erase_value = (bd->cfg->erase_value != -1)
+                ? bd->cfg->erase_value
+                : 0;
+        for (lfs_size_t i = 0; i < cfg->block_size; i++) {
+            crc_ = lfs_crc(crc_, &erase_value, 1);
+        }
+    }
+    *crc = 0xffffffff ^ crc_;
+
+    return 0;
+}
+
+int lfs_emubd_crc(const struct lfs_config *cfg,
+        lfs_block_t block, uint32_t *crc) {
+    LFS_EMUBD_TRACE("lfs_emubd_crc(%p, %"PRIu32", %p)",
+            (void*)cfg, block, crc);
+    int err = lfs_emubd_rawcrc(cfg, block, crc);
+    LFS_EMUBD_TRACE("lfs_emubd_crc -> %d", err);
+    return err;
+}
+
+int lfs_emubd_bdcrc(const struct lfs_config *cfg, uint32_t *crc) {
+    LFS_EMUBD_TRACE("lfs_emubd_bdcrc(%p, %p)", (void*)cfg, crc);
+
+    uint32_t crc_ = 0xffffffff;
+    for (lfs_block_t i = 0; i < cfg->block_count; i++) {
+        uint32_t i_crc;
+        int err = lfs_emubd_rawcrc(cfg, i, &i_crc);
+        if (err) {
+            LFS_EMUBD_TRACE("lfs_emubd_bdcrc -> %d", err);
+            return err;
+        }
+
+        crc_ = lfs_crc(crc_, &i_crc, sizeof(uint32_t));
+    }
+    *crc = 0xffffffff ^ crc_;
+
+    LFS_EMUBD_TRACE("lfs_emubd_bdcrc -> %d", 0);
+    return 0;
+}
+
 lfs_emubd_sio_t lfs_emubd_getreaded(const struct lfs_config *cfg) {
     LFS_EMUBD_TRACE("lfs_emubd_getreaded(%p)", (void*)cfg);
     lfs_emubd_t *bd = cfg->context;
@@ -591,6 +645,9 @@ int lfs_emubd_copy(const struct lfs_config *cfg, lfs_emubd_t *copy) {
     }
 
     // other state
+    copy->readed = bd->readed;
+    copy->proged = bd->proged;
+    copy->erased = bd->erased;
     copy->power_cycles = bd->power_cycles;
     copy->disk = bd->disk;
     if (copy->disk) {

+ 7 - 0
bd/lfs_emubd.h

@@ -181,6 +181,13 @@ int lfs_emubd_sync(const struct lfs_config *cfg);
 
 /// Additional extended API for driving test features ///
 
+// A CRC of a block for debugging purposes
+int lfs_emubd_crc(const struct lfs_config *cfg,
+        lfs_block_t block, uint32_t *crc);
+
+// A CRC of the entire block device for debugging purposes
+int lfs_emubd_bdcrc(const struct lfs_config *cfg, uint32_t *crc);
+
 // Get total amount of bytes read
 lfs_emubd_sio_t lfs_emubd_getreaded(const struct lfs_config *cfg);
 

+ 46 - 37
lfs.c

@@ -4485,8 +4485,17 @@ static int lfs_fs_deorphan(lfs_t *lfs, bool powerloss) {
     }
 
     int8_t found = 0;
+
 restart:
-    {
+    // Check for orphans in two separate passes:
+    // - 1 for half-orphans (relocations)
+    // - 2 for full-orphans (removes/renames)
+    //
+    // Two separate passes are needed as half-orphans can contain outdated
+    // references to full-orphans, effectively hiding them from the deorphan
+    // search.
+    //
+    for (int pass = 0; pass < 2; pass++) {
         // Fix any orphans
         lfs_mdir_t pdir = {.split = true, .tail = {0, 1}};
         lfs_mdir_t dir;
@@ -4507,42 +4516,7 @@ restart:
                     return tag;
                 }
 
-                // note we only check for full orphans if we may have had a
-                // power-loss, otherwise orphans are created intentionally
-                // during operations such as lfs_mkdir
-                if (tag == LFS_ERR_NOENT && powerloss) {
-                    // we are an orphan
-                    LFS_DEBUG("Fixing orphan {0x%"PRIx32", 0x%"PRIx32"}",
-                            pdir.tail[0], pdir.tail[1]);
-
-                    // steal state
-                    err = lfs_dir_getgstate(lfs, &dir, &lfs->gdelta);
-                    if (err) {
-                        return err;
-                    }
-
-                    // steal tail
-                    lfs_pair_tole32(dir.tail);
-                    int state = lfs_dir_orphaningcommit(lfs, &pdir, LFS_MKATTRS(
-                            {LFS_MKTAG(LFS_TYPE_TAIL + dir.split, 0x3ff, 8),
-                                dir.tail}));
-                    lfs_pair_fromle32(dir.tail);
-                    if (state < 0) {
-                        return state;
-                    }
-
-                    found += 1;
-
-                    // did our commit create more orphans?
-                    if (state == LFS_OK_ORPHANED) {
-                        goto restart;
-                    }
-
-                    // refetch tail
-                    continue;
-                }
-
-                if (tag != LFS_ERR_NOENT) {
+                if (pass == 0 && tag != LFS_ERR_NOENT) {
                     lfs_block_t pair[2];
                     lfs_stag_t state = lfs_dir_get(lfs, &parent,
                             LFS_MKTAG(0x7ff, 0x3ff, 0), tag, pair);
@@ -4592,6 +4566,41 @@ restart:
                         continue;
                     }
                 }
+
+                // note we only check for full orphans if we may have had a
+                // power-loss, otherwise orphans are created intentionally
+                // during operations such as lfs_mkdir
+                if (pass == 1 && tag == LFS_ERR_NOENT && powerloss) {
+                    // we are an orphan
+                    LFS_DEBUG("Fixing orphan {0x%"PRIx32", 0x%"PRIx32"}",
+                            pdir.tail[0], pdir.tail[1]);
+
+                    // steal state
+                    err = lfs_dir_getgstate(lfs, &dir, &lfs->gdelta);
+                    if (err) {
+                        return err;
+                    }
+
+                    // steal tail
+                    lfs_pair_tole32(dir.tail);
+                    int state = lfs_dir_orphaningcommit(lfs, &pdir, LFS_MKATTRS(
+                            {LFS_MKTAG(LFS_TYPE_TAIL + dir.split, 0x3ff, 8),
+                                dir.tail}));
+                    lfs_pair_fromle32(dir.tail);
+                    if (state < 0) {
+                        return state;
+                    }
+
+                    found += 1;
+
+                    // did our commit create more orphans?
+                    if (state == LFS_OK_ORPHANED) {
+                        goto restart;
+                    }
+
+                    // refetch tail
+                    continue;
+                }
             }
 
             pdir = dir;

+ 1 - 1
runners/test_runner.c

@@ -1688,7 +1688,7 @@ static void run_powerloss_exhaustive_layer(
             fprintf(stderr, "error: exhaustive: out of memory\n");
             exit(-1);
         }
-        *cycle = i;
+        *cycle = i+1;
 
         printf("powerloss ");
         perm_printid(suite, case_, cycles->cycles, cycles->cycle_count);

+ 4 - 0
scripts/bench.py

@@ -1144,8 +1144,12 @@ def run(runner, bench_ids=[], **args):
         cmd = runner_ + [failure.id]
 
         if args.get('gdb_main'):
+            # we don't really need the case breakpoint here, but it
+            # can be helpful
+            path, lineno = find_path(runner_, failure.id, **args)
             cmd[:0] = args['gdb_path'] + [
                 '-ex', 'break main',
+                '-ex', 'break %s:%d' % (path, lineno),
                 '-ex', 'run',
                 '--args']
         elif args.get('gdb_case'):

+ 38 - 0
scripts/test.py

@@ -1138,14 +1138,20 @@ def run(runner, test_ids=[], **args):
 
     # drop into gdb?
     if failures and (args.get('gdb')
+            or args.get('gdb_powerloss_before')
+            or args.get('gdb_powerloss_after')
             or args.get('gdb_case')
             or args.get('gdb_main')):
         failure = failures[0]
         cmd = runner_ + [failure.id]
 
         if args.get('gdb_main'):
+            # we don't really need the case breakpoint here, but it
+            # can be helpful
+            path, lineno = find_path(runner_, failure.id, **args)
             cmd[:0] = args['gdb_path'] + [
                 '-ex', 'break main',
+                '-ex', 'break %s:%d' % (path, lineno),
                 '-ex', 'run',
                 '--args']
         elif args.get('gdb_case'):
@@ -1154,6 +1160,30 @@ def run(runner, test_ids=[], **args):
                 '-ex', 'break %s:%d' % (path, lineno),
                 '-ex', 'run',
                 '--args']
+        elif args.get('gdb_powerloss_before'):
+            # figure out how many powerlosses there were
+            powerlosses = (
+                sum(1 for _ in re.finditer('[0-9a-f]',
+                    failure.id.split(':', 2)[-1]))
+                if failure.id.count(':') >= 2 else 0)
+            path, lineno = find_path(runner_, failure.id, **args)
+            cmd[:0] = args['gdb_path'] + [
+                '-ex', 'break %s:%d' % (path, lineno),
+                '-ex', 'ignore 1 %d' % max(powerlosses-1, 0),
+                '-ex', 'run',
+                '--args']
+        elif args.get('gdb_powerloss_after'):
+            # figure out how many powerlosses there were
+            powerlosses = (
+                sum(1 for _ in re.finditer('[0-9a-f]',
+                    failure.id.split(':', 2)[-1]))
+                if failure.id.count(':') >= 2 else 0)
+            path, lineno = find_path(runner_, failure.id, **args)
+            cmd[:0] = args['gdb_path'] + [
+                '-ex', 'break %s:%d' % (path, lineno),
+                '-ex', 'ignore 1 %d' % powerlosses,
+                '-ex', 'run',
+                '--args']
         elif failure.assert_ is not None:
             cmd[:0] = args['gdb_path'] + [
                 '-ex', 'run',
@@ -1342,6 +1372,14 @@ if __name__ == "__main__":
         '--gdb',
         action='store_true',
         help="Drop into gdb on test failure.")
+    test_parser.add_argument(
+        '--gdb-powerloss-before',
+        action='store_true',
+        help="Drop into gdb before the powerloss that caused the failure.")
+    test_parser.add_argument(
+        '--gdb-powerloss-after',
+        action='store_true',
+        help="Drop into gdb after the powerloss that caused the failure.")
     test_parser.add_argument(
         '--gdb-case',
         action='store_true',