Răsfoiți Sursa

Added allocation randomization for dynamic wear-leveling

This implements the second step of full dynamic wear-leveling, block
allocation randomization. This is the key part that uniformly distributes
wear across the filesystem, even through reboots.

The entropy actually comes from the filesystem itself, by xoring
together all of the CRCs in the metadata-pairs on the filesystem. While
this sounds like a ridiculous operation, it's easy to do when we already
scan the metadata-pairs at mount time.

This gives us a random number we can use for block allocation.
Unfortunately it's not a great general purpose random generator as the
output only changes on every filesystem write. Fortunately that's exactly
when we need our allocator.

---

Additionally, the randomization created a mess for the testing
framework. Fortunately, this method of randomization is deterministic,
a very useful property for reproducing bugs.
Christopher Haster 7 ani în urmă
părinte
comite
126ef8b07f
10 a modificat fișierele cu 254 adăugiri și 63 ștergeri
  1. 93 11
      emubd/lfs_emubd.c
  2. 4 0
      emubd/lfs_emubd.h
  3. 9 6
      lfs.c
  4. 1 0
      lfs.h
  5. 35 31
      tests/corrupt.py
  6. 98 0
      tests/debug.py
  7. 2 2
      tests/stats.py
  8. 7 6
      tests/test_corrupt.sh
  9. 4 6
      tests/test_move.sh
  10. 1 1
      tests/test_orphan.sh

+ 93 - 11
emubd/lfs_emubd.c

@@ -19,6 +19,40 @@
 #include <inttypes.h>
 
 
+// Emulated block device utils
+static inline void lfs_emubd_tole32(lfs_emubd_t *emu) {  // rewrites the cfg, stats and history fields of *emu in place via lfs_tole32
+    emu->cfg.read_size     = lfs_tole32(emu->cfg.read_size);
+    emu->cfg.prog_size     = lfs_tole32(emu->cfg.prog_size);
+    emu->cfg.block_size    = lfs_tole32(emu->cfg.block_size);
+    emu->cfg.block_count   = lfs_tole32(emu->cfg.block_count);
+
+    emu->stats.read_count  = lfs_tole32(emu->stats.read_count);  // NOTE(review): stats counters look 64-bit (uint64_t erase_count in lfs_emubd.h, '<QQQ' in tests/stats.py); confirm a 32-bit helper does not truncate
+    emu->stats.prog_count  = lfs_tole32(emu->stats.prog_count);
+    emu->stats.erase_count = lfs_tole32(emu->stats.erase_count);
+
+    for (int i = 0; i < sizeof(emu->history.blocks) /  // one pass per element of history.blocks
+            sizeof(emu->history.blocks[0]); i++) {
+        emu->history.blocks[i] = lfs_tole32(emu->history.blocks[i]);
+    }
+}
+
+static inline void lfs_emubd_fromle32(lfs_emubd_t *emu) {  // rewrites the cfg, stats and history fields of *emu in place via lfs_fromle32
+    emu->cfg.read_size     = lfs_fromle32(emu->cfg.read_size);
+    emu->cfg.prog_size     = lfs_fromle32(emu->cfg.prog_size);
+    emu->cfg.block_size    = lfs_fromle32(emu->cfg.block_size);
+    emu->cfg.block_count   = lfs_fromle32(emu->cfg.block_count);
+
+    emu->stats.read_count  = lfs_fromle32(emu->stats.read_count);  // NOTE(review): stats counters look 64-bit (uint64_t erase_count in lfs_emubd.h, '<QQQ' in tests/stats.py); confirm a 32-bit helper does not truncate
+    emu->stats.prog_count  = lfs_fromle32(emu->stats.prog_count);
+    emu->stats.erase_count = lfs_fromle32(emu->stats.erase_count);
+
+    for (int i = 0; i < sizeof(emu->history.blocks) /  // one pass per element of history.blocks
+            sizeof(emu->history.blocks[0]); i++) {
+        emu->history.blocks[i] = lfs_fromle32(emu->history.blocks[i]);
+    }
+}
+
+
 // Block device emulated on existing filesystem
 int lfs_emubd_create(const struct lfs_config *cfg, const char *path) {
     lfs_emubd_t *emu = cfg->context;
@@ -46,20 +80,39 @@ int lfs_emubd_create(const struct lfs_config *cfg, const char *path) {
     }
 
     // Load stats to continue incrementing
-    snprintf(emu->child, LFS_NAME_MAX, "stats");
+    snprintf(emu->child, LFS_NAME_MAX, ".stats");
     FILE *f = fopen(emu->path, "r");
     if (!f) {
-        return -errno;
-    }
+        memset(&emu->stats, 0, sizeof(emu->stats));
+    } else {
+        size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f);
+        lfs_emubd_fromle32(emu);
+        if (res < 1) {
+            return -errno;
+        }
 
-    size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f);
-    if (res < 1) {
-        return -errno;
+        err = fclose(f);
+        if (err) {
+            return -errno;
+        }
     }
 
-    err = fclose(f);
-    if (err) {
-        return -errno;
+    // Load history
+    snprintf(emu->child, LFS_NAME_MAX, ".history");
+    f = fopen(emu->path, "r");
+    if (!f) {
+        memset(&emu->history, 0, sizeof(emu->history));
+    } else {
+        size_t res = fread(&emu->history, sizeof(emu->history), 1, f);
+        lfs_emubd_fromle32(emu);
+        if (res < 1) {
+            return -errno;
+        }
+
+        err = fclose(f);
+        if (err) {
+            return -errno;
+        }
     }
 
     return 0;
@@ -161,6 +214,13 @@ int lfs_emubd_prog(const struct lfs_config *cfg, lfs_block_t block,
         return -errno;
     }
 
+    // update history and stats
+    if (block != emu->history.blocks[0]) {
+        memcpy(&emu->history.blocks[1], &emu->history.blocks[0],
+                sizeof(emu->history) - sizeof(emu->history.blocks[0]));
+        emu->history.blocks[0] = block;
+    }
+
     emu->stats.prog_count += 1;
     return 0;
 }
@@ -206,13 +266,15 @@ int lfs_emubd_sync(const struct lfs_config *cfg) {
     lfs_emubd_t *emu = cfg->context;
 
     // Just write out info/stats for later lookup
-    snprintf(emu->child, LFS_NAME_MAX, "config");
+    snprintf(emu->child, LFS_NAME_MAX, ".config");
     FILE *f = fopen(emu->path, "w");
     if (!f) {
         return -errno;
     }
 
+    lfs_emubd_tole32(emu);
     size_t res = fwrite(&emu->cfg, sizeof(emu->cfg), 1, f);
+    lfs_emubd_fromle32(emu);
     if (res < 1) {
         return -errno;
     }
@@ -222,13 +284,33 @@ int lfs_emubd_sync(const struct lfs_config *cfg) {
         return -errno;
     }
 
-    snprintf(emu->child, LFS_NAME_MAX, "stats");
+    snprintf(emu->child, LFS_NAME_MAX, ".stats");
     f = fopen(emu->path, "w");
     if (!f) {
         return -errno;
     }
 
+    lfs_emubd_tole32(emu);
     res = fwrite(&emu->stats, sizeof(emu->stats), 1, f);
+    lfs_emubd_fromle32(emu);
+    if (res < 1) {
+        return -errno;
+    }
+
+    err = fclose(f);
+    if (err) {
+        return -errno;
+    }
+
+    snprintf(emu->child, LFS_NAME_MAX, ".history");
+    f = fopen(emu->path, "w");
+    if (!f) {
+        return -errno;
+    }
+
+    lfs_emubd_tole32(emu);
+    res = fwrite(&emu->history, sizeof(emu->history), 1, f);
+    lfs_emubd_fromle32(emu);
     if (res < 1) {
         return -errno;
     }

+ 4 - 0
emubd/lfs_emubd.h

@@ -45,6 +45,10 @@ typedef struct lfs_emubd {
         uint64_t erase_count;
     } stats;
 
+    struct {
+        lfs_block_t blocks[4];
+    } history;
+
     struct {
         uint32_t read_size;
         uint32_t prog_size;

+ 9 - 6
lfs.c

@@ -879,6 +879,8 @@ static int32_t lfs_dir_fetchmatch(lfs_t *lfs,
                 dir->tail[1] = temptail[1];
                 dir->split = tempsplit;
                 dir->locals = templocals;
+
+                lfs->seed ^= crc;
                 crc = 0xffffffff;
             } else {
                 err = lfs_bd_crc32(lfs, dir->pair[0],
@@ -2874,6 +2876,7 @@ static int lfs_init(lfs_t *lfs, const struct lfs_config *cfg) {
     lfs->root[0] = 0xffffffff;
     lfs->root[1] = 0xffffffff;
     lfs->mlist = NULL;
+    lfs->seed = 0;
     lfs->globals.s.movepair[0] = 0xffffffff;
     lfs->globals.s.movepair[1] = 0xffffffff;
     lfs->globals.s.moveid = 0x3ff;
@@ -2962,12 +2965,6 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) {
         return err;
     }
 
-    // setup free lookahead
-    lfs->free.off = 0;
-    lfs->free.size = 0;
-    lfs->free.i = 0;
-    lfs_alloc_ack(lfs);
-
     // load superblock
     lfs_mdir_t root;
     err = lfs_dir_fetch(lfs, &root, (const lfs_block_t[2]){0, 1});
@@ -3065,6 +3062,12 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) {
                 lfs->globals.s.moveid);
     }
 
+    // setup free lookahead
+    lfs->free.off = lfs->seed % lfs->cfg->block_size;
+    lfs->free.size = 0;
+    lfs->free.i = 0;
+    lfs_alloc_ack(lfs);
+
     return 0;
 
 cleanup:

+ 1 - 0
lfs.h

@@ -382,6 +382,7 @@ typedef struct lfs {
 
     lfs_block_t root[2];
     lfs_mlist_t *mlist;
+    uint32_t seed;
 
     lfs_global_t globals;
     lfs_global_t locals;

+ 35 - 31
tests/corrupt.py

@@ -3,37 +3,41 @@
 import struct
 import sys
 import os
+import argparse
 
-def main(*paths):
-    # find most recent block
-    file = None
-    rev = None
-    for path in paths:
-        try:
-            nfile = open(path, 'r+b')
-            nrev, = struct.unpack('<I', nfile.read(4))
-
-            assert rev != nrev
-            if not file or ((rev - nrev) & 0x80000000):
-                file = nfile
-                rev = nrev
-        except IOError:
-            pass
-
-    # go to last commit
-    tag = 0
-    while True:
-        try:
-            ntag, = struct.unpack('<I', file.read(4))
-        except struct.error:
-            break
-
-        tag ^= ntag
-        file.seek(tag & 0xfff, os.SEEK_CUR)
-
-    # lob off last 3 bytes
-    file.seek(-((tag & 0xfff) + 3), os.SEEK_CUR)
-    file.truncate()
+def corrupt(block):  # block: path of a block file; the file is truncated in place
+    with open(block, 'r+b') as file:
+        # skip rev
+        file.read(4)
+
+        # go to last commit
+        tag = 0
+        while True:
+            try:
+                ntag, = struct.unpack('<I', file.read(4))
+            except struct.error:  # fewer than 4 bytes were read, so stop walking
+                break
+
+            tag ^= ntag  # each 32-bit word read is xored into the running tag
+            file.seek(tag & 0xfff, os.SEEK_CUR)  # skip forward by the low 12 bits of the tag
+
+        # lob off last 3 bytes
+        file.seek(-((tag & 0xfff) + 3), os.SEEK_CUR)  # step back over the last skipped span plus 3 bytes
+        file.truncate()  # cut the file at the current position
+
+def main(args):  # args: parsed argparse namespace providing `n` and `blocks`
+    if args.n or not args.blocks:  # -n given, or no block paths given: read targets from blocks/.history
+        with open('blocks/.history', 'rb') as file:
+            for i in range(int(args.n or 1)):  # one entry when -n is absent
+                last, = struct.unpack('<I', file.read(4))  # next little-endian 32-bit value from the file
+                args.blocks.append('blocks/%x' % last)  # path is the value formatted as lowercase hex
+
+    for block in args.blocks:
+        print 'corrupting %s' % block
+        corrupt(block)
 
 if __name__ == "__main__":
-    main(*sys.argv[1:])
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-n')
+    parser.add_argument('blocks', nargs='*')
+    main(parser.parse_args())

+ 98 - 0
tests/debug.py

@@ -0,0 +1,98 @@
+#!/usr/bin/env python2
+
+import struct
+import binascii
+
+TYPES = {
+    (0x1ff, 0x001): 'reg',
+    (0x1ff, 0x002): 'dir',
+    (0x1ff, 0x011): 'superblock',
+    (0x1ff, 0x012): 'root',
+    (0x1ff, 0x030): 'delete',
+    (0x1f0, 0x080): 'globals',
+    (0x1ff, 0x0c0): 'tail soft',
+    (0x1ff, 0x0c1): 'tail hard',
+    (0x1ff, 0x0f0): 'crc',
+    (0x1ff, 0x040): 'struct dir',
+    (0x1ff, 0x041): 'struct inline',
+    (0x1ff, 0x042): 'struct ctz',
+    (0x100, 0x100): 'attr',
+}
+
+def typeof(type):  # returns a display name for a numeric tag type, looked up in TYPES
+    for prefix in range(9):  # prefix = number of low bits ignored, tried from 0 up to 8
+        mask = 0x1ff & ~((1 << prefix)-1)
+        if (mask, type & mask) in TYPES:
+            return TYPES[mask, type & mask] + (
+                ' [%0*x]' % (prefix/4, type & ((1 << prefix)-1))  # ignored low bits in hex, min width prefix/4
+                if prefix else '')
+    else:  # the loop has no break, so this runs whenever no mask matched
+        return '[%02x]' % type
+
+def main(*blocks):  # blocks: paths of block files; prints one line per tag of the selected file
+    # find most recent block
+    file = None
+    rev = None
+    crc = None
+    versions = []
+
+    for block in blocks:
+        try:
+            nfile = open(block, 'rb')
+            ndata = nfile.read(4)
+            ncrc = binascii.crc32(ndata)  # the running crc starts over the 4-byte rev word
+            nrev, = struct.unpack('<I', ndata)
+
+            assert rev != nrev
+            if not file or ((rev - nrev) & 0x80000000):  # first file opened, or bit 31 of rev - nrev is set
+                file = nfile
+                rev = nrev
+                crc = ncrc
+
+            versions.append((nrev, '%s (rev %d)' % (block, nrev)))
+        except IOError:  # paths that cannot be opened or read are skipped
+            pass
+
+    print "--- %s ---" % ', '.join(v for _,v in sorted(versions, reverse=True))
+
+    # go through each tag, print useful information
+    print "%-4s  %-8s  %-14s  %3s  %3s  %s" % (
+        'off', 'tag', 'type', 'id', 'len', 'dump')
+
+    tag = 0
+    off = 4
+    while True:
+        try:
+            data = file.read(4)  # NOTE(review): file is still None here if no path could be opened
+            crc = binascii.crc32(data, crc)
+            ntag, = struct.unpack('<I', data)
+        except struct.error:  # fewer than 4 bytes were read, so stop
+            break
+
+        tag ^= ntag  # each 32-bit word read is xored into the running tag
+        off += 4
+
+        type = (tag & 0x7fc00000) >> 22
+        id   = (tag & 0x003ff000) >> 12
+        size = (tag & 0x00000fff) >> 0
+
+        data = file.read(size)
+        if type == 0x0f0:  # 0x0f0 is the 'crc' entry in TYPES; only its first 4 bytes feed the crc
+            crc = binascii.crc32(data[:4], crc)
+        else:
+            crc = binascii.crc32(data, crc)
+
+        print '%04x: %08x  %-14s  %3s  %3d  %-23s  %-8s' % (
+            off, tag,
+            typeof(type) + (' bad!' if type == 0x0f0 and ~crc else ''),  # crc tag marked bad unless ~crc == 0
+            id if id != 0x3ff else '.', size,
+            ' '.join('%02x' % ord(c) for c in data[:8]),  # hex of the first 8 data bytes
+            ''.join(c if c >= ' ' and c <= '~' else '.' for c in data[:8]))  # same bytes, with '.' for chars outside ' '..'~'
+
+        off += tag & 0xfff
+        if type == 0x0f0:
+            crc = 0  # running crc is reset after each crc tag
+
+if __name__ == "__main__":
+    import sys
+    main(*sys.argv[1:])
+
+if __name__ == "__main__":
+    import sys
+    main(*sys.argv[1:])

+ 2 - 2
tests/stats.py

@@ -7,7 +7,7 @@ import os
 import re
 
 def main():
-    with open('blocks/config') as file:
+    with open('blocks/.config') as file:
         s = struct.unpack('<LLLL', file.read())
         print 'read_size: %d' % s[0]
         print 'prog_size: %d' % s[1]
@@ -18,7 +18,7 @@ def main():
         os.path.getsize(os.path.join('blocks', f))
         for f in os.listdir('blocks') if re.match('\d+', f))
 
-    with open('blocks/stats') as file:
+    with open('blocks/.stats') as file:
         s = struct.unpack('<QQQ', file.read())
         print 'read_count: %d' % s[0]
         print 'prog_count: %d' % s[1]

+ 7 - 6
tests/test_corrupt.sh

@@ -71,24 +71,25 @@ echo "--- Sanity check ---"
 rm -rf blocks
 lfs_mktree
 lfs_chktree
+BLOCKS="$(ls blocks | grep -vw '[01]')"
 
 echo "--- Block corruption ---"
-for i in {2..33}
+for b in $BLOCKS
 do 
     rm -rf blocks
     mkdir blocks
-    ln -s /dev/zero blocks/$(printf '%x' $i)
+    ln -s /dev/zero blocks/$b
     lfs_mktree
     lfs_chktree
 done
 
 echo "--- Block persistance ---"
-for i in {2..33}
+for b in $BLOCKS
 do 
     rm -rf blocks
     mkdir blocks
     lfs_mktree
-    chmod a-w blocks/$(printf '%x' $i) || true
+    chmod a-w blocks/$b
     lfs_mktree
     lfs_chktree
 done
@@ -96,7 +97,7 @@ done
 echo "--- Big region corruption ---"
 rm -rf blocks
 mkdir blocks
-for i in {2..255}
+for i in {2..512}
 do
     ln -s /dev/zero blocks/$(printf '%x' $i)
 done
@@ -106,7 +107,7 @@ lfs_chktree
 echo "--- Alternating corruption ---"
 rm -rf blocks
 mkdir blocks
-for i in {2..511..2}
+for i in {2..1024..2}
 do
     ln -s /dev/zero blocks/$(printf '%x' $i)
 done

+ 4 - 6
tests/test_move.sh

@@ -59,7 +59,7 @@ tests/test.py << TEST
     lfs_rename(&lfs, "b/hello", "c/hello") => 0;
     lfs_unmount(&lfs) => 0;
 TEST
-tests/corrupt.py blocks/{4,5}
+tests/corrupt.py -n 1
 tests/test.py << TEST
     lfs_mount(&lfs, &cfg) => 0;
     lfs_dir_open(&lfs, &dir[0], "b") => 0;
@@ -86,8 +86,7 @@ tests/test.py << TEST
     lfs_rename(&lfs, "c/hello", "d/hello") => 0;
     lfs_unmount(&lfs) => 0;
 TEST
-tests/corrupt.py blocks/{6,7}
-tests/corrupt.py blocks/{8,9}
+tests/corrupt.py -n 2
 tests/test.py << TEST
     lfs_mount(&lfs, &cfg) => 0;
     lfs_dir_open(&lfs, &dir[0], "c") => 0;
@@ -166,7 +165,7 @@ tests/test.py << TEST
     lfs_rename(&lfs, "b/hi", "c/hi") => 0;
     lfs_unmount(&lfs) => 0;
 TEST
-tests/corrupt.py blocks/{4,5}
+tests/corrupt.py -n 1
 tests/test.py << TEST
     lfs_mount(&lfs, &cfg) => 0;
     lfs_dir_open(&lfs, &dir[0], "b") => 0;
@@ -193,8 +192,7 @@ tests/test.py << TEST
     lfs_rename(&lfs, "c/hi", "d/hi") => 0;
     lfs_unmount(&lfs) => 0;
 TEST
-tests/corrupt.py blocks/{6,7}
-tests/corrupt.py blocks/{8,9}
+tests/corrupt.py -n 2
 tests/test.py << TEST
     lfs_mount(&lfs, &cfg) => 0;
     lfs_dir_open(&lfs, &dir[0], "c") => 0;

+ 1 - 1
tests/test_orphan.sh

@@ -17,7 +17,7 @@ tests/test.py << TEST
 TEST
 # corrupt most recent commit, this should be the update to the previous
 # linked-list entry and should orphan the child
-tests/corrupt.py blocks/{6,7}
+tests/corrupt.py
 tests/test.py << TEST
     lfs_mount(&lfs, &cfg) => 0;