Skip to content

Filesystem corruption when truncating a file that is still open with another file handle #1177

@Ictogan1

Description

@Ictogan1

LittleFS will become corrupted if:

  1. A file is opened for writing and filled (to a non-inlinable size), then synced.
  2. A second file handle is made for the same file, this one is used to truncate the file and then closed.
  3. Now the filesystem on disk represents the truncated file, while the first file handle still sees the larger file without having the LFS_F_DIRTY flag set.
  4. Now a new file can be created. When this file is filled, it may allocate blocks that are still "in use" by the first file handle.
  5. If this file is closed after being filled, the filesystem now contains a truncated version of the first file and the full version of the second file.
  6. If the first file handle is now used to write just a small amount of additional data to the first file, the filesystem now contains the full-sized version of the first file again as well as the second file, despite the second file already having overwritten blocks from the first file.
  7. The files are now corrupted.

Test case to prove this issue:

[cases.test_old_file_handles_after_modification]
# Reproduction for corruption through a stale file handle:
#   1. "e" is written to 6000 bytes and synced through files[0], which stays open.
#   2. "e" is truncated to 100 bytes through a second handle (files[1]).
#   3. A new file "f" is filled to 6000 bytes.
#   4. One more byte is written through the stale files[0] handle.
# The device is 16 blocks of 512 bytes (8192 bytes), so two files of ~6000
# bytes cannot both fit.
#defines.SIZE = [10, 100]
defines.BLOCK_SIZE = [512]
defines.CACHE_SIZE = [512]
defines.PROG_SIZE = [16]
defines.ERASE_SIZE = [512]
defines.BLOCK_COUNT = [16]
code = '''
    lfs_t lfs;
    lfs_format(&lfs, cfg) => 0;
    lfs_mount(&lfs, cfg) => 0;
    lfs_file_t files[3];
    lfs_file_open(&lfs, &files[0], "e", LFS_O_WRONLY | LFS_O_CREAT) => 0;

    lfs_size_t write_size = 1000;
    lfs_size_t file_size = 6000;

    // fill the write buffer with a fixed pattern so the test never writes
    // uninitialized heap memory to the block device
    uint8_t *data = malloc(write_size);
    assert(data != NULL);
    memset(data, 0xa5, write_size);

    for (lfs_size_t bytes_written = 0; bytes_written < file_size; bytes_written += write_size) {
        lfs_file_write(&lfs, &files[0], data, write_size) => write_size;
        lfs_file_sync(&lfs, &files[0]) => 0;
    }
    // at this point, the filesystem contains 1 file with 6000 bytes and
    // files[0] is an open write-only handle for that file.
    // since the latest changes have been synced to disk, LFS_F_DIRTY is not
    // set (this will become important)

    // first round of asserts to ensure the filesystem state is as expected
    lfs_dir_t dir;
    lfs_dir_open(&lfs, &dir, "/") => 0;
    struct lfs_info info;
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, ".") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "..") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "e") == 0);
    assert(info.type == LFS_TYPE_REG);
    assert(info.size == file_size);
    lfs_dir_close(&lfs, &dir) => 0;

    // open a second file handle for the same file, truncate it to 100 bytes
    // and close the second handle. the truncate must succeed, otherwise the
    // rest of the scenario is meaningless
    lfs_file_open(&lfs, &files[1], "e", LFS_O_RDWR) => 0;
    lfs_file_truncate(&lfs, &files[1], 100) => 0;
    lfs_file_close(&lfs, &files[1]) => 0;

    // at this point, the filesystem contains 1 file of 100 bytes, but
    // files[0] is still open as another handle of the same file that has a
    // "CoW snapshot view" of the file still being 6000 bytes

    // second round of asserts on the filesystem state
    lfs_dir_open(&lfs, &dir, "/") => 0;
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, ".") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "..") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "e") == 0);
    assert(info.type == LFS_TYPE_REG);
    assert(info.size == 100);
    lfs_dir_close(&lfs, &dir) => 0;

    // open another file and fill it to 6000 bytes.
    // this is possible because the filesystem does not consider the blocks
    // in use by files[0] to be in use, since they do not represent the
    // on-disk state, but files[0] is also not marked as dirty!
    lfs_file_open(&lfs, &files[2], "f", LFS_O_WRONLY | LFS_O_CREAT) => 0;
    for (lfs_size_t bytes_written = 0; bytes_written < file_size; bytes_written += write_size) {
        lfs_file_write(&lfs, &files[2], data, write_size) => write_size;
        lfs_file_sync(&lfs, &files[2]) => 0;
    }
    lfs_file_close(&lfs, &files[2]) => 0;

    // another round of filesystem checks - here everything is still ok
    lfs_dir_open(&lfs, &dir, "/") => 0;
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, ".") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "..") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "e") == 0);
    assert(info.type == LFS_TYPE_REG);
    assert(info.size == 100);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "f") == 0);
    assert(info.type == LFS_TYPE_REG);
    assert(info.size == file_size);
    lfs_dir_close(&lfs, &dir) => 0;

    // now write to files[0]. this restores "e" to its full 6000 (now 6001)
    // byte size. the filesystem does not check whether the blocks it is now
    // writing to are already used!
    lfs_file_write(&lfs, &files[0], data, 1) => 1;
    lfs_file_close(&lfs, &files[0]) => 0;

    // third round of filesystem checks - we now have two files of ~6000
    // bytes each on an 8192 byte block device, which should be impossible!
    lfs_dir_open(&lfs, &dir, "/") => 0;
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, ".") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "..") == 0);
    assert(info.type == LFS_TYPE_DIR);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "e") == 0);
    assert(info.type == LFS_TYPE_REG);
    assert(info.size == file_size+1);
    lfs_dir_read(&lfs, &dir, &info) => 1;
    assert(strcmp(info.name, "f") == 0);
    assert(info.type == LFS_TYPE_REG);
    assert(info.size == file_size);
    lfs_dir_close(&lfs, &dir) => 0;
    lfs_unmount(&lfs) => 0;

    // remounting and trying to read confirms that the filesystem is now
    // corrupted!
    lfs_mount(&lfs, cfg) => 0;
    lfs_file_open(&lfs, &files[0], "e", LFS_O_RDONLY) => 0;
    unsigned char read_buf[1];
    lfs_file_read(&lfs, &files[0], read_buf, 1) => 1;
    lfs_file_close(&lfs, &files[0]) => 0;
    lfs_unmount(&lfs) => 0;

    // release the write buffer allocated at the top of the test
    free(data);
'''

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions