> However if admins run a command such as sync or fsfreeze along side,
> fsync/fdatasync may return success even if writeback has failed.
> That could lead to data corruption.

For reproducing the problem, compile the attached C program (iogen.c)
and run with 'runtest.sh' script in the next mail:
  # gcc -o iogen iogen.c
  # bash ./runtest.sh

"iogen" does write(), fsync() and checks if on-disk data is same
as application's buffer after successful fsync.
"runtest.sh" injects failure for the file being written by "iogen".
(You need to enable CONFIG_HWPOISON_INJECT=m for the memory error
 injection to work.)

Without the patch, fsync returns success even though data is not on
disk.

  TEST: ext4 / ioerr / sync-command
  (iogen): inject
  (admin): Injecting I/O error
  (admin): Calling sync(2)
  (iogen): remove
  FAIL: corruption!
  DIFF 00000200: de de de de de de de de  | 00 00 00 00 00 00 00 00
  ...

With the patch, fsync detects error correctly.

  TEST: ext4 / ioerr / sync-command
  (iogen): inject
  (admin): Injecting I/O error
  (admin): Calling sync(2)
  INFO: App fsync: Input/output error
  (iogen): remove
  PASS: detected error right
  (iogen): end

-- cut here --
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

unsigned char *app_buf;
unsigned char *ondisk_data;
char *testfile;
size_t buflen;
int fd;
int rfd;

void dumpdiff(unsigned char *buf1, unsigned char *buf2, int len)
{
        int i, j;
        for(i = 0; i < len; i += 8) {
                if (!memcmp(&buf1[i], &buf2[i], 8))
                        continue;
                fprintf(stderr, "DIFF %08x: ", i);
                for(j = 0; j < 8 && i + j < len; j++)
                        fprintf(stderr, "%02x ", buf1[i]);
                fprintf(stderr, " | ");
                for(j = 0; j < 8 && i + j < len; j++)
                        fprintf(stderr, "%02x ", buf2[i]);
                fprintf(stderr, "\n");
        }
}

void notify_injector(char *str)
{
        if (str)
                fprintf(stderr, "(iogen): %s\n", str);
        write(1, "\n", 2);
        sleep(1);
}

void open_fds(void)
{
        fd = open(testfile, O_RDWR);
        if (fd < 0) {
                perror("????: App open");
                exit(1);
        }
        rfd = open(testfile, O_RDONLY|O_DIRECT); /* for verification */
        if (rfd < 0) {
                perror("????: App open rfd");
                exit(1);
        }
}

void init_fd_status(void)
{
        int r;

        r = fsync(fd); /* flush and clean */
        if (r) {
                perror("????: App fsync0");
                exit(1);
        }
        r = pread(fd, app_buf, buflen, 0); /* stage onto cache */
        if (r != buflen) {
                perror("????: App read1");
                exit(1);
        }
}

void close_fds(void)
{
        int r;

        r = close(rfd);
        if (r)
                perror("????: App close read fd");
        r = close(fd);
        if (r)
                perror("????: App close write fd");
}

void write_data(int cnt)
{
        int r;

        memset(app_buf, cnt, buflen);
        r = pwrite(fd, app_buf, buflen, 0);
        if (r != buflen)
                perror("????: App write1");
}

int sync_data(void)
{
        int r, r2;

        r = fsync(fd);
        if (r)
                perror("INFO: App fsync");
        r2 = fsync(fd);
        if (r2)
                perror("????: App fsync (redo)");

        return r;
}

void read_data_direct(void)
{
        int r;

        r = pread(rfd, ondisk_data, buflen, 0);
        if (r != buflen) {
                perror("????: App direct read");
                r = pread(rfd, ondisk_data, buflen, 0);
                if (r != buflen)
                        perror("FAIL: App direct read (retry)");
        }
}

void check_data(int fsync_result)
{
        int r;

        r = memcmp(app_buf, ondisk_data, buflen);
        if (r) {
                /* data is different */
                if (fsync_result == 0) {
                        fprintf(stderr, "FAIL: corruption!\n");
                        dumpdiff(app_buf, ondisk_data, buflen);
                } else
                        fprintf(stderr, "PASS: detected error right\n");
        } else {
                /* data is same */
                if (fsync_result == 0)
                        fprintf(stderr, "PASS: no error, data is ok\n");
                else
                        fprintf(stderr, "????: sync failed, data is ok\n");
        }
}

void cleanup_data(int cnt)
{
        int r;

        /* write-fsync-read without error injection */
        memset(app_buf, cnt, buflen);
        r = pwrite(fd, app_buf, buflen, 0);
        if (r != buflen)
                perror("BUG : App write (w/o failure)");
        r = fsync(fd);
        if (r)
                perror("BUG : App fsync (w/o failure)");
        r = pread(rfd, ondisk_data, buflen, 0);
        if (r != buflen)
                perror("BUG : App read (w/o failure)");
        r = memcmp(app_buf, ondisk_data, buflen);
        if (r)
                fprintf(stderr, "BUG : memcmp failed\n");
}

/*
 * Do this:
 *   1) write
 *   2) inject failure
 *   3) fsync (should return error)
 *   4) remove failure
 *   5) check on-disk data (using direct read)
 */
void runtest(void)
{
        int fsync_result;

        notify_injector("start");
        open_fds();
        init_fd_status();

        write_data(0xde);
        notify_injector("inject");
        fsync_result = sync_data();
        notify_injector("remove");

        /* re-read and compare */
        read_data_direct();
        check_data(fsync_result);

        cleanup_data(0);
        close_fds();
        notify_injector("end");
}

int main(int argc, char **argv)
{
        testfile = argv[1];
        buflen = atoi(argv[2]);

        app_buf = malloc(buflen);
        if (!app_buf)
                exit(1);
        if (posix_memalign((void **) &ondisk_data, 4096, buflen))
                exit(1);

        runtest();

        free(app_buf);
        free(ondisk_data);

        return 0;
}--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to