Re: [RFC xfstests PATCH] xfstests: add a writeback error handling test

2017-04-24 Thread Christoph Hellwig
On Mon, Apr 24, 2017 at 09:45:51AM -0400, Jeff Layton wrote:
> With the patch series above, ext4 now passes. xfs and btrfs end up in
> r/o mode after the test. xfs returns -EIO at that point though, and
> btrfs returns -EROFS. What behavior we actually want there, I'm not
> certain. We might be able to mitigate that by putting the journals on a
> separate device?

This looks like XFS shut down because of a permanent write error from
dm-error.  Which seems like the expected behavior.


[RFC xfstests PATCH] xfstests: add a writeback error handling test

2017-04-24 Thread Jeff Layton
This is just an RFC set for now. I've numbered it 999 for the moment so
as not to collide with tests being added.

I'm working on a set of kernel patches to change how writeback errors
are handled and reported in the kernel. Instead of reporting a
writeback error to only the first fsync caller on the file, I aim
to make the kernel report them once on every file description:

https://lkml.org/lkml/2017/4/24/438

This patch adds a test for the new behavior. Basically, open many fds to
the same file, turn on dm_error, write to each of the fds, and then
fsync them all to ensure that they all get an error back. 

With the patch series above, ext4 now passes. xfs and btrfs end up in
r/o mode after the test. xfs returns -EIO at that point though, and
btrfs returns -EROFS. What behavior we actually want there, I'm not
certain. We might be able to mitigate that by putting the journals on a
separate device?

Signed-off-by: Jeff Layton 
---
 common/dmerror|  13 ---
 src/Makefile  |   2 +-
 src/fsync-err.c   | 102 ++
 tests/generic/999 |  74 
 tests/generic/999.out |   3 ++
 tests/generic/group   |   1 +
 tools/dmerror |  47 +++
 7 files changed, 236 insertions(+), 6 deletions(-)
 create mode 100644 src/fsync-err.c
 create mode 100755 tests/generic/999
 create mode 100644 tests/generic/999.out
 create mode 100755 tools/dmerror

diff --git a/common/dmerror b/common/dmerror
index d46c5d0b7266..238baa213b1f 100644
--- a/common/dmerror
+++ b/common/dmerror
@@ -23,22 +23,25 @@ if [ $? -eq 0 ]; then
_notrun "Cannot run tests with DAX on dmerror devices"
 fi
 
-_dmerror_init()
+_dmerror_setup()
 {
local dm_backing_dev=$SCRATCH_DEV
 
-   $DMSETUP_PROG remove error-test > /dev/null 2>&1
-
local blk_dev_size=`blockdev --getsz $dm_backing_dev`
 
DMERROR_DEV='/dev/mapper/error-test'
 
DMLINEAR_TABLE="0 $blk_dev_size linear $dm_backing_dev 0"
 
+   DMERROR_TABLE="0 $blk_dev_size error $dm_backing_dev 0"
+}
+
+_dmerror_init()
+{
+   _dmerror_setup
+   $DMSETUP_PROG remove error-test > /dev/null 2>&1
$DMSETUP_PROG create error-test --table "$DMLINEAR_TABLE" || \
_fatal "failed to create dm linear device"
-
-   DMERROR_TABLE="0 $blk_dev_size error $dm_backing_dev 0"
 }
 
 _dmerror_mount()
diff --git a/src/Makefile b/src/Makefile
index e62d7a9774d7..056a75b9f7bb 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -12,7 +12,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
godown resvtest writemod makeextents itrash rename \
multi_open_unlink dmiperf unwritten_sync genhashnames t_holes \
t_mmap_writev t_truncate_cmtime dirhash_collide t_rename_overwrite \
-   holetest t_truncate_self t_mmap_dio af_unix
+   holetest t_truncate_self t_mmap_dio af_unix fsync-err
 
 LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
diff --git a/src/fsync-err.c b/src/fsync-err.c
new file mode 100644
index ..8ebfd145bd70
--- /dev/null
+++ b/src/fsync-err.c
@@ -0,0 +1,102 @@
+/*
+ * fsync-err.c: test whether writeback errors are reported to all open fds
+ * Copyright (c) 2017: Jeff Layton 
+ *
+ * Open a file several times, write to it and then fsync. Flip dm_error over
+ * to make the backing device stop working. Overwrite the same section and
+ * call fsync on all fds and verify that we get errors on all of them. Then,
+ * fsync one more time on all of them and verify that they return 0.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define NUM_FDS10
+
+static void usage() {
+   fprintf(stderr, "Usage: fsync-err \n");
+}
+
+int main(int argc, char **argv)
+{
+   int fd[NUM_FDS], ret, i;
+   char *fname, *buf;
+
+   if (argc < 1) {
+   usage();
+   return 1;
+   }
+
+   /* First argument is filename */
+   fname = argv[1];
+
+   for (i = 0; i < NUM_FDS; ++i) {
+   fd[i] = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+   if (fd[i] < 0) {
+   printf("open of fd[%d] failed: %m\n", i);
+   return 1;
+   }
+   }
+
+   buf = "foobar";
+   for (i = 0; i < NUM_FDS; ++i) {
+   ret = write(fd[i], buf, strlen(buf) + 1);
+   if (ret < 0) {
+   printf("First write on fd[%d] failed: %m\n", i);
+   return 1;
+   }
+   }
+
+   for (i = 0; i < NUM_FDS; ++i) {
+   ret = fsync(fd[i]);
+   if (ret < 0) {
+   printf("First fsync on fd[%d] failed: %m\n", i);
+   return 1;
+   }
+   }
+
+   /* flip