Re: [PATCH v10 8/9] xfs: Implement ->notify_failure() for XFS

2022-02-15 Thread Dan Williams
On Thu, Jan 27, 2022 at 4:41 AM Shiyang Ruan  wrote:
>
> Introduce xfs_notify_failure.c to handle failure related works, such as
> implement ->notify_failure(), register/unregister dax holder in xfs, and
> so on.
>
> If the rmap feature of XFS enabled, we can query it to find files and
> metadata which are associated with the corrupt data.  For now all we do
> is kill processes with that file mapped into their address spaces, but
> future patches could actually do something about corrupt metadata.
>
> After that, the memory failure needs to notify the processes who are
> using those files.
>
> Signed-off-by: Shiyang Ruan 
> ---
>  fs/xfs/Makefile |   1 +
>  fs/xfs/xfs_buf.c|  12 ++
>  fs/xfs/xfs_fsops.c  |   3 +
>  fs/xfs/xfs_mount.h  |   1 +
>  fs/xfs/xfs_notify_failure.c | 222 
>  fs/xfs/xfs_notify_failure.h |  10 ++
>  6 files changed, 249 insertions(+)
>  create mode 100644 fs/xfs/xfs_notify_failure.c
>  create mode 100644 fs/xfs/xfs_notify_failure.h
>
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 04611a1068b4..389970b3e13b 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \
>xfs_message.o \
>xfs_mount.o \
>xfs_mru_cache.o \
> +  xfs_notify_failure.o \
>xfs_pwork.o \
>xfs_reflink.o \
>xfs_stats.o \
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index b45e0d50a405..017010b3d601 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -19,6 +19,7 @@
>  #include "xfs_errortag.h"
>  #include "xfs_error.h"
>  #include "xfs_ag.h"
> +#include "xfs_notify_failure.h"
>
>  static struct kmem_cache *xfs_buf_cache;
>
> @@ -1892,6 +1893,8 @@ xfs_free_buftarg(
> list_lru_destroy(>bt_lru);
>
> blkdev_issue_flush(btp->bt_bdev);
> +   if (btp->bt_daxdev)
> +   dax_unregister_holder(btp->bt_daxdev);
> fs_put_dax(btp->bt_daxdev);
>
> kmem_free(btp);
> @@ -1946,6 +1949,15 @@ xfs_alloc_buftarg(
> btp->bt_dev =  bdev->bd_dev;
> btp->bt_bdev = bdev;
> btp->bt_daxdev = fs_dax_get_by_bdev(bdev, >bt_dax_part_off);
> +   if (btp->bt_daxdev) {
> +   if (dax_get_holder(btp->bt_daxdev)) {
> +   xfs_err(mp, "DAX device already in use?!");

Per the earlier feedback this can be checked atomically inside of
dax_register_holder() with cmpxchg().

> +   goto error_free;
> +   }
> +
> +   dax_register_holder(btp->bt_daxdev, mp,
> +   _dax_holder_operations);
> +   }
>
> /*
>  * Buffer IO error rate limiting. Limit it to no more than 10 messages
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index 33e26690a8c4..d4d36c5bef11 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -542,6 +542,9 @@ xfs_do_force_shutdown(
> } else if (flags & SHUTDOWN_CORRUPT_INCORE) {
> tag = XFS_PTAG_SHUTDOWN_CORRUPT;
> why = "Corruption of in-memory data";
> +   } else if (flags & SHUTDOWN_CORRUPT_ONDISK) {
> +   tag = XFS_PTAG_SHUTDOWN_CORRUPT;
> +   why = "Corruption of on-disk metadata";
> } else {
> tag = XFS_PTAG_SHUTDOWN_IOERROR;
> why = "Metadata I/O Error";
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 00720a02e761..47ff4ac53c4c 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -435,6 +435,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int 
> flags, char *fname,
>  #define SHUTDOWN_LOG_IO_ERROR  0x0002  /* write attempt to the log failed */
>  #define SHUTDOWN_FORCE_UMOUNT  0x0004  /* shutdown from a forced unmount */
>  #define SHUTDOWN_CORRUPT_INCORE0x0008  /* corrupt in-memory data 
> structures */
> +#define SHUTDOWN_CORRUPT_ONDISK0x0010  /* corrupt metadata on device 
> */
>
>  #define XFS_SHUTDOWN_STRINGS \
> { SHUTDOWN_META_IO_ERROR,   "metadata_io" }, \
> diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
> new file mode 100644
> index ..6abaa043f4bc
> --- /dev/null
> +++ b/fs/xfs/xfs_notify_failure.c
> @@ -0,0 +1,222 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021 Fujitsu.  All Rights Reserved.
> + */
> +
> +#include "xfs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_alloc.h"
> +#include "xfs_bit.h"
> +#include "xfs_btree.h"
> +#include "xfs_inode.h"
> +#include "xfs_icache.h"
> +#include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
> +#include "xfs_rtalloc.h"
> +#include 

Re: [PATCH v10 8/9] xfs: Implement ->notify_failure() for XFS

2022-02-01 Thread Darrick J. Wong
On Thu, Jan 27, 2022 at 08:40:57PM +0800, Shiyang Ruan wrote:
> Introduce xfs_notify_failure.c to handle failure related works, such as
> implement ->notify_failure(), register/unregister dax holder in xfs, and
> so on.
> 
> If the rmap feature of XFS enabled, we can query it to find files and
> metadata which are associated with the corrupt data.  For now all we do
> is kill processes with that file mapped into their address spaces, but
> future patches could actually do something about corrupt metadata.
> 
> After that, the memory failure needs to notify the processes who are
> using those files.
> 
> Signed-off-by: Shiyang Ruan 
> ---
>  fs/xfs/Makefile |   1 +
>  fs/xfs/xfs_buf.c|  12 ++
>  fs/xfs/xfs_fsops.c  |   3 +
>  fs/xfs/xfs_mount.h  |   1 +
>  fs/xfs/xfs_notify_failure.c | 222 
>  fs/xfs/xfs_notify_failure.h |  10 ++
>  6 files changed, 249 insertions(+)
>  create mode 100644 fs/xfs/xfs_notify_failure.c
>  create mode 100644 fs/xfs/xfs_notify_failure.h
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 04611a1068b4..389970b3e13b 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -84,6 +84,7 @@ xfs-y   += xfs_aops.o \
>  xfs_message.o \
>  xfs_mount.o \
>  xfs_mru_cache.o \
> +xfs_notify_failure.o \
>  xfs_pwork.o \
>  xfs_reflink.o \
>  xfs_stats.o \
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index b45e0d50a405..017010b3d601 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -19,6 +19,7 @@
>  #include "xfs_errortag.h"
>  #include "xfs_error.h"
>  #include "xfs_ag.h"
> +#include "xfs_notify_failure.h"
>  
>  static struct kmem_cache *xfs_buf_cache;
>  
> @@ -1892,6 +1893,8 @@ xfs_free_buftarg(
>   list_lru_destroy(>bt_lru);
>  
>   blkdev_issue_flush(btp->bt_bdev);
> + if (btp->bt_daxdev)
> + dax_unregister_holder(btp->bt_daxdev);
>   fs_put_dax(btp->bt_daxdev);
>  
>   kmem_free(btp);
> @@ -1946,6 +1949,15 @@ xfs_alloc_buftarg(
>   btp->bt_dev =  bdev->bd_dev;
>   btp->bt_bdev = bdev;
>   btp->bt_daxdev = fs_dax_get_by_bdev(bdev, >bt_dax_part_off);
> + if (btp->bt_daxdev) {
> + if (dax_get_holder(btp->bt_daxdev)) {
> + xfs_err(mp, "DAX device already in use?!");
> + goto error_free;
> + }
> +
> + dax_register_holder(btp->bt_daxdev, mp,
> + _dax_holder_operations);

Um... is XFS required to take a lock here?  How do we prevent parallel
mounts of filesystems on two partitions from breaking each other?

> + }
>  
>   /*
>* Buffer IO error rate limiting. Limit it to no more than 10 messages
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index 33e26690a8c4..d4d36c5bef11 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -542,6 +542,9 @@ xfs_do_force_shutdown(
>   } else if (flags & SHUTDOWN_CORRUPT_INCORE) {
>   tag = XFS_PTAG_SHUTDOWN_CORRUPT;
>   why = "Corruption of in-memory data";
> + } else if (flags & SHUTDOWN_CORRUPT_ONDISK) {
> + tag = XFS_PTAG_SHUTDOWN_CORRUPT;
> + why = "Corruption of on-disk metadata";
>   } else {
>   tag = XFS_PTAG_SHUTDOWN_IOERROR;
>   why = "Metadata I/O Error";
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 00720a02e761..47ff4ac53c4c 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -435,6 +435,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int 
> flags, char *fname,
>  #define SHUTDOWN_LOG_IO_ERROR0x0002  /* write attempt to the log 
> failed */
>  #define SHUTDOWN_FORCE_UMOUNT0x0004  /* shutdown from a forced 
> unmount */
>  #define SHUTDOWN_CORRUPT_INCORE  0x0008  /* corrupt in-memory data 
> structures */
> +#define SHUTDOWN_CORRUPT_ONDISK  0x0010  /* corrupt metadata on device */
>  
>  #define XFS_SHUTDOWN_STRINGS \
>   { SHUTDOWN_META_IO_ERROR,   "metadata_io" }, \
> diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
> new file mode 100644
> index ..6abaa043f4bc
> --- /dev/null
> +++ b/fs/xfs/xfs_notify_failure.c
> @@ -0,0 +1,222 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021 Fujitsu.  All Rights Reserved.

2022?

> + */
> +
> +#include "xfs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_alloc.h"
> +#include "xfs_bit.h"
> +#include "xfs_btree.h"
> +#include "xfs_inode.h"
> +#include "xfs_icache.h"
> +#include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
> +#include "xfs_rtalloc.h"
> 

Re: [PATCH v10 8/9] xfs: Implement ->notify_failure() for XFS

2022-01-27 Thread kernel test robot
Hi Shiyang,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux/master]
[also build test ERROR on linus/master v5.17-rc1 next-20220127]
[cannot apply to xfs-linux/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
2c271fe77d52a0555161926c232cd5bc07178b39
config: ia64-defconfig 
(https://download.01.org/0day-ci/archive/20220128/202201280314.si8wtlft-...@intel.com/config)
compiler: ia64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/cb7650562991fc273fbf4c53b6e3db4bb9bb0b5e
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
git checkout cb7650562991fc273fbf4c53b6e3db4bb9bb0b5e
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross 
O=build_dir ARCH=ia64 SHELL=/bin/bash fs/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   In file included from fs/xfs/xfs_buf.h:14,
from fs/xfs/xfs_linux.h:80,
from fs/xfs/xfs.h:22,
from fs/xfs/xfs_buf.c:6:
   include/linux/dax.h:73:30: warning: 'struct dax_holder_operations' declared 
inside parameter list will not be visible outside of this definition or 
declaration
  73 | const struct dax_holder_operations *ops)
 |  ^
   fs/xfs/xfs_buf.c: In function 'xfs_alloc_buftarg':
>> fs/xfs/xfs_buf.c:1959:33: error: passing argument 3 of 'dax_register_holder' 
>> from incompatible pointer type [-Werror=incompatible-pointer-types]
1959 | _dax_holder_operations);
 | ^~
 | |
 | const struct dax_holder_operations *
   In file included from fs/xfs/xfs_buf.h:14,
from fs/xfs/xfs_linux.h:80,
from fs/xfs/xfs.h:22,
from fs/xfs/xfs_buf.c:6:
   include/linux/dax.h:73:53: note: expected 'const struct 
dax_holder_operations *' but argument is of type 'const struct 
dax_holder_operations *'
  73 | const struct dax_holder_operations *ops)
 | ^~~
   cc1: some warnings being treated as errors
--
   In file included from fs/xfs/xfs_buf.h:14,
from fs/xfs/xfs_linux.h:80,
from fs/xfs/xfs.h:22,
from fs/xfs/xfs_notify_failure.c:6:
   include/linux/dax.h:73:30: warning: 'struct dax_holder_operations' declared 
inside parameter list will not be visible outside of this definition or 
declaration
  73 | const struct dax_holder_operations *ops)
 |  ^
>> fs/xfs/xfs_notify_failure.c:220:14: error: variable 
>> 'xfs_dax_holder_operations' has initializer but incomplete type
 220 | const struct dax_holder_operations xfs_dax_holder_operations = {
 |  ^
>> fs/xfs/xfs_notify_failure.c:221:10: error: 'const struct 
>> dax_holder_operations' has no member named 'notify_failure'
 221 | .notify_failure = xfs_dax_notify_failure,
 |  ^~
   fs/xfs/xfs_notify_failure.c:221:35: warning: excess elements in struct 
initializer
 221 | .notify_failure = xfs_dax_notify_failure,
 |   ^~
   fs/xfs/xfs_notify_failure.c:221:35: note: (near initialization for 
'xfs_dax_holder_operations')
>> fs/xfs/xfs_notify_failure.c:220:36: error: storage size of 
>> 'xfs_dax_holder_operations' isn't known
 220 | const struct dax_holder_operations xfs_dax_holder_operations = {
 |^


vim +/dax_register_holder +1959 fs/xfs/xfs_buf.c

  1938  
  1939  struct xfs_buftarg *
  1940  xfs_alloc_buftarg(
  1941  struct xfs_mount*mp,
  1942  struct block_device *bdev)
  1943  {
  1944  xfs_buftarg_t   *btp;
  1945  
  1946  btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
  1947  
  1948  btp->bt_mount = 

Re: [PATCH v10 8/9] xfs: Implement ->notify_failure() for XFS

2022-01-27 Thread kernel test robot
Hi Shiyang,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[also build test WARNING on linus/master v5.17-rc1 next-20220127]
[cannot apply to xfs-linux/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
2c271fe77d52a0555161926c232cd5bc07178b39
config: ia64-defconfig 
(https://download.01.org/0day-ci/archive/20220128/202201280101.4ecaswmd-...@intel.com/config)
compiler: ia64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/0day-ci/linux/commit/cb7650562991fc273fbf4c53b6e3db4bb9bb0b5e
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Shiyang-Ruan/fsdax-introduce-fs-query-to-support-reflink/20220127-204239
git checkout cb7650562991fc273fbf4c53b6e3db4bb9bb0b5e
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross 
O=build_dir ARCH=ia64 SHELL=/bin/bash fs/xfs/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   In file included from fs/xfs/xfs_buf.h:14,
from fs/xfs/xfs_linux.h:80,
from fs/xfs/xfs.h:22,
from fs/xfs/xfs_notify_failure.c:6:
   include/linux/dax.h:73:30: warning: 'struct dax_holder_operations' declared 
inside parameter list will not be visible outside of this definition or 
declaration
  73 | const struct dax_holder_operations *ops)
 |  ^
   fs/xfs/xfs_notify_failure.c:220:14: error: variable 
'xfs_dax_holder_operations' has initializer but incomplete type
 220 | const struct dax_holder_operations xfs_dax_holder_operations = {
 |  ^
   fs/xfs/xfs_notify_failure.c:221:10: error: 'const struct 
dax_holder_operations' has no member named 'notify_failure'
 221 | .notify_failure = xfs_dax_notify_failure,
 |  ^~
>> fs/xfs/xfs_notify_failure.c:221:35: warning: excess elements in struct 
>> initializer
 221 | .notify_failure = xfs_dax_notify_failure,
 |   ^~
   fs/xfs/xfs_notify_failure.c:221:35: note: (near initialization for 
'xfs_dax_holder_operations')
   fs/xfs/xfs_notify_failure.c:220:36: error: storage size of 
'xfs_dax_holder_operations' isn't known
 220 | const struct dax_holder_operations xfs_dax_holder_operations = {
 |^


vim +221 fs/xfs/xfs_notify_failure.c

   219  
   220  const struct dax_holder_operations xfs_dax_holder_operations = {
 > 221  .notify_failure = xfs_dax_notify_failure,

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org