On Mon, Jan 26, 2026 at 05:51:02AM +0100, Christoph Hellwig wrote:
> Use the kernel's resizable hash table (rhashtable) to find the
> fsverity_info.  This way file systems that want to support fsverity don't
> have to bloat every inode in the system with an extra pointer.  The
> trade-off is that looking up the fsverity_info is a bit more expensive
> now, but the main operations are still dominated by I/O and hashing
> overhead.
> 
> The rhashtable implementations requires no external synchronization, and
> the _fast versions of the APIs provide the RCU critical sections required
> by the implementation.  Because struct fsverity_info is only removed on
> inode eviction and does not contain a reference count, there is no need
> for an extended critical section to grab a reference or validate the
> object state.  The file open path uses rhashtable_lookup_get_insert_fast,
> which can either find an existing object for the hash key or insert a
> new one in a single atomic operation, so that concurrent opens never
> allocate duplicate fsverity_info structure.  FS_IOC_ENABLE_VERITY must
> already be synchronized by a combination of i_rwsem and file system flags
> and uses rhashtable_lookup_insert_fast, which errors out on an existing
> object for the hash key as an additional safety check.
> 
> Because insertion into the hash table now happens before S_VERITY is set,
> fsverity just becomes a barrier and a flag check and doesn't have to look
> up the fsverity_info at all, so there is only a single lookup per
> ->read_folio or ->readahead invocation.  For btrfs there is an additional
> one for each bio completion, while for ext4 and f2fs the fsverity_info
> is stored in the per-I/O context and reused for the completion workqueue.
> 
> Signed-off-by: Christoph Hellwig <[email protected]>

Looks good to me now; sorry about missing the smb_mb thing that ebiggers
caught.

Reviewed-by: "Darrick J. Wong" <[email protected]>

--D

> ---
>  fs/btrfs/btrfs_inode.h       |  4 --
>  fs/btrfs/inode.c             |  3 --
>  fs/btrfs/verity.c            |  2 -
>  fs/ext4/ext4.h               |  4 --
>  fs/ext4/super.c              |  3 --
>  fs/ext4/verity.c             |  2 -
>  fs/f2fs/f2fs.h               |  3 --
>  fs/f2fs/super.c              |  3 --
>  fs/f2fs/verity.c             |  2 -
>  fs/verity/enable.c           | 30 +++++++-----
>  fs/verity/fsverity_private.h | 17 +++----
>  fs/verity/open.c             | 75 +++++++++++++++++++-----------
>  fs/verity/verify.c           |  2 +-
>  include/linux/fsverity.h     | 90 ++++++++++++------------------------
>  14 files changed, 104 insertions(+), 136 deletions(-)
> 
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index 73602ee8de3f..55c272fe5d92 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -339,10 +339,6 @@ struct btrfs_inode {
>  
>       struct rw_semaphore i_mmap_lock;
>  
> -#ifdef CONFIG_FS_VERITY
> -     struct fsverity_info *i_verity_info;
> -#endif
> -
>       struct inode vfs_inode;
>  };
>  
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 67c64efc5099..93b2ce75fb06 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8097,9 +8097,6 @@ static void init_once(void *foo)
>       struct btrfs_inode *ei = foo;
>  
>       inode_init_once(&ei->vfs_inode);
> -#ifdef CONFIG_FS_VERITY
> -     ei->i_verity_info = NULL;
> -#endif
>  }
>  
>  void __cold btrfs_destroy_cachep(void)
> diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
> index c152bef71e8b..cd96fac4739f 100644
> --- a/fs/btrfs/verity.c
> +++ b/fs/btrfs/verity.c
> @@ -795,8 +795,6 @@ static int btrfs_write_merkle_tree_block(struct file 
> *file, const void *buf,
>  }
>  
>  const struct fsverity_operations btrfs_verityops = {
> -     .inode_info_offs         = (int)offsetof(struct btrfs_inode, 
> i_verity_info) -
> -                                (int)offsetof(struct btrfs_inode, vfs_inode),
>       .begin_enable_verity     = btrfs_begin_enable_verity,
>       .end_enable_verity       = btrfs_end_enable_verity,
>       .get_verity_descriptor   = btrfs_get_verity_descriptor,
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 56112f201cac..60c549bc894e 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1205,10 +1205,6 @@ struct ext4_inode_info {
>  #ifdef CONFIG_FS_ENCRYPTION
>       struct fscrypt_inode_info *i_crypt_info;
>  #endif
> -
> -#ifdef CONFIG_FS_VERITY
> -     struct fsverity_info *i_verity_info;
> -#endif
>  };
>  
>  /*
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 86131f4d8718..1fb0c90c7a4b 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1484,9 +1484,6 @@ static void init_once(void *foo)
>  #ifdef CONFIG_FS_ENCRYPTION
>       ei->i_crypt_info = NULL;
>  #endif
> -#ifdef CONFIG_FS_VERITY
> -     ei->i_verity_info = NULL;
> -#endif
>  }
>  
>  static int __init init_inodecache(void)
> diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
> index 54ae4d4a176c..e3ab3ba8799b 100644
> --- a/fs/ext4/verity.c
> +++ b/fs/ext4/verity.c
> @@ -380,8 +380,6 @@ static int ext4_write_merkle_tree_block(struct file 
> *file, const void *buf,
>  }
>  
>  const struct fsverity_operations ext4_verityops = {
> -     .inode_info_offs        = (int)offsetof(struct ext4_inode_info, 
> i_verity_info) -
> -                               (int)offsetof(struct ext4_inode_info, 
> vfs_inode),
>       .begin_enable_verity    = ext4_begin_enable_verity,
>       .end_enable_verity      = ext4_end_enable_verity,
>       .get_verity_descriptor  = ext4_get_verity_descriptor,
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index f2fcadc7a6fe..8ee8a7bc012c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -974,9 +974,6 @@ struct f2fs_inode_info {
>  #ifdef CONFIG_FS_ENCRYPTION
>       struct fscrypt_inode_info *i_crypt_info; /* filesystem encryption info 
> */
>  #endif
> -#ifdef CONFIG_FS_VERITY
> -     struct fsverity_info *i_verity_info; /* filesystem verity info */
> -#endif
>  };
>  
>  static inline void get_read_extent_info(struct extent_info *ext,
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index c4c225e09dc4..cd00d030edda 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -504,9 +504,6 @@ static void init_once(void *foo)
>  #ifdef CONFIG_FS_ENCRYPTION
>       fi->i_crypt_info = NULL;
>  #endif
> -#ifdef CONFIG_FS_VERITY
> -     fi->i_verity_info = NULL;
> -#endif
>  }
>  
>  #ifdef CONFIG_QUOTA
> diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
> index 628e8eafa96a..4f5230d871f7 100644
> --- a/fs/f2fs/verity.c
> +++ b/fs/f2fs/verity.c
> @@ -278,8 +278,6 @@ static int f2fs_write_merkle_tree_block(struct file 
> *file, const void *buf,
>  }
>  
>  const struct fsverity_operations f2fs_verityops = {
> -     .inode_info_offs        = (int)offsetof(struct f2fs_inode_info, 
> i_verity_info) -
> -                               (int)offsetof(struct f2fs_inode_info, 
> vfs_inode),
>       .begin_enable_verity    = f2fs_begin_enable_verity,
>       .end_enable_verity      = f2fs_end_enable_verity,
>       .get_verity_descriptor  = f2fs_get_verity_descriptor,
> diff --git a/fs/verity/enable.c b/fs/verity/enable.c
> index c56c18e2605b..94c88c419054 100644
> --- a/fs/verity/enable.c
> +++ b/fs/verity/enable.c
> @@ -265,9 +265,24 @@ static int enable_verity(struct file *filp,
>               goto rollback;
>       }
>  
> +     /*
> +      * Add the fsverity_info into the hash table before finishing the
> +      * initialization so that we don't have to undo the enabling when memory
> +      * allocation for the hash table fails.  This is safe because looking up
> +      * the fsverity_info always first checks the S_VERITY flag on the inode,
> +      * which will only be set at the very end of the ->end_enable_verity
> +      * method.
> +      */
> +     err = fsverity_set_info(vi);
> +     if (err)
> +             goto rollback;
> +
>       /*
>        * Tell the filesystem to finish enabling verity on the file.
> -      * Serialized with ->begin_enable_verity() by the inode lock.
> +      * Serialized with ->begin_enable_verity() by the inode lock.  The file
> +      * system needs to set the S_VERITY flag on the inode at the very end of
> +      * the method, at which point the fsverity information can be accessed
> +      * by other threads.
>        */
>       inode_lock(inode);
>       err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
> @@ -275,19 +290,10 @@ static int enable_verity(struct file *filp,
>       if (err) {
>               fsverity_err(inode, "%ps() failed with err %d",
>                            vops->end_enable_verity, err);
> -             fsverity_free_info(vi);
> +             fsverity_remove_info(vi);
>       } else if (WARN_ON_ONCE(!IS_VERITY(inode))) {
> +             fsverity_remove_info(vi);
>               err = -EINVAL;
> -             fsverity_free_info(vi);
> -     } else {
> -             /* Successfully enabled verity */
> -
> -             /*
> -              * Readers can start using the inode's verity info immediately,
> -              * so it can't be rolled back once set.  So don't set it until
> -              * just after the filesystem has successfully enabled verity.
> -              */
> -             fsverity_set_info(inode, vi);
>       }
>  out:
>       kfree(params.hashstate);
> diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> index f9f3936b0a89..4d4a0a560562 100644
> --- a/fs/verity/fsverity_private.h
> +++ b/fs/verity/fsverity_private.h
> @@ -11,6 +11,7 @@
>  #define pr_fmt(fmt) "fs-verity: " fmt
>  
>  #include <linux/fsverity.h>
> +#include <linux/rhashtable.h>
>  
>  /*
>   * Implementation limit: maximum depth of the Merkle tree.  For now 8 is 
> plenty;
> @@ -63,13 +64,14 @@ struct merkle_tree_params {
>   * fsverity_info - cached verity metadata for an inode
>   *
>   * When a verity file is first opened, an instance of this struct is 
> allocated
> - * and a pointer to it is stored in the file's in-memory inode.  It remains
> - * until the inode is evicted.  It caches information about the Merkle tree
> - * that's needed to efficiently verify data read from the file.  It also 
> caches
> - * the file digest.  The Merkle tree pages themselves are not cached here, 
> but
> - * the filesystem may cache them.
> + * and a pointer to it is stored in the global hash table, indexed by the 
> inode
> + * pointer value.  It remains alive until the inode is evicted.  It caches
> + * information about the Merkle tree that's needed to efficiently verify data
> + * read from the file.  It also caches the file digest.  The Merkle tree 
> pages
> + * themselves are not cached here, but the filesystem may cache them.
>   */
>  struct fsverity_info {
> +     struct rhash_head rhash_head;
>       struct merkle_tree_params tree_params;
>       u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
>       u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE];
> @@ -127,9 +129,8 @@ int fsverity_init_merkle_tree_params(struct 
> merkle_tree_params *params,
>  struct fsverity_info *fsverity_create_info(struct inode *inode,
>                                          struct fsverity_descriptor *desc);
>  
> -void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
> -
> -void fsverity_free_info(struct fsverity_info *vi);
> +int fsverity_set_info(struct fsverity_info *vi);
> +void fsverity_remove_info(struct fsverity_info *vi);
>  
>  int fsverity_get_descriptor(struct inode *inode,
>                           struct fsverity_descriptor **desc_ret);
> diff --git a/fs/verity/open.c b/fs/verity/open.c
> index 128502cf0a23..1bde8fe79b3f 100644
> --- a/fs/verity/open.c
> +++ b/fs/verity/open.c
> @@ -12,6 +12,14 @@
>  #include <linux/slab.h>
>  
>  static struct kmem_cache *fsverity_info_cachep;
> +static struct rhashtable fsverity_info_hash;
> +
> +static const struct rhashtable_params fsverity_info_hash_params = {
> +     .key_len                = sizeof_field(struct fsverity_info, inode),
> +     .key_offset             = offsetof(struct fsverity_info, inode),
> +     .head_offset            = offsetof(struct fsverity_info, rhash_head),
> +     .automatic_shrinking    = true,
> +};
>  
>  /**
>   * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
> @@ -170,6 +178,13 @@ static void compute_file_digest(const struct 
> fsverity_hash_alg *hash_alg,
>       desc->sig_size = sig_size;
>  }
>  
> +static void fsverity_free_info(struct fsverity_info *vi)
> +{
> +     kfree(vi->tree_params.hashstate);
> +     kvfree(vi->hash_block_verified);
> +     kmem_cache_free(fsverity_info_cachep, vi);
> +}
> +
>  /*
>   * Create a new fsverity_info from the given fsverity_descriptor (with 
> optional
>   * appended builtin signature), and check the signature if present.  The
> @@ -241,33 +256,18 @@ struct fsverity_info *fsverity_create_info(struct inode 
> *inode,
>       return ERR_PTR(err);
>  }
>  
> -void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
> +int fsverity_set_info(struct fsverity_info *vi)
>  {
> -     /*
> -      * Multiple tasks may race to set the inode's verity info pointer, so
> -      * use cmpxchg_release().  This pairs with the smp_load_acquire() in
> -      * fsverity_get_info().  I.e., publish the pointer with a RELEASE
> -      * barrier so that other tasks can ACQUIRE it.
> -      */
> -     if (cmpxchg_release(fsverity_info_addr(inode), NULL, vi) != NULL) {
> -             /* Lost the race, so free the verity info we allocated. */
> -             fsverity_free_info(vi);
> -             /*
> -              * Afterwards, the caller may access the inode's verity info
> -              * directly, so make sure to ACQUIRE the winning verity info.
> -              */
> -             (void)fsverity_get_info(inode);
> -     }
> +     return rhashtable_lookup_insert_fast(&fsverity_info_hash,
> +                     &vi->rhash_head, fsverity_info_hash_params);
>  }
>  
> -void fsverity_free_info(struct fsverity_info *vi)
> +struct fsverity_info *__fsverity_get_info(const struct inode *inode)
>  {
> -     if (!vi)
> -             return;
> -     kfree(vi->tree_params.hashstate);
> -     kvfree(vi->hash_block_verified);
> -     kmem_cache_free(fsverity_info_cachep, vi);
> +     return rhashtable_lookup_fast(&fsverity_info_hash, &inode,
> +                     fsverity_info_hash_params);
>  }
> +EXPORT_SYMBOL_GPL(__fsverity_get_info);
>  
>  static bool validate_fsverity_descriptor(struct inode *inode,
>                                        const struct fsverity_descriptor *desc,
> @@ -352,7 +352,7 @@ int fsverity_get_descriptor(struct inode *inode,
>  
>  static int ensure_verity_info(struct inode *inode)
>  {
> -     struct fsverity_info *vi = fsverity_get_info(inode);
> +     struct fsverity_info *vi = fsverity_get_info(inode), *found;
>       struct fsverity_descriptor *desc;
>       int err;
>  
> @@ -369,8 +369,18 @@ static int ensure_verity_info(struct inode *inode)
>               goto out_free_desc;
>       }
>  
> -     fsverity_set_info(inode, vi);
> -     err = 0;
> +     /*
> +      * Multiple tasks may race to set the inode's verity info, in which case
> +      * we might find an existing fsverity_info in the hash table.
> +      */
> +     found = rhashtable_lookup_get_insert_fast(&fsverity_info_hash,
> +                     &vi->rhash_head, fsverity_info_hash_params);
> +     if (found) {
> +             fsverity_free_info(vi);
> +             if (IS_ERR(found))
> +                     err = PTR_ERR(found);
> +     }
> +
>  out_free_desc:
>       kfree(desc);
>       return err;
> @@ -384,16 +394,25 @@ int __fsverity_file_open(struct inode *inode, struct 
> file *filp)
>  }
>  EXPORT_SYMBOL_GPL(__fsverity_file_open);
>  
> +void fsverity_remove_info(struct fsverity_info *vi)
> +{
> +     rhashtable_remove_fast(&fsverity_info_hash, &vi->rhash_head,
> +                     fsverity_info_hash_params);
> +     fsverity_free_info(vi);
> +}
> +
>  void fsverity_cleanup_inode(struct inode *inode)
>  {
> -     struct fsverity_info **vi_addr = fsverity_info_addr(inode);
> +     struct fsverity_info *vi = fsverity_get_info(inode);
>  
> -     fsverity_free_info(*vi_addr);
> -     *vi_addr = NULL;
> +     if (vi)
> +             fsverity_remove_info(vi);
>  }
>  
>  void __init fsverity_init_info_cache(void)
>  {
> +     if (rhashtable_init(&fsverity_info_hash, &fsverity_info_hash_params))
> +             panic("failed to initialize fsverity hash\n");
>       fsverity_info_cachep = KMEM_CACHE_USERCOPY(
>                                       fsverity_info,
>                                       SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
> diff --git a/fs/verity/verify.c b/fs/verity/verify.c
> index 57aea5a2a0ee..75b997a936c7 100644
> --- a/fs/verity/verify.c
> +++ b/fs/verity/verify.c
> @@ -320,7 +320,7 @@ fsverity_init_verification_context(struct 
> fsverity_verification_context *ctx,
>       ctx->inode = vi->inode;
>       ctx->vi = vi;
>       ctx->num_pending = 0;
> -     if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
> +     if (ctx->vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
>           sha256_finup_2x_is_optimized())
>               ctx->max_pending = 2;
>       else
> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> index 1d70b270e90a..c8958971f65a 100644
> --- a/include/linux/fsverity.h
> +++ b/include/linux/fsverity.h
> @@ -30,13 +30,6 @@ struct fsverity_info;
>  
>  /* Verity operations for filesystems */
>  struct fsverity_operations {
> -     /**
> -      * The offset of the pointer to struct fsverity_info in the
> -      * filesystem-specific part of the inode, relative to the beginning of
> -      * the common part of the inode (the 'struct inode').
> -      */
> -     ptrdiff_t inode_info_offs;
> -
>       /**
>        * Begin enabling verity on the given file.
>        *
> @@ -142,40 +135,6 @@ struct fsverity_operations {
>  };
>  
>  #ifdef CONFIG_FS_VERITY
> -
> -/*
> - * Returns the address of the verity info pointer within the 
> filesystem-specific
> - * part of the inode.  (To save memory on filesystems that don't support
> - * fsverity, a field in 'struct inode' itself is no longer used.)
> - */
> -static inline struct fsverity_info **
> -fsverity_info_addr(const struct inode *inode)
> -{
> -     VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0);
> -     return (void *)inode + inode->i_sb->s_vop->inode_info_offs;
> -}
> -
> -static inline struct fsverity_info *fsverity_get_info(const struct inode 
> *inode)
> -{
> -     /*
> -      * Since this function can be called on inodes belonging to filesystems
> -      * that don't support fsverity at all, and fsverity_info_addr() doesn't
> -      * work on such filesystems, we have to start with an IS_VERITY() check.
> -      * Checking IS_VERITY() here is also useful to minimize the overhead of
> -      * fsverity_active() on non-verity files.
> -      */
> -     if (!IS_VERITY(inode))
> -             return NULL;
> -
> -     /*
> -      * Pairs with the cmpxchg_release() in fsverity_set_info().  I.e.,
> -      * another task may publish the inode's verity info concurrently,
> -      * executing a RELEASE barrier.  Use smp_load_acquire() here to safely
> -      * ACQUIRE the memory the other task published.
> -      */
> -     return smp_load_acquire(fsverity_info_addr(inode));
> -}
> -
>  /* enable.c */
>  
>  int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
> @@ -204,18 +163,6 @@ void fsverity_enqueue_verify_work(struct work_struct 
> *work);
>  
>  #else /* !CONFIG_FS_VERITY */
>  
> -/*
> - * Provide a stub to allow code using this to compile.  All callsites should 
> be
> - * guarded by compiler dead code elimination, and this forces a link error if
> - * not.
> - */
> -struct fsverity_info **fsverity_info_addr(const struct inode *inode);
> -
> -static inline struct fsverity_info *fsverity_get_info(const struct inode 
> *inode)
> -{
> -     return NULL;
> -}
> -
>  /* enable.c */
>  
>  static inline int fsverity_ioctl_enable(struct file *filp,
> @@ -296,18 +243,39 @@ static inline bool fsverity_verify_page(struct 
> fsverity_info *vi,
>   * fsverity_active() - do reads from the inode need to go through fs-verity?
>   * @inode: inode to check
>   *
> - * This checks whether the inode's verity info has been set.
> - *
> - * Filesystems call this from ->readahead() to check whether the pages need 
> to
> - * be verified or not.  Don't use IS_VERITY() for this purpose; it's subject 
> to
> - * a race condition where the file is being read concurrently with
> - * FS_IOC_ENABLE_VERITY completing.  (S_VERITY is set before the verity 
> info.)
> + * This checks whether the inode's verity info has been set, and reads need
> + * to verify the verity information.
>   *
>   * Return: true if reads need to go through fs-verity, otherwise false
>   */
> -static inline bool fsverity_active(const struct inode *inode)
> +static __always_inline bool fsverity_active(const struct inode *inode)
> +{
> +     if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) {
> +             /*
> +              * This pairs with the try_cmpxchg in set_mask_bits()
> +              * used to set the S_VERITY bit in i_flags.
> +              */
> +             smp_mb();
> +             return true;
> +     }
> +
> +     return false;
> +}
> +
> +/**
> + * fsverity_get_info - get fsverity information for an inode
> + * @inode: inode to operate on.
> + *
> + * This gets the fsverity_info for @inode if it exists.  Safe to call without
> + * knowin that a fsverity_info exist for @inode, including on file systems 
> that
> + * do not support fsverity.
> + */
> +struct fsverity_info *__fsverity_get_info(const struct inode *inode);
> +static inline struct fsverity_info *fsverity_get_info(const struct inode 
> *inode)
>  {
> -     return fsverity_get_info(inode) != NULL;
> +     if (!fsverity_active(inode))
> +             return NULL;
> +     return __fsverity_get_info(inode);
>  }
>  
>  /**
> -- 
> 2.47.3
> 
> 


_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to