[PATCH v2 89/89] fs: move i_generation into new hole created after timestamp conversion

2023-10-04 Thread Jeff Layton
The recent change to use discrete integers instead of struct timespec64
shaved 8 bytes off of struct inode, but it also moves the i_lock
into the previous cacheline, away from the fields that it protects.

Move i_generation above the i_lock, which moves the new 4 byte hole to
just after the i_fsnotify_mask in my setup.

Suggested-by: Amir Goldstein 
Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 485b5e21c8e5..686c9f33e725 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -677,6 +677,7 @@ struct inode {
u32 i_atime_nsec;
u32 i_mtime_nsec;
u32 i_ctime_nsec;
+   u32 i_generation;
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
@@ -733,7 +734,6 @@ struct inode {
unsignedi_dir_seq;
};
 
-   __u32   i_generation;
 
 #ifdef CONFIG_FSNOTIFY
__u32   i_fsnotify_mask; /* all events this inode cares 
about */
-- 
2.41.0



[PATCH v2 88/89] fs: switch timespec64 fields in inode to discrete integers

2023-10-04 Thread Jeff Layton
This shaves 8 bytes off struct inode with a garden-variety Fedora
Kconfig.

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 53 ++
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 84fdaf399fbe..485b5e21c8e5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -671,9 +671,12 @@ struct inode {
};
dev_t   i_rdev;
loff_t  i_size;
-   struct timespec64   __i_atime;
-   struct timespec64   __i_mtime;
-   struct timespec64   __i_ctime; /* use inode_*_ctime accessors! */
+   time64_ti_atime_sec;
+   time64_ti_mtime_sec;
+   time64_ti_ctime_sec;
+   u32 i_atime_nsec;
+   u32 i_mtime_nsec;
+   u32 i_ctime_nsec;
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
@@ -1517,23 +1520,27 @@ struct timespec64 inode_set_ctime_current(struct inode 
*inode);
 
 static inline time64_t inode_get_atime_sec(const struct inode *inode)
 {
-   return inode->__i_atime.tv_sec;
+   return inode->i_atime_sec;
 }
 
 static inline long inode_get_atime_nsec(const struct inode *inode)
 {
-   return inode->__i_atime.tv_nsec;
+   return inode->i_atime_nsec;
 }
 
 static inline struct timespec64 inode_get_atime(const struct inode *inode)
 {
-   return inode->__i_atime;
+   struct timespec64 ts = { .tv_sec  = inode_get_atime_sec(inode),
+.tv_nsec = inode_get_atime_nsec(inode) };
+
+   return ts;
 }
 
 static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->__i_atime = ts;
+   inode->i_atime_sec = ts.tv_sec;
+   inode->i_atime_nsec = ts.tv_nsec;
return ts;
 }
 
@@ -1542,28 +1549,32 @@ static inline struct timespec64 inode_set_atime(struct 
inode *inode,
 {
struct timespec64 ts = { .tv_sec  = sec,
 .tv_nsec = nsec };
+
return inode_set_atime_to_ts(inode, ts);
 }
 
 static inline time64_t inode_get_mtime_sec(const struct inode *inode)
 {
-   return inode->__i_mtime.tv_sec;
+   return inode->i_mtime_sec;
 }
 
 static inline long inode_get_mtime_nsec(const struct inode *inode)
 {
-   return inode->__i_mtime.tv_nsec;
+   return inode->i_mtime_nsec;
 }
 
 static inline struct timespec64 inode_get_mtime(const struct inode *inode)
 {
-   return inode->__i_mtime;
+   struct timespec64 ts = { .tv_sec  = inode_get_mtime_sec(inode),
+.tv_nsec = inode_get_mtime_nsec(inode) };
+   return ts;
 }
 
 static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->__i_mtime = ts;
+   inode->i_mtime_sec = ts.tv_sec;
+   inode->i_mtime_nsec = ts.tv_nsec;
return ts;
 }
 
@@ -1577,34 +1588,30 @@ static inline struct timespec64 inode_set_mtime(struct 
inode *inode,
 
 static inline time64_t inode_get_ctime_sec(const struct inode *inode)
 {
-   return inode->__i_ctime.tv_sec;
+   return inode->i_ctime_sec;
 }
 
 static inline long inode_get_ctime_nsec(const struct inode *inode)
 {
-   return inode->__i_ctime.tv_nsec;
+   return inode->i_ctime_nsec;
 }
 
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-   return inode->__i_ctime;
+   struct timespec64 ts = { .tv_sec  = inode_get_ctime_sec(inode),
+.tv_nsec = inode_get_ctime_nsec(inode) };
+
+   return ts;
 }
 
 static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->__i_ctime = ts;
+   inode->i_ctime_sec = ts.tv_sec;
+   inode->i_ctime_nsec = ts.tv_nsec;
return ts;
 }
 
-/**
- * inode_set_ctime - set the ctime in the inode
- * @inode: inode in which to set the ctime
- * @sec: tv_sec value to set
- * @nsec: tv_nsec value to set
- *
- * Set the ctime in @inode to { @sec, @nsec }
- */
 static inline struct timespec64 inode_set_ctime(struct inode *inode,
time64_t sec, long nsec)
 {
-- 
2.41.0



[PATCH v2 87/89] fs: rename inode i_atime and i_mtime fields

2023-10-04 Thread Jeff Layton
Rename these two fields to discourage direct access (and to help ensure
that we mop up any leftover direct accesses).

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3ca610d42176..84fdaf399fbe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -671,8 +671,8 @@ struct inode {
};
dev_t   i_rdev;
loff_t  i_size;
-   struct timespec64   i_atime;
-   struct timespec64   i_mtime;
+   struct timespec64   __i_atime;
+   struct timespec64   __i_mtime;
struct timespec64   __i_ctime; /* use inode_*_ctime accessors! */
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
@@ -1517,23 +1517,23 @@ struct timespec64 inode_set_ctime_current(struct inode 
*inode);
 
 static inline time64_t inode_get_atime_sec(const struct inode *inode)
 {
-   return inode->i_atime.tv_sec;
+   return inode->__i_atime.tv_sec;
 }
 
 static inline long inode_get_atime_nsec(const struct inode *inode)
 {
-   return inode->i_atime.tv_nsec;
+   return inode->__i_atime.tv_nsec;
 }
 
 static inline struct timespec64 inode_get_atime(const struct inode *inode)
 {
-   return inode->i_atime;
+   return inode->__i_atime;
 }
 
 static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->i_atime = ts;
+   inode->__i_atime = ts;
return ts;
 }
 
@@ -1547,23 +1547,23 @@ static inline struct timespec64 inode_set_atime(struct 
inode *inode,
 
 static inline time64_t inode_get_mtime_sec(const struct inode *inode)
 {
-   return inode->i_mtime.tv_sec;
+   return inode->__i_mtime.tv_sec;
 }
 
 static inline long inode_get_mtime_nsec(const struct inode *inode)
 {
-   return inode->i_mtime.tv_nsec;
+   return inode->__i_mtime.tv_nsec;
 }
 
 static inline struct timespec64 inode_get_mtime(const struct inode *inode)
 {
-   return inode->i_mtime;
+   return inode->__i_mtime;
 }
 
 static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->i_mtime = ts;
+   inode->__i_mtime = ts;
return ts;
 }
 
-- 
2.41.0



[PATCH v2 02/89] fs: convert core infrastructure to new timestamp accessors

2023-10-04 Thread Jeff Layton
Convert the core vfs code to use the new timestamp accessor functions.

Signed-off-by: Jeff Layton 
---
 fs/attr.c|  4 ++--
 fs/bad_inode.c   |  2 +-
 fs/binfmt_misc.c |  2 +-
 fs/inode.c   | 35 +--
 fs/nsfs.c|  2 +-
 fs/pipe.c|  2 +-
 fs/stack.c   |  4 ++--
 fs/stat.c|  4 ++--
 8 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index a8ae5f6d9b16..bdf5deb06ea9 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -308,9 +308,9 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode 
*inode,
i_uid_update(idmap, attr, inode);
i_gid_update(idmap, attr, inode);
if (ia_valid & ATTR_ATIME)
-   inode->i_atime = attr->ia_atime;
+   inode_set_atime_to_ts(inode, attr->ia_atime);
if (ia_valid & ATTR_MTIME)
-   inode->i_mtime = attr->ia_mtime;
+   inode_set_mtime_to_ts(inode, attr->ia_mtime);
if (ia_valid & ATTR_CTIME)
inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 83f9566c973b..316d88da2ce1 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -208,7 +208,7 @@ void make_bad_inode(struct inode *inode)
remove_inode_hash(inode);
 
inode->i_mode = S_IFREG;
-   inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
inode->i_op = _inode_ops;   
inode->i_opflags &= ~IOP_XATTR;
inode->i_fop = _file_ops;   
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e0108d17b085..5d2be9b0a0a5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -547,7 +547,7 @@ static struct inode *bm_get_inode(struct super_block *sb, 
int mode)
if (inode) {
inode->i_ino = get_next_ino();
inode->i_mode = mode;
-   inode->i_atime = inode->i_mtime = 
inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
}
return inode;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 3bb6193f436c..4f8984b97df0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1837,27 +1837,29 @@ EXPORT_SYMBOL(bmap);
 static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
 struct timespec64 now)
 {
-   struct timespec64 ctime;
+   struct timespec64 atime, mtime, ctime;
 
if (!(mnt->mnt_flags & MNT_RELATIME))
return 1;
/*
 * Is mtime younger than or equal to atime? If yes, update atime:
 */
-   if (timespec64_compare(>i_mtime, >i_atime) >= 0)
+   atime = inode_get_atime(inode);
+   mtime = inode_get_mtime(inode);
+   if (timespec64_compare(, ) >= 0)
return 1;
/*
 * Is ctime younger than or equal to atime? If yes, update atime:
 */
ctime = inode_get_ctime(inode);
-   if (timespec64_compare(, >i_atime) >= 0)
+   if (timespec64_compare(, ) >= 0)
return 1;
 
/*
 * Is the previous atime value older than a day? If yes,
 * update atime:
 */
-   if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
+   if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60)
return 1;
/*
 * Good, we can skip the atime update:
@@ -1888,12 +1890,13 @@ int inode_update_timestamps(struct inode *inode, int 
flags)
 
if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
struct timespec64 ctime = inode_get_ctime(inode);
+   struct timespec64 mtime = inode_get_mtime(inode);
 
now = inode_set_ctime_current(inode);
if (!timespec64_equal(, ))
updated |= S_CTIME;
-   if (!timespec64_equal(, >i_mtime)) {
-   inode->i_mtime = now;
+   if (!timespec64_equal(, )) {
+   inode_set_mtime_to_ts(inode, now);
updated |= S_MTIME;
}
if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, 
updated))
@@ -1903,8 +1906,10 @@ int inode_update_timestamps(struct inode *inode, int 
flags)
}
 
if (flags & S_ATIME) {
-   if (!timespec64_equal(, >i_atime)) {
-   inode->i_atime = now;
+   struct timespec64 atime = inode_get_atime(inode);
+
+   if (!timespec64_equal(, )) {
+   inode_set_atime_to_ts(inode, now);
updated |= S_ATIME;
}
}
@@ -1963,7 +1968,7 @@ EXPORT_SYMBOL(inode_update_time);
 bool atime_needs_update(const struct path *path, struct inode *inode)
 {
struct vfsmount *mnt = path->mnt;
-   struct timespec64 now;
+   struct t

[PATCH v2 01/89] fs: new accessor methods for atime and mtime

2023-10-04 Thread Jeff Layton
Recently, we converted the ctime accesses in the kernel to use new
accessor functions. Linus recently pointed out though that if we add
accessors for the atime and mtime, then that would allow us to
seamlessly change how these timestamps are stored in the inode.

Add new accessor functions for the atime and mtime that mirror the
accessors for the ctime.

Signed-off-by: Jeff Layton 
---
 fs/libfs.c | 41 --
 include/linux/fs.h | 85 +++---
 2 files changed, 102 insertions(+), 24 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 37f2d34ee090..abe2b5a40ba1 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -541,7 +541,8 @@ void simple_recursive_removal(struct dentry *dentry,
dput(victim);   // unpin it
}
if (victim == dentry) {
-   inode->i_mtime = inode_set_ctime_current(inode);
+   inode_set_mtime_to_ts(inode,
+ 
inode_set_ctime_current(inode));
if (d_is_dir(dentry))
drop_nlink(inode);
inode_unlock(inode);
@@ -582,7 +583,7 @@ static int pseudo_fs_fill_super(struct super_block *s, 
struct fs_context *fc)
 */
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
-   root->i_atime = root->i_mtime = inode_set_ctime_current(root);
+   simple_inode_init_ts(root);
s->s_root = d_make_root(root);
if (!s->s_root)
return -ENOMEM;
@@ -638,8 +639,8 @@ int simple_link(struct dentry *old_dentry, struct inode 
*dir, struct dentry *den
 {
struct inode *inode = d_inode(old_dentry);
 
-   dir->i_mtime = inode_set_ctime_to_ts(dir,
-inode_set_ctime_current(inode));
+   inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, 
inode_set_ctime_current(inode)));
inc_nlink(inode);
ihold(inode);
dget(dentry);
@@ -673,8 +674,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
 {
struct inode *inode = d_inode(dentry);
 
-   dir->i_mtime = inode_set_ctime_to_ts(dir,
-inode_set_ctime_current(inode));
+   inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, 
inode_set_ctime_current(inode)));
drop_nlink(inode);
dput(dentry);
return 0;
@@ -709,9 +710,10 @@ void simple_rename_timestamp(struct inode *old_dir, struct 
dentry *old_dentry,
 {
struct inode *newino = d_inode(new_dentry);
 
-   old_dir->i_mtime = inode_set_ctime_current(old_dir);
+   inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir));
if (new_dir != old_dir)
-   new_dir->i_mtime = inode_set_ctime_current(new_dir);
+   inode_set_mtime_to_ts(new_dir,
+ inode_set_ctime_current(new_dir));
inode_set_ctime_current(d_inode(old_dentry));
if (newino)
inode_set_ctime_current(newino);
@@ -926,7 +928,7 @@ int simple_fill_super(struct super_block *s, unsigned long 
magic,
 */
inode->i_ino = 1;
inode->i_mode = S_IFDIR | 0755;
-   inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
inode->i_op = _dir_inode_operations;
inode->i_fop = _dir_operations;
set_nlink(inode, 2);
@@ -952,7 +954,7 @@ int simple_fill_super(struct super_block *s, unsigned long 
magic,
goto out;
}
inode->i_mode = S_IFREG | files->mode;
-   inode->i_atime = inode->i_mtime = 
inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
inode->i_fop = files->ops;
inode->i_ino = i;
d_add(dentry, inode);
@@ -1520,7 +1522,7 @@ struct inode *alloc_anon_inode(struct super_block *s)
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE;
-   inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
return inode;
 }
 EXPORT_SYMBOL(alloc_anon_inode);
@@ -1912,3 +1914,20 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct 
iov_iter *iter,
return direct_written + buffered_written;
 }
 EXPORT_SYMBOL_GPL(direct_write_fallback);
+
+/**
+ * simple_inode_init_ts - initialize the timestamps for a new inode
+ * @inode: inode to be initialized
+ *
+ * When a new inode is created, most filesystems set the timestamps to the
+ * current time. Add a helper to do this.
+ */
+struct timespec64 simple_inode_init_

[PATCH v2 00/89] fs: new accessor methods for inode atime and mtime

2023-10-04 Thread Jeff Layton
v2:
- bugfix in mtime handling
- incorporate _sec and _nsec accessor functions (Chuck Lever)
- move i_generation to plug hole after changing timestamps (Amir Goldstein)

While working on the multigrain timestamp changes, Linus suggested
adding some similar wrappers for accessing the atime and mtime that we
have for the ctime. With that, we could then move to using discrete
integers instead of struct timespec64 in struct inode and shrink it.

This patch implements this. Linus suggested using macros for the new
accessors, but the existing ctime wrappers were static inlines and since
there are only 3 different timestamps, I didn't see that trying to
fiddle with macros would gain us anything (other than less verbosity in
fs.h).

The second to last patch makes the conversion to discrete integers,
which shaves 8 bytes off of struct inode on my x86_64 kernel. The last
patch reshuffles things a little to keep the i_lock in the same
cacheline as the fields it protects.

About 75% of this conversion was done with coccinelle, with the rest
done by hand.

I think we probably ought to try to get everything but the last two
patches into v6.7 (though we could consider those too if we're feeling
lucky).

Jeff Layton (89):
  fs: new accessor methods for atime and mtime
  fs: convert core infrastructure to new timestamp accessors
  spufs: convert to new timestamp accessors
  hypfs: convert to new timestamp accessors
  android: convert to new timestamp accessors
  char: convert to new timestamp accessors
  qib: convert to new timestamp accessors
  ibmasm: convert to new timestamp accessors
  misc: convert to new timestamp accessors
  x86: convert to new timestamp accessors
  tty: convert to new timestamp accessors
  function: convert to new timestamp accessors
  legacy: convert to new timestamp accessors
  usb: convert to new timestamp accessors
  9p: convert to new timestamp accessors
  adfs: convert to new timestamp accessors
  affs: convert to new timestamp accessors
  afs: convert to new timestamp accessors
  autofs: convert to new timestamp accessors
  bcachefs: convert to new timestamp accessors
  befs: convert to new timestamp accessors
  bfs: convert to new timestamp accessors
  btrfs: convert to new timestamp accessors
  ceph: convert to new timestamp accessors
  coda: convert to new timestamp accessors
  configfs: convert to new timestamp accessors
  cramfs: convert to new timestamp accessors
  debugfs: convert to new timestamp accessors
  devpts: convert to new timestamp accessors
  efivarfs: convert to new timestamp accessors
  efs: convert to new timestamp accessors
  erofs: convert to new timestamp accessors
  exfat: convert to new timestamp accessors
  ext2: convert to new timestamp accessors
  ext4: convert to new timestamp accessors
  f2fs: convert to new timestamp accessors
  fat: convert to new timestamp accessors
  freevxfs: convert to new timestamp accessors
  fuse: convert to new timestamp accessors
  gfs2: convert to new timestamp accessors
  hfs: convert to new timestamp accessors
  hfsplus: convert to new timestamp accessors
  hostfs: convert to new timestamp accessors
  hpfs: convert to new timestamp accessors
  hugetlbfs: convert to new timestamp accessors
  isofs: convert to new timestamp accessors
  jffs2: convert to new timestamp accessors
  jfs: convert to new timestamp accessors
  kernfs: convert to new timestamp accessors
  minix: convert to new timestamp accessors
  nfs: convert to new timestamp accessors
  nfsd: convert to new timestamp accessors
  nilfs2: convert to new timestamp accessors
  ntfs: convert to new timestamp accessors
  ntfs3: convert to new timestamp accessors
  ocfs2: convert to new timestamp accessors
  omfs: convert to new timestamp accessors
  openpromfs: convert to new timestamp accessors
  orangefs: convert to new timestamp accessors
  overlayfs: convert to new timestamp accessors
  proc: convert to new timestamp accessors
  pstore: convert to new timestamp accessors
  qnx4: convert to new timestamp accessors
  qnx6: convert to new timestamp accessors
  ramfs: convert to new timestamp accessors
  reiserfs: convert to new timestamp accessors
  romfs: convert to new timestamp accessors
  client: convert to new timestamp accessors
  server: convert to new timestamp accessors
  squashfs: convert to new timestamp accessors
  sysv: convert to new timestamp accessors
  tracefs: convert to new timestamp accessors
  ubifs: convert to new timestamp accessors
  udf: convert to new timestamp accessors
  ufs: convert to new timestamp accessors
  vboxsf: convert to new timestamp accessors
  xfs: convert to new timestamp accessors
  zonefs: convert to new timestamp accessors
  linux: convert to new timestamp accessors
  ipc: convert to new timestamp accessors
  bpf: convert to new timestamp accessors
  mm: convert to new timestamp accessors
  sunrpc: convert to new timestamp accessors
  apparmor: convert to new timestamp accessors
  selinux: convert to new timestamp accessors

[PATCH v2 03/89] spufs: convert to new timestamp accessors

2023-10-04 Thread Jeff Layton
Convert to using the new inode timestamp accessor functions.

Signed-off-by: Jeff Layton 
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c 
b/arch/powerpc/platforms/cell/spufs/inode.c
index 38c5be34c895..10c1320adfd0 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
-   inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
 out:
return inode;
 }
-- 
2.41.0



Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers

2023-09-29 Thread Jeff Layton
On Fri, 2023-09-29 at 11:44 +0200, Christian Brauner wrote:
> > It is a lot of churn though.
> 
> I think that i_{a,c,m}time shouldn't be accessed directly by
> filesystems same as no filesystem should really access i_{g,u}id which
> we also provide i_{g,u}id_{read,write}() accessors for. The mode is
> another example where really most often should use helpers because of all
> the set*id stripping that we need to do (and the bugs that we had
> because of this...).
> 
> The interdependency between ctime and mtime is enough to hide this in
> accessors. The other big advantage is simply grepability. So really I
> would like to see this change even without the type switch.
> 
> In other words, there's no need to lump the two changes together. Do the
> conversion part and we can argue about the switch to discrete integers
> separately.
> 
> The other adavantage is that we have a cycle to see any possible
> regression from the conversion.
> 
> Thoughts anyone?

That works for me, and sort of what I was planning anyway. I mostly just
did the change to timestamp storage to see what it would look like
afterward.

FWIW, I'm planning to do a v2 patchbomb early next week, with the
changes that Chuck suggested (specific helpers for fetching the _sec and
_nsec fields). For now, I'll drop the change from timespec64 to discrete
fields. We can do that in a separate follow-on set.
-- 
Jeff Layton 


Re: [PATCH 87/87] fs: move i_blocks up a few places in struct inode

2023-09-28 Thread Jeff Layton
On Thu, 2023-09-28 at 10:41 -0700, Linus Torvalds wrote:
> On Thu, 28 Sept 2023 at 04:06, Jeff Layton  wrote:
> > 
> > Move i_blocks up above the i_lock, which moves the new 4 byte hole to
> > just after the timestamps, without changing the size of the structure.
> 
> I'm sure others have mentioned this, but 'struct inode' is marked with
> __randomize_layout, so the actual layout may end up being very
> different.
> 
> I'm personally not convinced the whole structure randomization is
> worth it - it's easy enough to figure out for any distro kernel since
> the seed has to be the same across machines for modules to work, so
> even if the seed isn't "public", any layout is bound to be fairly
> easily discoverable.
> 
> So the whole randomization only really works for private kernel
> builds, and it adds this kind of pain where "optimizing" the structure
> layout is kind of pointless depending on various options.
> 
> I certainly *hope* no distro enables that pointless thing, but it's a worry.
> 

I've never enabled struct randomization and don't know anyone who does.
I figure if you turn that on, you get to keep all of the pieces when you
start seeing weird performance problems.

I think that we have to optimize for that being disabled. Even without
that though, turning on and off options can change the layout...and then
there are different arches, etc.

I'm using a config derived from the Fedora x86_64 kernel images and hope
that represents a reasonably common configuration. The only conditional
members before the timestamps are based on CONFIG_FS_POSIX_ACL and
CONFIG_SECURITY, which are almost always turned on with most distros.
-- 
Jeff Layton 


Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers

2023-09-28 Thread Jeff Layton
On Thu, 2023-09-28 at 10:19 -0700, Darrick J. Wong wrote:
> On Thu, Sep 28, 2023 at 01:06:03PM -0400, Jeff Layton wrote:
> > On Thu, 2023-09-28 at 11:48 -0400, Arnd Bergmann wrote:
> > > On Thu, Sep 28, 2023, at 07:05, Jeff Layton wrote:
> > > > This shaves 8 bytes off struct inode, according to pahole.
> > > > 
> > > > Signed-off-by: Jeff Layton 
> > > 
> > > FWIW, this is similar to the approach that Deepa suggested
> > > back in 2016:
> > > 
> > > https://lore.kernel.org/lkml/1452144972-15802-3-git-send-email-deepa.ker...@gmail.com/
> > > 
> > > It was NaKed at the time because of the added complexity,
> > > though it would have been much easier to do it then,
> > > as we had to touch all the timespec references anyway.
> > > 
> > > The approach still seems ok to me, but I'm not sure it's worth
> > > doing it now if we didn't do it then.
> > > 
> > 
> > I remember seeing those patches go by. I don't remember that change
> > being NaK'ed, but I wasn't paying close attention at the time 
> > 
> > Looking at it objectively now, I think it's worth it to recover 8 bytes
> > per inode and open a 4 byte hole that Amir can use to grow the
> > i_fsnotify_mask. We might even able to shave off another 12 bytes
> > eventually if we can move to a single 64-bit word per timestamp. 
> 
> I don't think you can, since btrfs timestamps utilize s64 seconds
> counting in both directions from the Unix epoch.  They also support ns
> resolution:
> 
>   struct btrfs_timespec {
>   __le64 sec;
>   __le32 nsec;
>   } __attribute__ ((__packed__));
> 

Correct. We'd lose some fidelity in currently stored timestamps, but as
Linus and Ted pointed out, anything below ~100ns granularity is
effectively just noise, as that's the floor overhead for calling into
the kernel. It's hard to argue that any application needs that sort of
timestamp resolution, at least with contemporary hardware. 

Doing that would mean that tests that store specific values in the
atime/mtime and expect to be able to fetch exactly that value back would
break though, so we'd have to be OK with that if we want to try it. The
good news is that it's relatively easy to experiment with new ways to
store timestamps with these wrappers in place.

-- 
Jeff Layton 


Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers

2023-09-28 Thread Jeff Layton
On Thu, 2023-09-28 at 07:05 -0400, Jeff Layton wrote:
> This shaves 8 bytes off struct inode, according to pahole.
> 
> Signed-off-by: Jeff Layton 
> ---
>  include/linux/fs.h | 32 +++-
>  1 file changed, 23 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 831657011036..de902ff2938b 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -671,9 +671,12 @@ struct inode {
>   };
>   dev_t   i_rdev;
>   loff_t  i_size;
> - struct timespec64   __i_atime; /* use inode_*_atime accessors */
> - struct timespec64   __i_mtime; /* use inode_*_mtime accessors */
> - struct timespec64   __i_ctime; /* use inode_*_ctime accessors */
> + time64_ti_atime_sec;
> + time64_ti_mtime_sec;
> + time64_ti_ctime_sec;
> + u32 i_atime_nsec;
> + u32 i_mtime_nsec;
> + u32 i_ctime_nsec;
>   spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
>   unsigned short  i_bytes;
>   u8  i_blkbits;
> @@ -1519,7 +1522,9 @@ struct timespec64 inode_set_ctime_current(struct inode 
> *inode);
>   */
>  static inline struct timespec64 inode_get_ctime(const struct inode *inode)
>  {
> - return inode->__i_ctime;
> + struct timespec64 ts = { .tv_sec  = inode->i_ctime_sec,
> +  .tv_nsec = inode->i_ctime_nsec };
> + return ts;
>  }
> 
>
>  
>  /**
> @@ -1532,7 +1537,8 @@ static inline struct timespec64 inode_get_ctime(const 
> struct inode *inode)
>  static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
> struct timespec64 ts)
>  {
> - inode->__i_ctime = ts;
> + inode->i_ctime_sec = ts.tv_sec;
> + inode->i_ctime_nsec = ts.tv_sec;

Bug above and in the other inode_set_?time_to_ts() functions. This isn't
setting the nsec field correctly.

>   return ts;
>  }
> 
> 


>  
> @@ -1555,13 +1561,17 @@ static inline struct timespec64 
> inode_set_ctime(struct inode *inode,
>  
>  static inline struct timespec64 inode_get_atime(const struct inode *inode)
>  {
> - return inode->__i_atime;
> + struct timespec64 ts = { .tv_sec  = inode->i_atime_sec,
> +  .tv_nsec = inode->i_atime_nsec };
> +
> + return ts;
>  }
>  
>  static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
> struct timespec64 ts)
>  {
> - inode->__i_atime = ts;
> + inode->i_atime_sec = ts.tv_sec;
> + inode->i_atime_nsec = ts.tv_sec;
>   return ts;
>  }
>  
> @@ -1575,13 +1585,17 @@ static inline struct timespec64 
> inode_set_atime(struct inode *inode,
>  
>  static inline struct timespec64 inode_get_mtime(const struct inode *inode)
>  {
> - return inode->__i_mtime;
> + struct timespec64 ts = { .tv_sec  = inode->i_mtime_sec,
> +  .tv_nsec = inode->i_mtime_nsec };
> +
> + return ts;
>  }
>  
>  static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
> struct timespec64 ts)
>  {
> - inode->__i_mtime = ts;
> + inode->i_atime_sec = ts.tv_sec;
> + inode->i_atime_nsec = ts.tv_sec;

Doh! s/atime/mtime/ in the above lines.

>   return ts;
>  }
>  

Both bugs are fixed in my tree.
-- 
Jeff Layton 


Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers

2023-09-28 Thread Jeff Layton
On Thu, 2023-09-28 at 11:48 -0400, Arnd Bergmann wrote:
> On Thu, Sep 28, 2023, at 07:05, Jeff Layton wrote:
> > This shaves 8 bytes off struct inode, according to pahole.
> > 
> > Signed-off-by: Jeff Layton 
> 
> FWIW, this is similar to the approach that Deepa suggested
> back in 2016:
> 
> https://lore.kernel.org/lkml/1452144972-15802-3-git-send-email-deepa.ker...@gmail.com/
> 
> It was NaKed at the time because of the added complexity,
> though it would have been much easier to do it then,
> as we had to touch all the timespec references anyway.
> 
> The approach still seems ok to me, but I'm not sure it's worth
> doing it now if we didn't do it then.
> 

I remember seeing those patches go by. I don't remember that change
being NaK'ed, but I wasn't paying close attention at the time 

Looking at it objectively now, I think it's worth it to recover 8 bytes
per inode and open a 4 byte hole that Amir can use to grow the
i_fsnotify_mask. We might even able to shave off another 12 bytes
eventually if we can move to a single 64-bit word per timestamp. 

It is a lot of churn though.
-- 
Jeff Layton 


Re: [PATCH 87/87] fs: move i_blocks up a few places in struct inode

2023-09-28 Thread Jeff Layton
On Thu, 2023-09-28 at 14:35 +0300, Amir Goldstein wrote:
> On Thu, Sep 28, 2023 at 2:06 PM Jeff Layton  wrote:
> > 
> > The recent change to use discrete integers instead of struct timespec64
> > in struct inode shaved 8 bytes off of it, but it also moves the i_lock
> > into the previous cacheline, away from the fields that it protects.
> > 
> > Move i_blocks up above the i_lock, which moves the new 4 byte hole to
> > just after the timestamps, without changing the size of the structure.
> > 
> 
> Instead of creating an implicit hole, can you please move i_generation
> to fill the 4 bytes hole.
> 
> It makes sense in the same cache line with i_ino and I could
> use the vacant 4 bytes hole above i_fsnotify_mask to expand the
> mask to 64bit (the 32bit event mask space is running out).
> 
> Thanks,
> Amir.
> 

Sounds like a plan. Resulting struct inode size is the same (616 bytes
with my kdevops kconfig). BTW: all of these changes are in my "amtime"
branch if anyone wants to pull them down.
--
Jeff Layton 


[PATCH 87/87] fs: move i_blocks up a few places in struct inode

2023-09-28 Thread Jeff Layton
The recent change to use discrete integers instead of struct timespec64
in struct inode shaved 8 bytes off of it, but it also moves the i_lock
into the previous cacheline, away from the fields that it protects.

Move i_blocks up above the i_lock, which moves the new 4 byte hole to
just after the timestamps, without changing the size of the structure.

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index de902ff2938b..3e0fe0f52e7c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -677,11 +677,11 @@ struct inode {
u32 i_atime_nsec;
u32 i_mtime_nsec;
u32 i_ctime_nsec;
+   blkcnt_ti_blocks;
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
u8  i_write_hint;
-   blkcnt_ti_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
seqcount_t  i_size_seqcount;
-- 
2.41.0



[PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers

2023-09-28 Thread Jeff Layton
This shaves 8 bytes off struct inode, according to pahole.

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 32 +++-
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 831657011036..de902ff2938b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -671,9 +671,12 @@ struct inode {
};
dev_t   i_rdev;
loff_t  i_size;
-   struct timespec64   __i_atime; /* use inode_*_atime accessors */
-   struct timespec64   __i_mtime; /* use inode_*_mtime accessors */
-   struct timespec64   __i_ctime; /* use inode_*_ctime accessors */
+   time64_ti_atime_sec;
+   time64_ti_mtime_sec;
+   time64_ti_ctime_sec;
+   u32 i_atime_nsec;
+   u32 i_mtime_nsec;
+   u32 i_ctime_nsec;
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
@@ -1519,7 +1522,9 @@ struct timespec64 inode_set_ctime_current(struct inode 
*inode);
  */
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-   return inode->__i_ctime;
+   struct timespec64 ts = { .tv_sec  = inode->i_ctime_sec,
+.tv_nsec = inode->i_ctime_nsec };
+   return ts;
 }
 
 /**
@@ -1532,7 +1537,8 @@ static inline struct timespec64 inode_get_ctime(const 
struct inode *inode)
 static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->__i_ctime = ts;
+   inode->i_ctime_sec = ts.tv_sec;
+   inode->i_ctime_nsec = ts.tv_sec;
return ts;
 }
 
@@ -1555,13 +1561,17 @@ static inline struct timespec64 inode_set_ctime(struct 
inode *inode,
 
 static inline struct timespec64 inode_get_atime(const struct inode *inode)
 {
-   return inode->__i_atime;
+   struct timespec64 ts = { .tv_sec  = inode->i_atime_sec,
+.tv_nsec = inode->i_atime_nsec };
+
+   return ts;
 }
 
 static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->__i_atime = ts;
+   inode->i_atime_sec = ts.tv_sec;
+   inode->i_atime_nsec = ts.tv_sec;
return ts;
 }
 
@@ -1575,13 +1585,17 @@ static inline struct timespec64 inode_set_atime(struct 
inode *inode,
 
 static inline struct timespec64 inode_get_mtime(const struct inode *inode)
 {
-   return inode->__i_mtime;
+   struct timespec64 ts = { .tv_sec  = inode->i_mtime_sec,
+.tv_nsec = inode->i_mtime_nsec };
+
+   return ts;
 }
 
 static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->__i_mtime = ts;
+   inode->i_atime_sec = ts.tv_sec;
+   inode->i_atime_nsec = ts.tv_sec;
return ts;
 }
 
-- 
2.41.0



[PATCH 85/87] fs: rename i_atime and i_mtime fields to __i_atime and __i_mtime

2023-09-28 Thread Jeff Layton
Make it clear that these fields are private now, and that the accessors
should be used instead.

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 12d247b82aa0..831657011036 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -671,9 +671,9 @@ struct inode {
};
dev_t   i_rdev;
loff_t  i_size;
-   struct timespec64   i_atime;
-   struct timespec64   i_mtime;
-   struct timespec64   __i_ctime; /* use inode_*_ctime accessors! */
+   struct timespec64   __i_atime; /* use inode_*_atime accessors */
+   struct timespec64   __i_mtime; /* use inode_*_mtime accessors */
+   struct timespec64   __i_ctime; /* use inode_*_ctime accessors */
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
@@ -1555,13 +1555,13 @@ static inline struct timespec64 inode_set_ctime(struct 
inode *inode,
 
 static inline struct timespec64 inode_get_atime(const struct inode *inode)
 {
-   return inode->i_atime;
+   return inode->__i_atime;
 }
 
 static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->i_atime = ts;
+   inode->__i_atime = ts;
return ts;
 }
 
@@ -1575,13 +1575,13 @@ static inline struct timespec64 inode_set_atime(struct 
inode *inode,
 
 static inline struct timespec64 inode_get_mtime(const struct inode *inode)
 {
-   return inode->i_mtime;
+   return inode->__i_mtime;
 }
 
 static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->i_mtime = ts;
+   inode->__i_mtime = ts;
return ts;
 }
 
-- 
2.41.0



[PATCH 01/87] fs: new accessor methods for atime and mtime

2023-09-28 Thread Jeff Layton
Recently, we converted the ctime accesses in the kernel to use new
accessor functions. Linus recently pointed out though that if we add
accessors for the atime and mtime, then that would allow us to
seamlessly change how these timestamps are stored in the inode.

Add new accessor functions for the atime and mtime that mirror the
accessors for the ctime.

Signed-off-by: Jeff Layton 
---
 fs/libfs.c | 13 +
 include/linux/fs.h | 42 ++
 2 files changed, 55 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index 37f2d34ee090..f5cdc7f7f5b5 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1912,3 +1912,16 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct 
iov_iter *iter,
return direct_written + buffered_written;
 }
 EXPORT_SYMBOL_GPL(direct_write_fallback);
+
+/**
+ * simple_inode_init_ts - initialize the timestamps for a new inode
+ * @inode: inode to be initialized
+ *
+ * When a new inode is created, most filesystems set the timestamps to the
+ * current time. Add a helper to do this.
+ */
+struct timespec64 simple_inode_init_ts(struct inode *inode);
+{
+   return inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+}
+EXPORT_SYMBOL(simple_inode_init_ts);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b528f063e8ff..12d247b82aa0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1553,6 +1553,48 @@ static inline struct timespec64 inode_set_ctime(struct 
inode *inode,
return inode_set_ctime_to_ts(inode, ts);
 }
 
+static inline struct timespec64 inode_get_atime(const struct inode *inode)
+{
+   return inode->i_atime;
+}
+
+static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+   inode->i_atime = ts;
+   return ts;
+}
+
+static inline struct timespec64 inode_set_atime(struct inode *inode,
+   time64_t sec, long nsec)
+{
+   struct timespec64 ts = { .tv_sec  = sec,
+.tv_nsec = nsec };
+   return inode_set_atime_to_ts(inode, ts);
+}
+
+static inline struct timespec64 inode_get_mtime(const struct inode *inode)
+{
+   return inode->i_mtime;
+}
+
+static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+   inode->i_mtime = ts;
+   return ts;
+}
+
+static inline struct timespec64 inode_set_mtime(struct inode *inode,
+   time64_t sec, long nsec)
+{
+   struct timespec64 ts = { .tv_sec  = sec,
+.tv_nsec = nsec };
+   return inode_set_mtime_to_ts(inode, ts);
+}
+
+struct timespec64 simple_inode_init_ts(struct inode *inode);
+
 /*
  * Snapshotting support.
  */
-- 
2.41.0



[PATCH 00/87] fs: new accessor methods for atime and mtime

2023-09-28 Thread Jeff Layton
While working on the multigrain timestamp changes, Linus suggested
adding some similar wrappers for accessing the atime and mtime that we
have for the ctime. With that, we could then move to using discrete
integers instead of timespec64 in struct inode, and shrink it.

Linus suggested using macros for the new accessors, but the existing
ctime wrappers were static inlines and since there are only 3 different
timestamps, I didn't see that trying to fiddle with macros would gain us
anything.

The first patches start with some new infrastructure, and then we move
to converting different subsystems to use it. The second to last patch
makes the conversion to discrete integers, which shaves 8 bytes off of
struct inode on my x86_64 kernel. The last patch reshuffles things a
bit more, to keep the i_lock in the same cacheline as the fields it
protects (at least on x86_64).

About 75% of this conversion was done with coccinelle, with the rest
done by hand with vim.

Jeff Layton (87):
  fs: new accessor methods for atime and mtime
  fs: convert core infrastructure to new {a,m}time accessors
  arch/powerpc/platforms/cell/spufs: convert to new inode {a,m}time
accessors
  arch/s390/hypfs: convert to new inode {a,m}time accessors
  drivers/android: convert to new inode {a,m}time accessors
  drivers/char: convert to new inode {a,m}time accessors
  drivers/infiniband/hw/qib: convert to new inode {a,m}time accessors
  drivers/misc/ibmasm: convert to new inode {a,m}time accessors
  drivers/misc: convert to new inode {a,m}time accessors
  drivers/platform/x86: convert to new inode {a,m}time accessors
  drivers/tty: convert to new inode {a,m}time accessors
  drivers/usb/core: convert to new inode {a,m}time accessors
  drivers/usb/gadget/function: convert to new inode {a,m}time accessors
  drivers/usb/gadget/legacy: convert to new inode {a,m}time accessors
  fs/9p: convert to new inode {a,m}time accessors
  fs/adfs: convert to new inode {a,m}time accessors
  fs/affs: convert to new inode {a,m}time accessors
  fs/afs: convert to new inode {a,m}time accessors
  fs/autofs: convert to new inode {a,m}time accessors
  fs/befs: convert to new inode {a,m}time accessors
  fs/bfs: convert to new inode {a,m}time accessors
  fs/btrfs: convert to new inode {a,m}time accessors
  fs/ceph: convert to new inode {a,m}time accessors
  fs/coda: convert to new inode {a,m}time accessors
  fs/configfs: convert to new inode {a,m}time accessors
  fs/cramfs: convert to new inode {a,m}time accessors
  fs/debugfs: convert to new inode {a,m}time accessors
  fs/devpts: convert to new inode {a,m}time accessors
  fs/efivarfs: convert to new inode {a,m}time accessors
  fs/efs: convert to new inode {a,m}time accessors
  fs/erofs: convert to new inode {a,m}time accessors
  fs/exfat: convert to new inode {a,m}time accessors
  fs/ext2: convert to new inode {a,m}time accessors
  fs/ext4: convert to new inode {a,m}time accessors
  fs/f2fs: convert to new inode {a,m}time accessors
  fs/fat: convert to new inode {a,m}time accessors
  fs/freevxfs: convert to new inode {a,m}time accessors
  fs/fuse: convert to new inode {a,m}time accessors
  fs/gfs2: convert to new inode {a,m}time accessors
  fs/hfs: convert to new inode {a,m}time accessors
  fs/hfsplus: convert to new inode {a,m}time accessors
  fs/hostfs: convert to new inode {a,m}time accessors
  fs/hpfs: convert to new inode {a,m}time accessors
  fs/hugetlbfs: convert to new inode {a,m}time accessors
  fs/isofs: convert to new inode {a,m}time accessors
  fs/jffs2: convert to new inode {a,m}time accessors
  fs/jfs: convert to new inode {a,m}time accessors
  fs/kernfs: convert to new inode {a,m}time accessors
  fs/minix: convert to new inode {a,m}time accessors
  fs/nfs: convert to new inode {a,m}time accessors
  fs/nfsd: convert to new inode {a,m}time accessors
  fs/nilfs2: convert to new inode {a,m}time accessors
  fs/ntfs: convert to new inode {a,m}time accessors
  fs/ntfs3: convert to new inode {a,m}time accessors
  fs/ocfs2: convert to new inode {a,m}time accessors
  fs/omfs: convert to new inode {a,m}time accessors
  fs/openpromfs: convert to new inode {a,m}time accessors
  fs/orangefs: convert to new inode {a,m}time accessors
  fs/overlayfs: convert to new inode {a,m}time accessors
  fs/proc: convert to new inode {a,m}time accessors
  fs/pstore: convert to new inode {a,m}time accessors
  fs/qnx4: convert to new inode {a,m}time accessors
  fs/qnx6: convert to new inode {a,m}time accessors
  fs/ramfs: convert to new inode {a,m}time accessors
  fs/reiserfs: convert to new inode {a,m}time accessors
  fs/romfs: convert to new inode {a,m}time accessors
  fs/smb/client: convert to new inode {a,m}time accessors
  fs/smb/server: convert to new inode {a,m}time accessors
  fs/squashfs: convert to new inode {a,m}time accessors
  fs/sysv: convert to new inode {a,m}time accessors
  fs/tracefs: convert to new inode {a,m}time accessors
  fs/ubifs: convert to new inode {a,m}time accessors
  fs/udf: convert to new inode {a,m

[PATCH 03/87] arch/powerpc/platforms/cell/spufs: convert to new inode {a,m}time accessors

2023-09-28 Thread Jeff Layton
Signed-off-by: Jeff Layton 
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c 
b/arch/powerpc/platforms/cell/spufs/inode.c
index 38c5be34c895..10c1320adfd0 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
-   inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+   simple_inode_init_ts(inode);
 out:
return inode;
 }
-- 
2.41.0



Re: [PATCH v2 08/92] fs: new helper: simple_rename_timestamp

2023-08-29 Thread Jeff Layton
On Wed, 2023-08-30 at 01:19 +0100, Al Viro wrote:
> On Wed, Jul 05, 2023 at 02:58:11PM -0400, Jeff Layton wrote:
> 
> > + * POSIX mandates that the old and new parent directories have their ctime 
> > and
> > + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), 
> > have
> > + * their ctime updated.
> 
> APPLICATION USAGE
> Some implementations mark for update the last file status change timestamp
> of renamed files and some do not. Applications which make use of the
> last file status change timestamp may behave differently with respect
> to renamed files unless they are designed to allow for either behavior.
>
> So for children POSIX permits rather than mandates.  Doesn't really matter;
> Linux behaviour had been to touch ctime on children since way back, if
> not since the very beginning.

Mea culpa. You're quite correct. I'll plan to roll a small patch to
update the comment over this function.

Thanks!
-- 
Jeff Layton 


Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-10 Thread Jeff Layton
On Mon, 2023-07-10 at 14:35 +0200, Christian Brauner wrote:
> On Fri, Jul 07, 2023 at 08:42:31AM -0400, Jeff Layton wrote:
> > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote:
> > > v2:
> > > - prepend patches to add missing ctime updates
> > > - add simple_rename_timestamp helper function
> > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> > > - drop individual inode_ctime_set_{sec,nsec} helpers
> > > 
> > 
> > After review by Jan and others, and Jan's ext4 rework, the diff on top
> > of the series I posted a couple of days ago is below. I don't really
> > want to spam everyone with another ~100 patch v3 series, but I can if
> > you think that's best.
> > 
> > Christian, what would you like me to do here?
> 
> I picked up the series from the list and folded the fixups you posted
> here into the respective fs conversion patches. I hope that helps you
> avoid a resend. You should have received a separate "thank you" mail for
> all of this.
> 
> To each patch that I folded one of the fixlets from below into I added a
> git note that records a link to your mail here and the respective patch
> hunk from this mail that I folded into the patch. git.kernel.org will
> show notes by default. For example,
> https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/commit/?h=vfs.ctime=8b0e3c2e99004609a16ba145bcbdfdddb78e220e
> should show you the note I added. You can also fetch them via
> git fetch $remote refs/notes/*:refs/notes/*
> (You probably know that ofc but jic.) if you're interested.
> 
> Based on v6.5-rc1 as of today.
> 

Many thanks!!! I'll get to work rebasing the multigrain timestamp series
on top of that.

> Btw, both b4 and patchwork somehow treat the series in weird was.
> IOW, based on the message id of the cover letter I was able to pull most
> messages except for:
> 
> [07/92] fs: add ctime accessors infrastructure
> [08/92] fs: new helper: simple_rename_timestamp
> [92/92] fs: rename i_ctime field to __i_ctime
> 
> which I pulled in separately. Not sure what the cause of 
> 
> this is.

Good to know.

I ended up doing the send in two phases: one for the cover letter and
infrastructure patches that went to everyone, and one for the per-
subsystem patches that went do individual maintainers and lists.

I suspect that screwed up the message IDs somehow. Hopefully I won't
need to do a posting like that again soon, but I'll pay closer attention
to the message id handling next time.

Thanks again!
-- 
Jeff Layton 


Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-07 Thread Jeff Layton
On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote:
> v2:
> - prepend patches to add missing ctime updates
> - add simple_rename_timestamp helper function
> - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> - drop individual inode_ctime_set_{sec,nsec} helpers
> 

After review by Jan and others, and Jan's ext4 rework, the diff on top
of the series I posted a couple of days ago is below. I don't really
want to spam everyone with another ~100 patch v3 series, but I can if
you think that's best.

Christian, what would you like me to do here?

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bcdb1a0beccf..5f6e93714f5a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -699,8 +699,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
if (ci->i_version == 0 ||
timespec64_compare(ctime, ) > 0) {
dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
-inode_get_ctime(inode).tv_sec,
-inode_get_ctime(inode).tv_nsec,
+ictime.tv_sec, ictime.tv_nsec,
 ctime->tv_sec, ctime->tv_nsec);
inode_set_ctime_to_ts(inode, *ctime);
}
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 806374d866d1..567c0d305ea4 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -175,10 +175,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
vi->chunkbits = sb->s_blocksize_bits +
(vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
}
-   inode->i_mtime.tv_sec = inode_get_ctime(inode).tv_sec;
-   inode->i_atime.tv_sec = inode_get_ctime(inode).tv_sec;
-   inode->i_mtime.tv_nsec = inode_get_ctime(inode).tv_nsec;
-   inode->i_atime.tv_nsec = inode_get_ctime(inode).tv_nsec;
+   inode->i_mtime = inode->i_atime = inode_get_ctime(inode);
 
inode->i_flags &= ~S_DAX;
if (test_opt(>opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index c007de6ac1c7..1b9f587f6cca 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -1351,7 +1351,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
exfat_warn(sb, "abnormal access to an inode dropped");
WARN_ON(new_inode->i_nlink == 0);
}
-   EXFAT_I(new_inode)->i_crtime = 
inode_set_ctime_current(new_inode);
+   EXFAT_I(new_inode)->i_crtime = current_time(new_inode);
}
 
 unlock:
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d502b930431b..d63543187359 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,64 +868,63 @@ struct ext4_inode {
  * affected filesystem before 2242.
  */
 
-static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 ts)
 {
-   u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
-   return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
+   u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK;
+   return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS));
 }
 
-static inline void ext4_decode_extra_time(struct timespec64 *time,
- __le32 extra)
+static inline struct timespec64 ext4_decode_extra_time(__le32 base,
+  __le32 extra)
 {
+   struct timespec64 ts = { .tv_sec = le32_to_cpu(base) };
+
if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
-   time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 
32;
-   time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 
EXT4_EPOCH_BITS;
+   ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+   ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+   return ts;
 }
 
-#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)  
\
+#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts)  
\
 do {   
\
-   if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) 
{\
-   (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);
\
-   (raw_inode)->xtime ## _extra =  
\
-   ext4_encode_extra_time(&(inode)->xtime);
\
-   }   
\
-   else\
-   (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, 
(inode)->xtime.tv_sec, 

Re: [apparmor] [PATCH v2 08/92] fs: new helper: simple_rename_timestamp

2023-07-07 Thread Jeff Layton
On Thu, 2023-07-06 at 21:02 +, Seth Arnold wrote:
> On Wed, Jul 05, 2023 at 08:04:41PM -0400, Jeff Layton wrote:
> > 
> > I don't believe it's an issue. I've seen nothing in the POSIX spec that
> > mandates that timestamp updates to different inodes involved in an
> > operation be set to the _same_ value. It just says they must be updated.
> > 
> > It's also hard to believe that any software would depend on this either,
> > given that it's very inconsistent across filesystems today. AFAICT, this
> > was mostly done in the past just as a matter of convenience.
> 
> I've seen this assumption in several programs:
> 

Thanks for looking into this!

To be clear, POSIX doesn't require that _different_ inodes ever be set
to the same timestamp value. IOW, it certainly doesn't require that the
source and target directories on a rename() end up with the exact same
timestamp value.

Granted, POSIX is rather vague on timestamps in general, but most of the
examples below involve comparing different timestamps on the _same_
inode.


> mutt buffy.c
> https://sources.debian.org/src/mutt/2.2.9-1/buffy.c/?hl=625#L625
> 
>   if (mailbox->newly_created &&
>   (sb->st_ctime != sb->st_mtime || sb->st_ctime != sb->st_atime))
> mailbox->newly_created = 0;
> 

This should be fine with this patchset. Note that this is comparing
a/c/mtime on the same inode, and our usual pattern on inode
instantiation is:

inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);

...which should result in all of inode's timestamps being synchronized.

> 
> neomutt mbox/mbox.c
> https://sources.debian.org/src/neomutt/20220429+dfsg1-4.1/mbox/mbox.c/?hl=1820#L1820
> 
>   if (m->newly_created && ((st.st_ctime != st.st_mtime) || (st.st_ctime != 
> st.st_atime)))
> m->newly_created = false;
> 

Ditto here.

> 
> screen logfile.c
> https://sources.debian.org/src/screen/4.9.0-4/logfile.c/?hl=130#L130
> 
>   if ((!s->st_dev && !s->st_ino) || /* stat failed, that's new! */
>   !s->st_nlink ||   /* red alert: file unlinked */
>   (s->st_size < o.st_size) ||   /*   file truncated */
>   (s->st_mtime != o.st_mtime) ||/*file modified */
>   ((s->st_ctime != o.st_ctime) &&   /* file changed (moved) */
>!(s->st_mtime == s->st_ctime &&  /*  and it was not a change */
>  o.st_ctime < s->st_ctime)))/* due to delayed nfs write */
>   {
> 

This one is really weird. You have two different struct stat's, "o" and
"s". I assume though that these should be stat values from the same
inode, because otherwise this comparison would make no sense:

  ((s->st_ctime != o.st_ctime) &&   /* file changed (moved) */

In general, we can never contrive to ensure that the ctime of two
different inodes are the same, since that is always set by the kernel to
the current time, and you'd have to ensure that they were created within
the same jiffy (at least with today's code).

> nemo libnemo-private/nemo-vfs-file.c
> https://sources.debian.org/src/nemo/5.6.5-1/libnemo-private/nemo-vfs-file.c/?hl=344#L344
> 
>   /* mtime is when the contents changed; ctime is when the
>* contents or the permissions (inc. owner/group) changed.
>* So we can only know when the permissions changed if mtime
>* and ctime are different.
>*/
>   if (file->details->mtime == file->details->ctime) {
>   return FALSE;
>   }
> 

Ditto here with the first examples. This involves comparing timestamps
on the same inode, which should be fine.

> 
> While looking for more examples, I found a perl test that seems to suggest
> that at least Solaris, AFS, AmigaOS, DragonFly BSD do as you suggest:
> https://sources.debian.org/src/perl/5.36.0-7/t/op/stat.t/?hl=158#L140
> 

(I kinda miss Perl. I wrote a bunch of stuff in it in the 90's and early
naughties)

I think this test is supposed to be testing whether the mtime changes on
link() ?

-8<
my($nlink, $mtime, $ctime) = (stat($tmpfile))[$NLINK, $MTIME, $CTIME];

[...]


skip "Solaris tmpfs has different mtime/ctime link semantics", 2
 if $Is_Solaris and $cwd =~ m#^/tmp# and
    $mtime && $mtime == $ctime;
-8<

...again, I think this would be ok too since it's just comparing the
mtime and ctime of the same inode. Granted this is a Solaris-specific
test, but Linux would be fine here too.

So in conclusion, I don't think this patchset will cause problems with
any of the above code.
-- 
Jeff Layton 


Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-06 Thread Jeff Layton
On Thu, 2023-07-06 at 10:16 -0500, Eric W. Biederman wrote:
> Jeff Layton  writes:
> 
> > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote:
> > > v2:
> > > - prepend patches to add missing ctime updates
> > > - add simple_rename_timestamp helper function
> > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> > > - drop individual inode_ctime_set_{sec,nsec} helpers
> > > 
> > > I've been working on a patchset to change how the inode->i_ctime is
> > > accessed in order to give us conditional, high-res timestamps for the
> > > ctime and mtime. struct timespec64 has unused bits in it that we can use
> > > to implement this. In order to do that however, we need to wrap all
> > > accesses of inode->i_ctime to ensure that bits used as flags are
> > > appropriately handled.
> > > 
> > > The patchset starts with reposts of some missing ctime updates that I
> > > spotted in the tree. It then adds a new helper function for updating the
> > > timestamp after a successful rename, and new ctime accessor
> > > infrastructure.
> > > 
> > > The bulk of the patchset is individual conversions of different
> > > subsysteme to use the new infrastructure. Finally, the patchset renames
> > > the i_ctime field to __i_ctime to help ensure that I didn't miss
> > > anything.
> > > 
> > > This should apply cleanly to linux-next as of this morning.
> > > 
> > > Most of this conversion was done via 5 different coccinelle scripts, run
> > > in succession, with a large swath of by-hand conversions to clean up the
> > > remainder.
> > > 
> > 
> > A couple of other things I should note:
> > 
> > If you sent me an Acked-by or Reviewed-by in the previous set, then I
> > tried to keep it on the patch here, since the respun patches are mostly
> > just renaming stuff from v1. Let me know if I've missed any.
> > 
> > I've also pushed the pile to my tree as this tag:
> > 
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/tag/?h=ctime.20230705
> > 
> > In case that's easier to work with.
> 
> Are there any preliminary patches showing what you want your introduced
> accessors to turn into?  It is hard to judge the sanity of the
> introduction of wrappers without seeing what the wrappers are ultimately
> going to do.
> 
> Eric

I have a draft version of the multigrain patches on top of the wrapper
conversion I've already posted in my "mgctime-experimental" branch:


https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/log/?h=mgctime-experimental

The rationale is best explained in this changelog:


https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/commit/?h=mgctime-experimental=face437a144d3375afb7f70c233b0644b4edccba

The idea will be to enable this on a per-fs basis.
-- 
Jeff Layton 


Re: [PATCH v2 08/92] fs: new helper: simple_rename_timestamp

2023-07-05 Thread Jeff Layton
On Thu, 2023-07-06 at 08:19 +0900, Damien Le Moal wrote:
> On 7/6/23 03:58, Jeff Layton wrote:
> > A rename potentially involves updating 4 different inode timestamps. Add
> > a function that handles the details sanely, and convert the libfs.c
> > callers to use it.
> > 
> > Signed-off-by: Jeff Layton 
> > ---
> >  fs/libfs.c | 36 +++-
> >  include/linux/fs.h |  2 ++
> >  2 files changed, 29 insertions(+), 9 deletions(-)
> > 
> > diff --git a/fs/libfs.c b/fs/libfs.c
> > index a7e56baf8bbd..9ee79668c909 100644
> > --- a/fs/libfs.c
> > +++ b/fs/libfs.c
> > @@ -692,6 +692,31 @@ int simple_rmdir(struct inode *dir, struct dentry 
> > *dentry)
> >  }
> >  EXPORT_SYMBOL(simple_rmdir);
> >  
> > +/**
> > + * simple_rename_timestamp - update the various inode timestamps for rename
> > + * @old_dir: old parent directory
> > + * @old_dentry: dentry that is being renamed
> > + * @new_dir: new parent directory
> > + * @new_dentry: target for rename
> > + *
> > + * POSIX mandates that the old and new parent directories have their ctime 
> > and
> > + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), 
> > have
> > + * their ctime updated.
> > + */
> > +void simple_rename_timestamp(struct inode *old_dir, struct dentry 
> > *old_dentry,
> > +struct inode *new_dir, struct dentry *new_dentry)
> > +{
> > +   struct inode *newino = d_inode(new_dentry);
> > +
> > +   old_dir->i_mtime = inode_set_ctime_current(old_dir);
> > +   if (new_dir != old_dir)
> > +   new_dir->i_mtime = inode_set_ctime_current(new_dir);
> > +   inode_set_ctime_current(d_inode(old_dentry));
> > +   if (newino)
> > +   inode_set_ctime_current(newino);
> > +}
> > +EXPORT_SYMBOL_GPL(simple_rename_timestamp);
> > +
> >  int simple_rename_exchange(struct inode *old_dir, struct dentry 
> > *old_dentry,
> >struct inode *new_dir, struct dentry *new_dentry)
> >  {
> > @@ -707,11 +732,7 @@ int simple_rename_exchange(struct inode *old_dir, 
> > struct dentry *old_dentry,
> > inc_nlink(old_dir);
> > }
> > }
> > -   old_dir->i_ctime = old_dir->i_mtime =
> > -   new_dir->i_ctime = new_dir->i_mtime =
> > -   d_inode(old_dentry)->i_ctime =
> > -   d_inode(new_dentry)->i_ctime = current_time(old_dir);
> > -
> > +   simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
> 
> This is somewhat changing the current behavior: before the patch, the mtime 
> and
> ctime of old_dir, new_dir and the inodes associated with the dentries are 
> always
> equal. But given that simple_rename_timestamp() calls 
> inode_set_ctime_current()
> 4 times, the times could potentially be different.
> 
> I am not sure if that is an issue, but it seems that calling
> inode_set_ctime_current() once, recording the "now" time it sets and using 
> that
> value to set all times may be more efficient and preserve the existing 
> behavior.
> 

I don't believe it's an issue. I've seen nothing in the POSIX spec that
mandates that timestamp updates to different inodes involved in an
operation be set to the _same_ value. It just says they must be updated.

It's also hard to believe that any software would depend on this either,
given that it's very inconsistent across filesystems today. AFAICT, this
was mostly done in the past just as a matter of convenience.

The other problem with doing it that way is that it assumes that
current_time(inode) should always return the same value when given
different inodes. Is it really correct to do this?

inode_set_ctime(dir, inode_set_ctime_current(inode));

"dir" and "inode" are different inodes, after all, and you're setting
dir's timestamp to "inode"'s value. It's not a big deal today since
they're always on the same sb, but the ultimate goal of these changes is
to implement multigrain timestamps. That will mean that fetching a fine-
grained timestamp for an update when the existing mtime or ctime value
has been queried via getattr.

With that change, I think it's best that we treat updates to different
inodes individually, as some of them may require updating with a fine-
grained timestamp and some may not.

> > return 0;
> >  }
> >  EXPORT_SYMBOL_GPL(simple_rename_exchange);
> > @@ -720,7 +741,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode 
> > *old_dir,
> >   struct dentry *old_dentry, struct inode *new_dir,
&g

Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-05 Thread Jeff Layton
On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote:
> v2:
> - prepend patches to add missing ctime updates
> - add simple_rename_timestamp helper function
> - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
> - drop individual inode_ctime_set_{sec,nsec} helpers
> 
> I've been working on a patchset to change how the inode->i_ctime is
> accessed in order to give us conditional, high-res timestamps for the
> ctime and mtime. struct timespec64 has unused bits in it that we can use
> to implement this. In order to do that however, we need to wrap all
> accesses of inode->i_ctime to ensure that bits used as flags are
> appropriately handled.
> 
> The patchset starts with reposts of some missing ctime updates that I
> spotted in the tree. It then adds a new helper function for updating the
> timestamp after a successful rename, and new ctime accessor
> infrastructure.
> 
> The bulk of the patchset is individual conversions of different
> subsysteme to use the new infrastructure. Finally, the patchset renames
> the i_ctime field to __i_ctime to help ensure that I didn't miss
> anything.
> 
> This should apply cleanly to linux-next as of this morning.
> 
> Most of this conversion was done via 5 different coccinelle scripts, run
> in succession, with a large swath of by-hand conversions to clean up the
> remainder.
> 

A couple of other things I should note:

If you sent me an Acked-by or Reviewed-by in the previous set, then I
tried to keep it on the patch here, since the respun patches are mostly
just renaming stuff from v1. Let me know if I've missed any.

I've also pushed the pile to my tree as this tag:


https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/tag/?h=ctime.20230705

In case that's easier to work with.

Cheers,
-- 
Jeff Layton 


[PATCH v2 92/92] fs: rename i_ctime field to __i_ctime

2023-07-05 Thread Jeff Layton
Now that everything in-tree is converted to use the accessor functions,
rename the i_ctime field in the inode to discourage direct access.

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 14e38bd900f1..b66442f91835 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -642,7 +642,7 @@ struct inode {
loff_t  i_size;
struct timespec64   i_atime;
struct timespec64   i_mtime;
-   struct timespec64   i_ctime;
+   struct timespec64   __i_ctime; /* use inode_*_ctime accessors! */
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
@@ -1485,7 +1485,7 @@ struct timespec64 inode_set_ctime_current(struct inode 
*inode);
  */
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-   return inode->i_ctime;
+   return inode->__i_ctime;
 }
 
 /**
@@ -1498,7 +1498,7 @@ static inline struct timespec64 inode_get_ctime(const 
struct inode *inode)
 static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
  struct timespec64 ts)
 {
-   inode->i_ctime = ts;
+   inode->__i_ctime = ts;
return ts;
 }
 
-- 
2.41.0



[PATCH v2 08/92] fs: new helper: simple_rename_timestamp

2023-07-05 Thread Jeff Layton
A rename potentially involves updating 4 different inode timestamps. Add
a function that handles the details sanely, and convert the libfs.c
callers to use it.

Signed-off-by: Jeff Layton 
---
 fs/libfs.c | 36 +++-
 include/linux/fs.h |  2 ++
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index a7e56baf8bbd..9ee79668c909 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -692,6 +692,31 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL(simple_rmdir);
 
+/**
+ * simple_rename_timestamp - update the various inode timestamps for rename
+ * @old_dir: old parent directory
+ * @old_dentry: dentry that is being renamed
+ * @new_dir: new parent directory
+ * @new_dentry: target for rename
+ *
+ * POSIX mandates that the old and new parent directories have their ctime and
+ * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have
+ * their ctime updated.
+ */
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+struct inode *new_dir, struct dentry *new_dentry)
+{
+   struct inode *newino = d_inode(new_dentry);
+
+   old_dir->i_mtime = inode_set_ctime_current(old_dir);
+   if (new_dir != old_dir)
+   new_dir->i_mtime = inode_set_ctime_current(new_dir);
+   inode_set_ctime_current(d_inode(old_dentry));
+   if (newino)
+   inode_set_ctime_current(newino);
+}
+EXPORT_SYMBOL_GPL(simple_rename_timestamp);
+
 int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
   struct inode *new_dir, struct dentry *new_dentry)
 {
@@ -707,11 +732,7 @@ int simple_rename_exchange(struct inode *old_dir, struct 
dentry *old_dentry,
inc_nlink(old_dir);
}
}
-   old_dir->i_ctime = old_dir->i_mtime =
-   new_dir->i_ctime = new_dir->i_mtime =
-   d_inode(old_dentry)->i_ctime =
-   d_inode(new_dentry)->i_ctime = current_time(old_dir);
-
+   simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
return 0;
 }
 EXPORT_SYMBOL_GPL(simple_rename_exchange);
@@ -720,7 +741,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode 
*old_dir,
  struct dentry *old_dentry, struct inode *new_dir,
  struct dentry *new_dentry, unsigned int flags)
 {
-   struct inode *inode = d_inode(old_dentry);
int they_are_dirs = d_is_dir(old_dentry);
 
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
@@ -743,9 +763,7 @@ int simple_rename(struct mnt_idmap *idmap, struct inode 
*old_dir,
inc_nlink(new_dir);
}
 
-   old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
-   new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
-
+   simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
return 0;
 }
 EXPORT_SYMBOL(simple_rename);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bdfbd11a5811..14e38bd900f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2979,6 +2979,8 @@ extern int simple_open(struct inode *inode, struct file 
*file);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+struct inode *new_dir, struct dentry *new_dentry);
 extern int simple_rename_exchange(struct inode *old_dir, struct dentry 
*old_dentry,
  struct inode *new_dir, struct dentry 
*new_dentry);
 extern int simple_rename(struct mnt_idmap *, struct inode *,
-- 
2.41.0



[PATCH v2 07/92] fs: add ctime accessors infrastructure

2023-07-05 Thread Jeff Layton
struct timespec64 has unused bits in the tv_nsec field that can be used
for other purposes. In future patches, we're going to change how the
inode->i_ctime is accessed in certain inodes in order to make use of
them. In order to do that safely though, we'll need to eradicate raw
accesses of the inode->i_ctime field from the kernel.

Add new accessor functions for the ctime that we use to replace them.

Reviewed-by: Jan Kara 
Reviewed-by: Luis Chamberlain 
Signed-off-by: Jeff Layton 
---
 fs/inode.c | 16 
 include/linux/fs.h | 45 -
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/fs/inode.c b/fs/inode.c
index d37fad91c8da..21b026d95b51 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode *inode)
 }
 EXPORT_SYMBOL(current_time);
 
+/**
+ * inode_set_ctime_current - set the ctime to current_time
+ * @inode: inode
+ *
+ * Set the inode->i_ctime to the current value for the inode. Returns
+ * the current value that was assigned to i_ctime.
+ */
+struct timespec64 inode_set_ctime_current(struct inode *inode)
+{
+   struct timespec64 now = current_time(inode);
+
+   inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
+   return now;
+}
+EXPORT_SYMBOL(inode_set_ctime_current);
+
 /**
  * in_group_or_capable - check whether caller is CAP_FSETID privileged
  * @idmap: idmap of the mount @inode was found from
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 824accb89a91..bdfbd11a5811 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1474,7 +1474,50 @@ static inline bool fsuidgid_has_mapping(struct 
super_block *sb,
   kgid_has_mapping(fs_userns, kgid);
 }
 
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+
+/**
+ * inode_get_ctime - fetch the current ctime from the inode
+ * @inode: inode from which to fetch ctime
+ *
+ * Grab the current ctime from the inode and return it.
+ */
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+   return inode->i_ctime;
+}
+
+/**
+ * inode_set_ctime_to_ts - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @ts: value to set in the ctime field
+ *
+ * Set the ctime in @inode to @ts
+ */
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+   inode->i_ctime = ts;
+   return ts;
+}
+
+/**
+ * inode_set_ctime - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @sec: tv_sec value to set
+ * @nsec: tv_nsec value to set
+ *
+ * Set the ctime in @inode to { @sec, @nsec }
+ */
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+   time64_t sec, long nsec)
+{
+   struct timespec64 ts = { .tv_sec  = sec,
+.tv_nsec = nsec };
+
+   return inode_set_ctime_to_ts(inode, ts);
+}
 
 /*
  * Snapshotting support.
-- 
2.41.0



[PATCH v2 00/89] fs: new accessors for inode->i_ctime

2023-07-05 Thread Jeff Layton
v2:
- prepend patches to add missing ctime updates
- add simple_rename_timestamp helper function
- rename ctime accessor functions as inode_get_ctime/inode_set_ctime_*
- drop individual inode_ctime_set_{sec,nsec} helpers

I've been working on a patchset to change how the inode->i_ctime is
accessed in order to give us conditional, high-res timestamps for the
ctime and mtime. struct timespec64 has unused bits in it that we can use
to implement this. In order to do that however, we need to wrap all
accesses of inode->i_ctime to ensure that bits used as flags are
appropriately handled.

The patchset starts with reposts of some missing ctime updates that I
spotted in the tree. It then adds a new helper function for updating the
timestamp after a successful rename, and new ctime accessor
infrastructure.

The bulk of the patchset is individual conversions of different
subsysteme to use the new infrastructure. Finally, the patchset renames
the i_ctime field to __i_ctime to help ensure that I didn't miss
anything.

This should apply cleanly to linux-next as of this morning.

Most of this conversion was done via 5 different coccinelle scripts, run
in succession, with a large swath of by-hand conversions to clean up the
remainder.

The coccinelle scripts that were used are below:

::
cocci/ctime1.cocci
::
// convert as much to use inode_set_ctime_current as possible
@@
identifier timei;
struct inode *inode;
expression E1, E2;
@@
(
- inode->i_ctime = E1 = E2 = current_time(timei)
+ E1 = E2 = inode_set_ctime_current(inode)
|
- inode->i_ctime = E1 = current_time(timei)
+ E1 = inode_set_ctime_current(inode)
|
- E1 = inode->i_ctime = current_time(timei)
+ E1 = inode_set_ctime_current(inode)
|
- inode->i_ctime = current_time(timei)
+ inode_set_ctime_current(inode)
)

@@
struct inode *inode;
expression E1, E2, E3;
@@
(
- E1 = current_time(inode)
+ E1 = inode_set_ctime_current(inode)
|
- E1 = current_time(E3)
+ E1 = inode_set_ctime_current(inode)
)
...
(
- inode->i_ctime = E1;
|
- E2 = inode->i_ctime = E1;
+ E2 = E1;
)
::
cocci/ctime2.cocci
::
// get the places that set individual timespec64 fields
@@
struct inode *inode;
expression val, val2;
@@
- inode->i_ctime.tv_sec = val
+ inode_set_ctime(inode, val, val2)
...
- inode->i_ctime.tv_nsec = val2;

// get places that just set the tv_sec
@@
struct inode *inode;
expression sec, E1, E2, E3;
@@
(
- E3 = inode->i_ctime.tv_sec = sec
+ E3 = inode_set_ctime(inode, sec, 0).tv_sec
|
- inode->i_ctime.tv_sec = sec
+ inode_set_ctime(inode, sec, 0)
)
<...
(
- inode->i_ctime.tv_nsec = 0;
|
- E1 = inode->i_ctime.tv_nsec = 0
+ E1 = 0
|
- inode->i_ctime.tv_nsec = E1 = 0
+ E1 = 0
|
- inode->i_ctime.tv_nsec = E1 = E2 = 0
+ E1 = E2 = 0
)
...>

::
cocci/ctime3.cocci
::
// convert places that set i_ctime to a timespec64 directly
@@
struct inode *inode;
expression ts, E1, E2;
@@
(
- inode->i_ctime = E1 = E2 = ts
+ E1 = E2 = inode_set_ctime_to_ts(inode, ts)
|
- inode->i_ctime = E1 = ts
+ E1 = inode_set_ctime_to_ts(inode, ts)
|
- inode->i_ctime = ts
+ inode_set_ctime_to_ts(inode, ts)
)
::
cocci/ctime4.cocci
::
// catch places that set the i_ctime in an inode embedded in another structure
@@
expression E1, E2, E3;
@@
(
- E3.i_ctime = E1 = E2 = current_time()
+ E1 = E2 = inode_set_ctime_current()
|
- E3.i_ctime = E1 = current_time()
+ E1 = inode_set_ctime_current()
|
- E1 = E3.i_ctime = current_time()
+ E1 = inode_set_ctime_current()
|
- E3.i_ctime = current_time()
+ inode_set_ctime_current()
)
::
cocci/ctime5.cocci
::
// convert the remaining i_ctime accesses
@@
struct inode *inode;
@@
- inode->i_ctime
+ inode_get_ctime(inode)


Jeff Layton (92):
  ibmvmc: update ctime in conjunction with mtime on write
  bfs: update ctime in addition to mtime when adding entries
  efivarfs: update ctime when mtime changes on a write
  exfat: ensure that ctime is updated whenever the mtime is
  apparmor: update ctime whenever the mtime changes on an inode
  cifs: update the ctime on a partial page write
  fs: add ctime accessors infrastructure
  fs: new helper: simple_rename_timestamp
  btrfs: convert to simple_rename_timestamp
  ubifs: convert to simple_rename_timestamp
  shmem: convert to simple_rename_timestamp
  exfat: convert to simple_rename_timestamp
  ntfs3: convert to simple_rename_timestamp
  reiserfs: convert to simple_rename_timestamp
  spufs: convert to ctime accessor functions
  s390: convert to ctime accessor functions
  binderfs: convert to ctime accessor functions
  infiniband: convert to ctime accessor functions
  ibm: convert to ctime accessor functions
  usb: convert to ctime accessor functions
  9p: convert to ctime accessor functions
  adfs: convert to ctime accessor functions
  affs: convert to ctime accessor functions
  afs: convert to ctime accessor functions
  fs: convert to ctime accessor functions
  au

[PATCH v2 15/92] spufs: convert to ctime accessor functions

2023-07-05 Thread Jeff Layton
In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Jeremy Kerr 
Reviewed-by: Jan Kara 
Signed-off-by: Jeff Layton 
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c 
b/arch/powerpc/platforms/cell/spufs/inode.c
index ea807aa0c31a..38c5be34c895 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
-   inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+   inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 out:
return inode;
 }
-- 
2.41.0



Re: [PATCH 01/79] fs: add ctime accessors infrastructure

2023-06-22 Thread Jeff Layton
On Thu, 2023-06-22 at 09:46 +0900, Damien Le Moal wrote:
> On 6/21/23 23:45, Jeff Layton wrote:
> > struct timespec64 has unused bits in the tv_nsec field that can be used
> > for other purposes. In future patches, we're going to change how the
> > inode->i_ctime is accessed in certain inodes in order to make use of
> > them. In order to do that safely though, we'll need to eradicate raw
> > accesses of the inode->i_ctime field from the kernel.
> > 
> > Add new accessor functions for the ctime that we can use to replace them.
> > 
> > Signed-off-by: Jeff Layton 
> 
> [...]
> 
> > +/**
> > + * inode_ctime_peek - fetch the current ctime from the inode
> > + * @inode: inode from which to fetch ctime
> > + *
> > + * Grab the current ctime from the inode and return it.
> > + */
> > +static inline struct timespec64 inode_ctime_peek(const struct inode *inode)
> 
> To be consistent with inode_ctime_set(), why not call this one 
> inode_ctime_get()

In later patches fetching the ctime for presentation may have side
effects on certain filesystems. Using "peek" here is a hint that we want
to avoid those side effects in these calls.

> ? Also, inode_set_ctime() & inode_get_ctime() may be a little more natural. 
> But
> no strong opinion about that though.
> 

I like the consistency of the inode_ctime_* prefix. It makes it simpler
to find these calls when grepping, etc.

That said, my opinions on naming are pretty loosely-held, so if the
consensus is that the names should as you suggest, I'll go along with
it.
-- 
Jeff Layton 


Re: [PATCH 00/79] fs: new accessors for inode->i_ctime

2023-06-21 Thread Jeff Layton
On Wed, 2023-06-21 at 15:21 -0400, Steven Rostedt wrote:
> On Wed, 21 Jun 2023 10:45:05 -0400
> Jeff Layton  wrote:
> 
> > Most of this conversion was done via coccinelle, with a few of the more
> > non-standard accesses done by hand. There should be no behavioral
> > changes with this set. That will come later, as we convert individual
> > filesystems to use multigrain timestamps.
> 
> BTW, Linus has suggested to me that whenever a conccinelle script is used,
> it should be included in the change log.
> 

Ok, here's what I have. I note again that my usage of coccinelle is
pretty primitive, so I ended up doing a fair bit of by-hand fixing after
applying these.

Given the way that this change is broken up into 77 patches by
subsystem, to which changelogs should I add it? I could add it to the
"infrastructure" patch, but that's the one where I _didn't_ use it. 

Maybe to patch #79 (the one that renames i_ctime)?


8<--
@@
expression inode;
@@

- inode->i_ctime = current_time(inode)
+ inode_set_current_ctime(inode)

@@
expression inode;
@@

- inode->i_ctime = inode->i_mtime = current_time(inode)
+ inode->i_mtime = inode_set_current_ctime(inode)

@@
struct inode *inode;
expression value;
@@

- inode->i_ctime = value;
+ inode_set_ctime(inode, value);

@@
struct inode *inode;
expression val;
@@
- inode->i_ctime.tv_sec = val
+ inode_set_ctime_sec(inode, val)

@@
struct inode *inode;
expression val;
@@
- inode->i_ctime.tv_nsec = val
+ inode_set_ctime_nsec(inode, val)

@@
struct inode *inode;
@@
- inode->i_ctime
+ inode_ctime_peek(inode)



Re: [PATCH 01/79] fs: add ctime accessors infrastructure

2023-06-21 Thread Jeff Layton
On Wed, 2023-06-21 at 14:19 -0400, Tom Talpey wrote:
> On 6/21/2023 2:01 PM, Jeff Layton wrote:
> > On Wed, 2023-06-21 at 13:29 -0400, Tom Talpey wrote:
> > > On 6/21/2023 10:45 AM, Jeff Layton wrote:
> > > > struct timespec64 has unused bits in the tv_nsec field that can be used
> > > > for other purposes. In future patches, we're going to change how the
> > > > inode->i_ctime is accessed in certain inodes in order to make use of
> > > > them. In order to do that safely though, we'll need to eradicate raw
> > > > accesses of the inode->i_ctime field from the kernel.
> > > > 
> > > > Add new accessor functions for the ctime that we can use to replace 
> > > > them.
> > > > 
> > > > Signed-off-by: Jeff Layton 
> > > > ---
> > > >fs/inode.c | 16 ++
> > > >include/linux/fs.h | 53 
> > > > +-
> > > >2 files changed, 68 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/fs/inode.c b/fs/inode.c
> > > > index d37fad91c8da..c005e7328fbb 100644
> > > > --- a/fs/inode.c
> > > > +++ b/fs/inode.c
> > > > @@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode 
> > > > *inode)
> > > >}
> > > >EXPORT_SYMBOL(current_time);
> > > >
> > > > +/**
> > > > + * inode_ctime_set_current - set the ctime to current_time
> > > > + * @inode: inode
> > > > + *
> > > > + * Set the inode->i_ctime to the current value for the inode. Returns
> > > > + * the current value that was assigned to i_ctime.
> > > > + */
> > > > +struct timespec64 inode_ctime_set_current(struct inode *inode)
> > > > +{
> > > > +   struct timespec64 now = current_time(inode);
> > > > +
> > > > +   inode_set_ctime(inode, now);
> > > > +   return now;
> > > > +}
> > > > +EXPORT_SYMBOL(inode_ctime_set_current);
> > > > +
> > > >/**
> > > > * in_group_or_capable - check whether caller is CAP_FSETID 
> > > > privileged
> > > > * @idmap:   idmap of the mount @inode was found from
> > > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > > index 6867512907d6..9afb30606373 100644
> > > > --- a/include/linux/fs.h
> > > > +++ b/include/linux/fs.h
> > > > @@ -1474,7 +1474,58 @@ static inline bool fsuidgid_has_mapping(struct 
> > > > super_block *sb,
> > > >kgid_has_mapping(fs_userns, kgid);
> > > >}
> > > >
> > > > -extern struct timespec64 current_time(struct inode *inode);
> > > > +struct timespec64 current_time(struct inode *inode);
> > > > +struct timespec64 inode_ctime_set_current(struct inode *inode);
> > > > +
> > > > +/**
> > > > + * inode_ctime_peek - fetch the current ctime from the inode
> > > > + * @inode: inode from which to fetch ctime
> > > > + *
> > > > + * Grab the current ctime from the inode and return it.
> > > > + */
> > > > +static inline struct timespec64 inode_ctime_peek(const struct inode 
> > > > *inode)
> > > > +{
> > > > +   return inode->i_ctime;
> > > > +}
> > > > +
> > > > +/**
> > > > + * inode_ctime_set - set the ctime in the inode to the given value
> > > > + * @inode: inode in which to set the ctime
> > > > + * @ts: timespec value to set the ctime
> > > > + *
> > > > + * Set the ctime in @inode to @ts.
> > > > + */
> > > > +static inline struct timespec64 inode_ctime_set(struct inode *inode, 
> > > > struct timespec64 ts)
> > > > +{
> > > > +   inode->i_ctime = ts;
> > > > +   return ts;
> > > > +}
> > > > +
> > > > +/**
> > > > + * inode_ctime_set_sec - set only the tv_sec field in the inode ctime
> > > 
> > > I'm curious about why you choose to split the tv_sec and tv_nsec
> > > set_ functions. Do any callers not set them both? Wouldn't a
> > > single call enable a more atomic behavior someday?
> > > 
> > > inode_ctime_set_sec_nsec(struct inode *, time64_t, time64_t)
> > > 
> > > (or simply initialize a timespec64 and use inode_ctime_spec() )
&

Re: [PATCH 01/79] fs: add ctime accessors infrastructure

2023-06-21 Thread Jeff Layton
On Wed, 2023-06-21 at 13:29 -0400, Tom Talpey wrote:
> On 6/21/2023 10:45 AM, Jeff Layton wrote:
> > struct timespec64 has unused bits in the tv_nsec field that can be used
> > for other purposes. In future patches, we're going to change how the
> > inode->i_ctime is accessed in certain inodes in order to make use of
> > them. In order to do that safely though, we'll need to eradicate raw
> > accesses of the inode->i_ctime field from the kernel.
> > 
> > Add new accessor functions for the ctime that we can use to replace them.
> > 
> > Signed-off-by: Jeff Layton 
> > ---
> >   fs/inode.c | 16 ++
> >   include/linux/fs.h | 53 +-
> >   2 files changed, 68 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/inode.c b/fs/inode.c
> > index d37fad91c8da..c005e7328fbb 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode *inode)
> >   }
> >   EXPORT_SYMBOL(current_time);
> >   
> > +/**
> > + * inode_ctime_set_current - set the ctime to current_time
> > + * @inode: inode
> > + *
> > + * Set the inode->i_ctime to the current value for the inode. Returns
> > + * the current value that was assigned to i_ctime.
> > + */
> > +struct timespec64 inode_ctime_set_current(struct inode *inode)
> > +{
> > +   struct timespec64 now = current_time(inode);
> > +
> > +   inode_set_ctime(inode, now);
> > +   return now;
> > +}
> > +EXPORT_SYMBOL(inode_ctime_set_current);
> > +
> >   /**
> >* in_group_or_capable - check whether caller is CAP_FSETID privileged
> >* @idmap:idmap of the mount @inode was found from
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 6867512907d6..9afb30606373 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1474,7 +1474,58 @@ static inline bool fsuidgid_has_mapping(struct 
> > super_block *sb,
> >kgid_has_mapping(fs_userns, kgid);
> >   }
> >   
> > -extern struct timespec64 current_time(struct inode *inode);
> > +struct timespec64 current_time(struct inode *inode);
> > +struct timespec64 inode_ctime_set_current(struct inode *inode);
> > +
> > +/**
> > + * inode_ctime_peek - fetch the current ctime from the inode
> > + * @inode: inode from which to fetch ctime
> > + *
> > + * Grab the current ctime from the inode and return it.
> > + */
> > +static inline struct timespec64 inode_ctime_peek(const struct inode *inode)
> > +{
> > +   return inode->i_ctime;
> > +}
> > +
> > +/**
> > + * inode_ctime_set - set the ctime in the inode to the given value
> > + * @inode: inode in which to set the ctime
> > + * @ts: timespec value to set the ctime
> > + *
> > + * Set the ctime in @inode to @ts.
> > + */
> > +static inline struct timespec64 inode_ctime_set(struct inode *inode, 
> > struct timespec64 ts)
> > +{
> > +   inode->i_ctime = ts;
> > +   return ts;
> > +}
> > +
> > +/**
> > + * inode_ctime_set_sec - set only the tv_sec field in the inode ctime
> 
> I'm curious about why you choose to split the tv_sec and tv_nsec
> set_ functions. Do any callers not set them both? Wouldn't a
> single call enable a more atomic behavior someday?
> 
>inode_ctime_set_sec_nsec(struct inode *, time64_t, time64_t)
> 
> (or simply initialize a timespec64 and use inode_ctime_spec() )
> 

Yes, quite a few places set the fields individually. For example, when
loading a value from disk that doesn't have sufficient granularity to
set the nsecs field to anything but 0.

Could I have done it by declaring a local timespec64 variable and just
use the inode_ctime_set function in these places? Absolutely.

That's a bit more difficult to handle with coccinelle though. If someone
wants to suggest a way to do that without having to change all of these
call sites manually, then I'm open to redoing the set.

That might be better left for a later cleanup though.

> > + * @inode: inode in which to set the ctime
> > + * @sec:  value to set the tv_sec field
> > + *
> > + * Set the sec field in the ctime. Returns @sec.
> > + */
> > +static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t 
> > sec)
> > +{
> > +   inode->i_ctime.tv_sec = sec;
> > +   return sec;
> > +}
> > +
> > +/**
> > + * inode_ctime_set_nsec - set only the tv_nsec field in the inode ctime
> > + * @inode: inode in which to set the ctime
> > + * @nsec:  value to set the tv_nsec field
> > + *
> > + * Set the nsec field in the ctime. Returns @nsec.
> > + */
> > +static inline long inode_ctime_set_nsec(struct inode *inode, long nsec)
> > +{
> > +   inode->i_ctime.tv_nsec = nsec;
> > +   return nsec;
> > +}
> >   
> >   /*
> >* Snapshotting support.

-- 
Jeff Layton 


[PATCH 79/79] fs: rename i_ctime field to __i_ctime

2023-06-21 Thread Jeff Layton
Now that everything in-tree is converted to use the accessor functions,
rename the i_ctime field in the inode to make its accesses more
self-documenting.

Signed-off-by: Jeff Layton 
---
 include/linux/fs.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9afb30606373..2ca46c532b49 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -642,7 +642,7 @@ struct inode {
loff_t  i_size;
struct timespec64   i_atime;
struct timespec64   i_mtime;
-   struct timespec64   i_ctime;
+   struct timespec64   __i_ctime; /* use inode_ctime_* accessors! */
spinlock_t  i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short  i_bytes;
u8  i_blkbits;
@@ -1485,7 +1485,7 @@ struct timespec64 inode_ctime_set_current(struct inode 
*inode);
  */
 static inline struct timespec64 inode_ctime_peek(const struct inode *inode)
 {
-   return inode->i_ctime;
+   return inode->__i_ctime;
 }
 
 /**
@@ -1497,7 +1497,7 @@ static inline struct timespec64 inode_ctime_peek(const 
struct inode *inode)
  */
 static inline struct timespec64 inode_ctime_set(struct inode *inode, struct 
timespec64 ts)
 {
-   inode->i_ctime = ts;
+   inode->__i_ctime = ts;
return ts;
 }
 
@@ -1510,7 +1510,7 @@ static inline struct timespec64 inode_ctime_set(struct 
inode *inode, struct time
  */
 static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t sec)
 {
-   inode->i_ctime.tv_sec = sec;
+   inode->__i_ctime.tv_sec = sec;
return sec;
 }
 
@@ -1523,7 +1523,7 @@ static inline time64_t inode_ctime_set_sec(struct inode 
*inode, time64_t sec)
  */
 static inline long inode_ctime_set_nsec(struct inode *inode, long nsec)
 {
-   inode->i_ctime.tv_nsec = nsec;
+   inode->__i_ctime.tv_nsec = nsec;
return nsec;
 }
 
-- 
2.41.0



[PATCH 01/79] fs: add ctime accessors infrastructure

2023-06-21 Thread Jeff Layton
struct timespec64 has unused bits in the tv_nsec field that can be used
for other purposes. In future patches, we're going to change how the
inode->i_ctime is accessed in certain inodes in order to make use of
them. In order to do that safely though, we'll need to eradicate raw
accesses of the inode->i_ctime field from the kernel.

Add new accessor functions for the ctime that we can use to replace them.

Signed-off-by: Jeff Layton 
---
 fs/inode.c | 16 ++
 include/linux/fs.h | 53 +-
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/fs/inode.c b/fs/inode.c
index d37fad91c8da..c005e7328fbb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode *inode)
 }
 EXPORT_SYMBOL(current_time);
 
+/**
+ * inode_ctime_set_current - set the ctime to current_time
+ * @inode: inode
+ *
+ * Set the inode->i_ctime to the current value for the inode. Returns
+ * the current value that was assigned to i_ctime.
+ */
+struct timespec64 inode_ctime_set_current(struct inode *inode)
+{
+   struct timespec64 now = current_time(inode);
+
+   inode_set_ctime(inode, now);
+   return now;
+}
+EXPORT_SYMBOL(inode_ctime_set_current);
+
 /**
  * in_group_or_capable - check whether caller is CAP_FSETID privileged
  * @idmap: idmap of the mount @inode was found from
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..9afb30606373 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1474,7 +1474,58 @@ static inline bool fsuidgid_has_mapping(struct 
super_block *sb,
   kgid_has_mapping(fs_userns, kgid);
 }
 
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_ctime_set_current(struct inode *inode);
+
+/**
+ * inode_ctime_peek - fetch the current ctime from the inode
+ * @inode: inode from which to fetch ctime
+ *
+ * Grab the current ctime from the inode and return it.
+ */
+static inline struct timespec64 inode_ctime_peek(const struct inode *inode)
+{
+   return inode->i_ctime;
+}
+
+/**
+ * inode_ctime_set - set the ctime in the inode to the given value
+ * @inode: inode in which to set the ctime
+ * @ts: timespec value to set the ctime
+ *
+ * Set the ctime in @inode to @ts.
+ */
+static inline struct timespec64 inode_ctime_set(struct inode *inode, struct 
timespec64 ts)
+{
+   inode->i_ctime = ts;
+   return ts;
+}
+
+/**
+ * inode_ctime_set_sec - set only the tv_sec field in the inode ctime
+ * @inode: inode in which to set the ctime
+ * @sec:  value to set the tv_sec field
+ *
+ * Set the sec field in the ctime. Returns @sec.
+ */
+static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t sec)
+{
+   inode->i_ctime.tv_sec = sec;
+   return sec;
+}
+
+/**
+ * inode_ctime_set_nsec - set only the tv_nsec field in the inode ctime
+ * @inode: inode in which to set the ctime
+ * @nsec:  value to set the tv_nsec field
+ *
+ * Set the nsec field in the ctime. Returns @nsec.
+ */
+static inline long inode_ctime_set_nsec(struct inode *inode, long nsec)
+{
+   inode->i_ctime.tv_nsec = nsec;
+   return nsec;
+}
 
 /*
  * Snapshotting support.
-- 
2.41.0



[PATCH 00/79] fs: new accessors for inode->i_ctime

2023-06-21 Thread Jeff Layton
I've been working on a patchset to change how the inode->i_ctime is
accessed in order to give us conditional, high-res timestamps for the
ctime and mtime. struct timespec64 has unused bits in it that we can use
to implement this. In order to do that however, we need to wrap all
accesses of inode->i_ctime to ensure that bits used as flags are
appropriately handled.

This patchset first adds some new inode_ctime_* accessor functions. It
then converts all in-tree accesses of inode->i_ctime to use those new
functions and then renames the i_ctime field to __i_ctime to help ensure
that use of the accessors.

Most of this conversion was done via coccinelle, with a few of the more
non-standard accesses done by hand. There should be no behavioral
changes with this set. That will come later, as we convert individual
filesystems to use multigrain timestamps.

Some of these patches depend on the set I sent recently to add missing
ctime updates in various subsystems:


https://lore.kernel.org/linux-fsdevel/20230612104524.17058-1-jlay...@kernel.org/T/#m25399f903cc9526e46b2e0f5a35713c80b52fde9

Since this patchset is so large, I'm only going to send individual
conversion patches to the appropriate maintainers. Please send
Acked-by's or Reviewed-by's if you can. The intent is to merge these as
a set (probably in v6.6). Let me know if that causes conflicts though,
and we can work it out.

This is based on top of linux-next as of yesterday.

Jeff Layton (79):
  fs: add ctime accessors infrastructure
  spufs: switch to new ctime accessors
  s390: switch to new ctime accessors
  binderfs: switch to new ctime accessors
  qib_fs: switch to new ctime accessors
  ibm: switch to new ctime accessors
  usb: switch to new ctime accessors
  9p: switch to new ctime accessors
  adfs: switch to new ctime accessors
  affs: switch to new ctime accessors
  afs: switch to new ctime accessors
  fs: switch to new ctime accessors
  autofs: switch to new ctime accessors
  befs: switch to new ctime accessors
  bfs: switch to new ctime accessors
  btrfs: switch to new ctime accessors
  ceph: switch to new ctime accessors
  coda: switch to new ctime accessors
  configfs: switch to new ctime accessors
  cramfs: switch to new ctime accessors
  debugfs: switch to new ctime accessors
  devpts: switch to new ctime accessors
  ecryptfs: switch to new ctime accessors
  efivarfs: switch to new ctime accessors
  efs: switch to new ctime accessors
  erofs: switch to new ctime accessors
  exfat: switch to new ctime accessors
  ext2: switch to new ctime accessors
  ext4: switch to new ctime accessors
  f2fs: switch to new ctime accessors
  fat: switch to new ctime accessors
  freevxfs: switch to new ctime accessors
  fuse: switch to new ctime accessors
  gfs2: switch to new ctime accessors
  hfs: switch to new ctime accessors
  hfsplus: switch to new ctime accessors
  hostfs: switch to new ctime accessors
  hpfs: switch to new ctime accessors
  hugetlbfs: switch to new ctime accessors
  isofs: switch to new ctime accessors
  jffs2: switch to new ctime accessors
  jfs: switch to new ctime accessors
  kernfs: switch to new ctime accessors
  minix: switch to new ctime accessors
  nfs: switch to new ctime accessors
  nfsd: switch to new ctime accessors
  nilfs2: switch to new ctime accessors
  ntfs: switch to new ctime accessors
  ntfs3: switch to new ctime accessors
  ocfs2: switch to new ctime accessors
  omfs: switch to new ctime accessors
  openpromfs: switch to new ctime accessors
  orangefs: switch to new ctime accessors
  overlayfs: switch to new ctime accessors
  proc: switch to new ctime accessors
  pstore: switch to new ctime accessors
  qnx4: switch to new ctime accessors
  qnx6: switch to new ctime accessors
  ramfs: switch to new ctime accessors
  reiserfs: switch to new ctime accessors
  romfs: switch to new ctime accessors
  smb: switch to new ctime accessors
  squashfs: switch to new ctime accessors
  sysv: switch to new ctime accessors
  tracefs: switch to new ctime accessors
  ubifs: switch to new ctime accessors
  udf: switch to new ctime accessors
  ufs: switch to new ctime accessors
  vboxsf: switch to new ctime accessors
  xfs: switch to new ctime accessors
  zonefs: switch to new ctime accessors
  mqueue: switch to new ctime accessors
  bpf: switch to new ctime accessors
  shmem: switch to new ctime accessors
  rpc_pipefs: switch to new ctime accessors
  apparmor: switch to new ctime accessors
  security: switch to new ctime accessors
  selinux: switch to new ctime accessors
  fs: rename i_ctime field to __i_ctime

 arch/powerpc/platforms/cell/spufs/inode.c |  2 +-
 arch/s390/hypfs/inode.c   |  4 +-
 drivers/android/binderfs.c|  8 +--
 drivers/infiniband/hw/qib/qib_fs.c|  4 +-
 drivers/misc/ibmasm/ibmasmfs.c|  2 +-
 drivers/misc/ibmvmc.c |  2 +-
 drivers/usb/core/devio.c  | 16 +++---
 drivers/usb/gadget/function/f_fs.c|  6 +--
 dr

[PATCH 02/79] spufs: switch to new ctime accessors

2023-06-21 Thread Jeff Layton
In later patches, we're going to change how the ctime.tv_nsec field is
utilized. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton 
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c 
b/arch/powerpc/platforms/cell/spufs/inode.c
index ea807aa0c31a..55418395bd9a 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
-   inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+   inode->i_atime = inode->i_mtime = inode_ctime_set_current(inode);
 out:
return inode;
 }
-- 
2.41.0



Re: [PATCH 4/5] uapi: always define F_GETLK64/F_SETLK64/F_SETLKW64 in fcntl.h

2022-01-12 Thread Jeff Layton
On Wed, 2022-01-12 at 09:28 +0100, Arnd Bergmann wrote:
> On Wed, Jan 12, 2022 at 8:56 AM Christoph Hellwig  wrote:
> > 
> > On Tue, Jan 11, 2022 at 04:33:30PM +0100, Arnd Bergmann wrote:
> > > This is a very subtle change to the exported UAPI header contents:
> > > On 64-bit architectures, the three unusable numbers are now always
> > > shown, rather than depending on a user-controlled symbol.
> > 
> > Well, the change is bigger and less subtle.  Before this change the
> > constants were never visible to userspace at all (except on mips),
> > because the #ifdef CONFIG_64BIT it never set for userspace builds.
> 
> I suppose you mean /always/ visible here, with that ifndef.
> 
> > > This is probably what we want here for compatibility reasons, but I think
> > > it should be explained in the changelog text, and I'd like Jeff or Bruce
> > > to comment on it as well: the alternative here would be to make the
> > > uapi definition depend on __BITS_PER_LONG==32, which is
> > > technically the right thing to do but more a of a change.
> > 
> > I can change this to #if __BITS_PER_LONG==32 || defined(__KERNEL__),
> > but it will still be change in what userspace sees.
> 
> Exactly, that is the tradeoff, which is why I'd like the flock maintainers
> to say which way they prefer. We can either do it more correctly (hiding
> the constants from user space when they are not usable), or with less
> change (removing the incorrect #ifdef). Either way sounds reasonable
> to me, I mainly care that this is explained in the changelog and that the
> maintainers are aware of the two options.
> 

I don't have a strong opinion here. If we were taking symbols away that
were previously visible to userland it would be one thing, but since
we're just adding symbols that may not have been there before, this
seems less likely to break anything.

I probably lean toward Christoph's original solution instead of keeping
the conditional definitions. It's hard to imagine there are many
programs that care whether these other symbols are defined or not.

You can add this to the original patch:

Acked-by: Jeff Layton 



Re: [PATCH 4/7] arch: Remove leftovers from mandatory file locking

2021-11-05 Thread Jeff Layton
On Fri, 2021-11-05 at 16:43 +0100, Alexandre Ghiti wrote:
> This config was removed so remove all references to it.
> 
> Fixes: f7e33bdbd6d1 ("fs: remove mandatory file locking support")
> Signed-off-by: Alexandre Ghiti 
> ---
>  arch/mips/configs/decstation_64_defconfig  | 1 -
>  arch/mips/configs/decstation_defconfig | 1 -
>  arch/mips/configs/decstation_r4k_defconfig | 1 -
>  3 files changed, 3 deletions(-)
> 
> diff --git a/arch/mips/configs/decstation_64_defconfig 
> b/arch/mips/configs/decstation_64_defconfig
> index 85f1955b4b00..e2ed105f8c97 100644
> --- a/arch/mips/configs/decstation_64_defconfig
> +++ b/arch/mips/configs/decstation_64_defconfig
> @@ -144,7 +144,6 @@ CONFIG_EXT2_FS_SECURITY=y
>  CONFIG_EXT3_FS=y
>  CONFIG_EXT3_FS_POSIX_ACL=y
>  CONFIG_EXT3_FS_SECURITY=y
> -# CONFIG_MANDATORY_FILE_LOCKING is not set
>  CONFIG_ISO9660_FS=y
>  CONFIG_JOLIET=y
>  CONFIG_PROC_KCORE=y
> diff --git a/arch/mips/configs/decstation_defconfig 
> b/arch/mips/configs/decstation_defconfig
> index 30a6eafdb1d0..7e987d6f5e34 100644
> --- a/arch/mips/configs/decstation_defconfig
> +++ b/arch/mips/configs/decstation_defconfig
> @@ -140,7 +140,6 @@ CONFIG_EXT2_FS_SECURITY=y
>  CONFIG_EXT3_FS=y
>  CONFIG_EXT3_FS_POSIX_ACL=y
>  CONFIG_EXT3_FS_SECURITY=y
> -# CONFIG_MANDATORY_FILE_LOCKING is not set
>  CONFIG_ISO9660_FS=y
>  CONFIG_JOLIET=y
>  CONFIG_PROC_KCORE=y
> diff --git a/arch/mips/configs/decstation_r4k_defconfig 
> b/arch/mips/configs/decstation_r4k_defconfig
> index e2b58dbf4aa9..6df5f6f2ac8e 100644
> --- a/arch/mips/configs/decstation_r4k_defconfig
> +++ b/arch/mips/configs/decstation_r4k_defconfig
> @@ -140,7 +140,6 @@ CONFIG_EXT2_FS_SECURITY=y
>  CONFIG_EXT3_FS=y
>  CONFIG_EXT3_FS_POSIX_ACL=y
>  CONFIG_EXT3_FS_SECURITY=y
> -# CONFIG_MANDATORY_FILE_LOCKING is not set
>  CONFIG_ISO9660_FS=y
>  CONFIG_JOLIET=y
>  CONFIG_PROC_KCORE=y

Reviewed-by: Jeff Layton 


Re: [PATCH v12 11/12] open: openat2(2) syscall

2019-09-07 Thread Jeff Layton
 no.

Also, once you want to add a new flag, then we get into the mess of how
to document whether open/openat also support it. It'd be good to freeze
changes on those syscalls and aim to only introduce new functionality in
openat2.

That would also allow us to drop some flags from openat2 that we really
don't need, and maybe expand the flag space to 64 bits initially, to
allow for expansion into the future.

Thoughts?

> + * @flags: O_* flags.
> + * @mode: O_CREAT/O_TMPFILE file mode.
> + * @upgrade_mask: UPGRADE_* flags (to restrict O_PATH re-opening).
> + * @resolve: RESOLVE_* flags.
> + */
> +struct open_how {
> + __u32 flags;
> + union {
> + __u16 mode;
> + __u16 upgrade_mask;
> + };
> + __u16 resolve;
> +};
> +
> +#define OPEN_HOW_SIZE_VER0   8 /* sizeof first published struct */
> +

Hmm, there is no version field. When you want to expand this in the
future, what is the plan? Add a new flag to indicate that it's some
length?


> +/* how->resolve flags for openat2(2). */
> +#define RESOLVE_NO_XDEV  0x01 /* Block mount-point crossings
> + (includes bind-mounts). */
> +#define RESOLVE_NO_MAGICLINKS0x02 /* Block traversal through 
> procfs-style
> + "magic-links". */
> +#define RESOLVE_NO_SYMLINKS  0x04 /* Block traversal through all symlinks
> + (implies OEXT_NO_MAGICLINKS) */
> +#define RESOLVE_BENEATH  0x08 /* Block "lexical" trickery like
> + "..", symlinks, and absolute
> + paths which escape the dirfd. */
> +#define RESOLVE_IN_ROOT  0x10 /* Make all jumps to "/" and ".."
> + be scoped inside the dirfd
> + (similar to chroot(2)). */
> +
> +/* how->upgrade flags for openat2(2). */
> +/* First bit is reserved for a future UPGRADE_NOEXEC flag. */
> +#define UPGRADE_NOREAD   0x02 /* Block re-opening with MAY_READ. 
> */
> +#define UPGRADE_NOWRITE  0x04 /* Block re-opening with 
> MAY_WRITE. */
>  
>  #endif /* _UAPI_LINUX_FCNTL_H */

-- 
Jeff Layton 



Re: [PATCH RESEND v11 7/8] open: openat2(2) syscall

2019-08-28 Thread Jeff Layton
On Mon, 2019-08-26 at 19:50 +, sba...@catern.com wrote:
> Aleksa Sarai  writes:
> > To this end, we introduce the openat2(2) syscall. It provides all of the
> > features of openat(2) through the @how->flags argument, but also
> > also provides a new @how->resolve argument which exposes RESOLVE_* flags
> > that map to our new LOOKUP_* flags. It also eliminates the long-standing
> > ugliness of variadic-open(2) by embedding it in a struct.
> 
> I don't like this usage of a structure in memory to pass arguments that
> would fit in registers. This would be quite inconvenient for me as a
> userspace developer.
> 
> Others have brought up issues with this: the issue of seccomp, and the
> issue of mismatch between the userspace interface and the kernel
> interface, are the most important for me. I want to add another,
> admittedly somewhat niche, concern.
> 
> This interfaces requires a program to allocate memory (even on the
> stack) just to pass arguments to the kernel which could be passed
> without allocating that memory. That makes it more difficult and less
> efficient to use this syscall in any case where memory is not so easily
> allocatable: such as early program startup or assembly, where the stack
> may be limited in size or not even available yet, or when injecting a
> syscall while ptracing.
> 
> A struct-passing interface was needed for clone, since we ran out of
> registers; but we have not run out of registers yet for openat, so it
> would be nice to avoid this if we can. We can always expand later...
> 

We can't really expand later like you suggest.

Suppose in a couple of years that we need to add some new argument to
openat2 that isn't just a new flag. If all these values are passed by
individual arguments, you can't add one later without adding yet another
syscall.

Using a struct for this allows this to be extended later, OTOH. You can
extend it, and add a flag that tells the kernel that it can access the
new field. No new syscall required.
-- 
Jeff Layton 



[PATCH] fs: convert a pile of fsync routines to errseq_t based reporting

2017-07-28 Thread Jeff Layton
From: Jeff Layton <jlay...@redhat.com>

This patch converts most of the in-kernel filesystems that do writeback
out of the pagecache to report errors using the errseq_t-based
infrastructure that was recently added. This allows them to report
errors once for each open file description.

Most filesystems have a fairly straightforward fsync operation. They
call filemap_write_and_wait_range to write back all of the data and
wait on it, and then (sometimes) sync out the metadata.

For those filesystems this is a straightforward conversion from calling
filemap_write_and_wait_range in their fsync operation to calling
file_write_and_wait_range.

Signed-off-by: Jeff Layton <jlay...@redhat.com>
---
 arch/powerpc/platforms/cell/spufs/file.c   | 2 +-
 drivers/staging/lustre/lustre/llite/file.c | 2 +-
 drivers/video/fbdev/core/fb_defio.c| 2 +-
 fs/9p/vfs_file.c   | 4 ++--
 fs/affs/file.c | 2 +-
 fs/afs/write.c | 2 +-
 fs/cifs/file.c | 4 ++--
 fs/exofs/file.c| 2 +-
 fs/f2fs/file.c | 2 +-
 fs/hfs/inode.c | 2 +-
 fs/hfsplus/inode.c | 2 +-
 fs/hostfs/hostfs_kern.c| 2 +-
 fs/hpfs/file.c | 2 +-
 fs/jffs2/file.c| 2 +-
 fs/jfs/file.c  | 2 +-
 fs/ncpfs/file.c| 2 +-
 fs/ntfs/dir.c  | 2 +-
 fs/ntfs/file.c | 2 +-
 fs/ocfs2/file.c| 2 +-
 fs/reiserfs/dir.c  | 2 +-
 fs/reiserfs/file.c | 2 +-
 fs/ubifs/file.c| 2 +-
 22 files changed, 24 insertions(+), 24 deletions(-)

Rolling up all of these conversions into a single patch, as Christoph
Hellwig suggested. Most of these are not tested, but the conversion
here is fairly straightforward.

Any maintainers who object, please let me know and I'll yank that
part out of this patch.

diff --git a/arch/powerpc/platforms/cell/spufs/file.c 
b/arch/powerpc/platforms/cell/spufs/file.c
index ae2f740a82f1..5ffcdeb1eb17 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1749,7 +1749,7 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t 
id)
 static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int 
datasync)
 {
struct inode *inode = file_inode(file);
-   int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+   int err = file_write_and_wait_range(file, start, end);
if (!err) {
inode_lock(inode);
err = spufs_mfc_flush(file, NULL);
diff --git a/drivers/staging/lustre/lustre/llite/file.c 
b/drivers/staging/lustre/lustre/llite/file.c
index ab1c85c1ed38..f7d07735ac66 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -2364,7 +2364,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, 
int datasync)
   PFID(ll_inode2fid(inode)), inode);
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
 
-   rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+   rc = file_write_and_wait_range(file, start, end);
inode_lock(inode);
 
/* catch async errors that were recorded back when async writeback
diff --git a/drivers/video/fbdev/core/fb_defio.c 
b/drivers/video/fbdev/core/fb_defio.c
index 37f69c061210..487d5e336e1b 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -69,7 +69,7 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, 
loff_t end, int datasy
 {
struct fb_info *info = file->private_data;
struct inode *inode = file_inode(file);
-   int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+   int err = file_write_and_wait_range(file, start, end);
if (err)
return err;
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3de3b4a89d89..4802d75b3cf7 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -445,7 +445,7 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, 
loff_t end,
struct p9_wstat wstat;
int retval;
 
-   retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+   retval = file_write_and_wait_range(filp, start, end);
if (retval)
return retval;
 
@@ -468,7 +468,7 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, 
loff_t end,
struct inode *inode = filp->f_mapping->host;
int retval;
 
-   retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+   retval = file_write_and_wait_range(filp, start, end);
if (retval)
return retval;
 
d