[PATCH v2 89/89] fs: move i_generation into new hole created after timestamp conversion
The recent change to use discrete integers instead of struct timespec64 shaved 8 bytes off of struct inode, but it also moves the i_lock into the previous cacheline, away from the fields that it protects. Move i_generation above the i_lock, which moves the new 4 byte hole to just after the i_fsnotify_mask in my setup. Suggested-by: Amir Goldstein Signed-off-by: Jeff Layton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 485b5e21c8e5..686c9f33e725 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -677,6 +677,7 @@ struct inode { u32 i_atime_nsec; u32 i_mtime_nsec; u32 i_ctime_nsec; + u32 i_generation; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -733,7 +734,6 @@ struct inode { unsigned i_dir_seq; }; - __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ -- 2.41.0
[PATCH v2 88/89] fs: switch timespec64 fields in inode to discrete integers
This shaves 8 bytes off struct inode with a garden-variety Fedora Kconfig. Signed-off-by: Jeff Layton --- include/linux/fs.h | 53 ++ 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 84fdaf399fbe..485b5e21c8e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -671,9 +671,12 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 __i_atime; - struct timespec64 __i_mtime; - struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ + time64_t i_atime_sec; + time64_t i_mtime_sec; + time64_t i_ctime_sec; + u32 i_atime_nsec; + u32 i_mtime_nsec; + u32 i_ctime_nsec; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -1517,23 +1520,27 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { - return inode->__i_atime.tv_sec; + return inode->i_atime_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { - return inode->__i_atime.tv_nsec; + return inode->i_atime_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->__i_atime; + struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode), +.tv_nsec = inode_get_atime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_atime = ts; + inode->i_atime_sec = ts.tv_sec; + inode->i_atime_nsec = ts.tv_nsec; return ts; } @@ -1542,28 +1549,32 @@ static inline struct timespec64 inode_set_atime(struct inode *inode, { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { - return inode->__i_mtime.tv_sec; + return inode->i_mtime_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { - return inode->__i_mtime.tv_nsec; + return 
inode->i_mtime_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->__i_mtime; + struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode), +.tv_nsec = inode_get_mtime_nsec(inode) }; + return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_mtime = ts; + inode->i_mtime_sec = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; return ts; } @@ -1577,34 +1588,30 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, static inline time64_t inode_get_ctime_sec(const struct inode *inode) { - return inode->__i_ctime.tv_sec; + return inode->i_ctime_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->__i_ctime.tv_nsec; + return inode->i_ctime_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->__i_ctime; + struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode), +.tv_nsec = inode_get_ctime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_ctime = ts; + inode->i_ctime_sec = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_nsec; return ts; } -/** - * inode_set_ctime - set the ctime in the inode - * @inode: inode in which to set the ctime - * @sec: tv_sec value to set - * @nsec: tv_nsec value to set - * - * Set the ctime in @inode to { @sec, @nsec } - */ static inline struct timespec64 inode_set_ctime(struct inode *inode, time64_t sec, long nsec) { -- 2.41.0
[PATCH v2 87/89] fs: rename inode i_atime and i_mtime fields
Rename these two fields to discourage direct access (and to help ensure that we mop up any leftover direct accesses). Signed-off-by: Jeff Layton --- include/linux/fs.h | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 3ca610d42176..84fdaf399fbe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -671,8 +671,8 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 i_atime; - struct timespec64 i_mtime; + struct timespec64 __i_atime; + struct timespec64 __i_mtime; struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; @@ -1517,23 +1517,23 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { - return inode->i_atime.tv_sec; + return inode->__i_atime.tv_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { - return inode->i_atime.tv_nsec; + return inode->__i_atime.tv_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->i_atime; + return inode->__i_atime; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->i_atime = ts; + inode->__i_atime = ts; return ts; } @@ -1547,23 +1547,23 @@ static inline struct timespec64 inode_set_atime(struct inode *inode, static inline time64_t inode_get_mtime_sec(const struct inode *inode) { - return inode->i_mtime.tv_sec; + return inode->__i_mtime.tv_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { - return inode->i_mtime.tv_nsec; + return inode->__i_mtime.tv_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->i_mtime; + return inode->__i_mtime; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->i_mtime = ts; + 
inode->__i_mtime = ts; return ts; } -- 2.41.0
[PATCH v2 02/89] fs: convert core infrastructure to new timestamp accessors
Convert the core vfs code to use the new timestamp accessor functions. Signed-off-by: Jeff Layton --- fs/attr.c | 4 ++-- fs/bad_inode.c | 2 +- fs/binfmt_misc.c | 2 +- fs/inode.c | 35 +-- fs/nsfs.c | 2 +- fs/pipe.c | 2 +- fs/stack.c | 4 ++-- fs/stat.c | 4 ++-- 8 files changed, 31 insertions(+), 24 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index a8ae5f6d9b16..bdf5deb06ea9 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -308,9 +308,9 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode, i_uid_update(idmap, attr, inode); i_gid_update(idmap, attr, inode); if (ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; + inode_set_atime_to_ts(inode, attr->ia_atime); if (ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; + inode_set_mtime_to_ts(inode, attr->ia_mtime); if (ia_valid & ATTR_CTIME) inode_set_ctime_to_ts(inode, attr->ia_ctime); if (ia_valid & ATTR_MODE) { diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 83f9566c973b..316d88da2ce1 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -208,7 +208,7 @@ void make_bad_inode(struct inode *inode) remove_inode_hash(inode); inode->i_mode = S_IFREG; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_op = &bad_inode_ops; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &bad_file_ops; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e0108d17b085..5d2be9b0a0a5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -547,7 +547,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) if (inode) { inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); } return inode; } diff --git a/fs/inode.c b/fs/inode.c index 3bb6193f436c..4f8984b97df0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1837,27 +1837,29 @@ EXPORT_SYMBOL(bmap); static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) { - struct timespec64 ctime; + 
struct timespec64 atime, mtime, ctime; if (!(mnt->mnt_flags & MNT_RELATIME)) return 1; /* * Is mtime younger than or equal to atime? If yes, update atime: */ - if (timespec64_compare(&inode->i_mtime, &inode->i_atime) >= 0) + atime = inode_get_atime(inode); + mtime = inode_get_mtime(inode); + if (timespec64_compare(&mtime, &atime) >= 0) return 1; /* * Is ctime younger than or equal to atime? If yes, update atime: */ ctime = inode_get_ctime(inode); - if (timespec64_compare(&ctime, &inode->i_atime) >= 0) + if (timespec64_compare(&ctime, &atime) >= 0) return 1; /* * Is the previous atime value older than a day? If yes, * update atime: */ - if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60) + if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60) return 1; /* * Good, we can skip the atime update: @@ -1888,12 +1890,13 @@ int inode_update_timestamps(struct inode *inode, int flags) if (flags & (S_MTIME|S_CTIME|S_VERSION)) { struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 mtime = inode_get_mtime(inode); now = inode_set_ctime_current(inode); if (!timespec64_equal(&now, &ctime)) updated |= S_CTIME; - if (!timespec64_equal(&now, &inode->i_mtime)) { - inode->i_mtime = now; + if (!timespec64_equal(&now, &mtime)) { + inode_set_mtime_to_ts(inode, now); updated |= S_MTIME; } if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) @@ -1903,8 +1906,10 @@ int inode_update_timestamps(struct inode *inode, int flags) } if (flags & S_ATIME) { - if (!timespec64_equal(&now, &inode->i_atime)) { - inode->i_atime = now; + struct timespec64 atime = inode_get_atime(inode); + + if (!timespec64_equal(&now, &atime)) { + inode_set_atime_to_ts(inode, now); updated |= S_ATIME; } } @@ -1963,7 +1968,7 @@ EXPORT_SYMBOL(inode_update_time); bool atime_needs_update(const struct path *path, struct inode *inode) { struct vfsmount *mnt = path->mnt; - struct timespec64 now; + struct t
[PATCH v2 01/89] fs: new accessor methods for atime and mtime
Recently, we converted the ctime accesses in the kernel to use new accessor functions. Linus recently pointed out though that if we add accessors for the atime and mtime, then that would allow us to seamlessly change how these timestamps are stored in the inode. Add new accessor functions for the atime and mtime that mirror the accessors for the ctime. Signed-off-by: Jeff Layton --- fs/libfs.c | 41 -- include/linux/fs.h | 85 +++--- 2 files changed, 102 insertions(+), 24 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 37f2d34ee090..abe2b5a40ba1 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -541,7 +541,8 @@ void simple_recursive_removal(struct dentry *dentry, dput(victim); // unpin it } if (victim == dentry) { - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, + inode_set_ctime_current(inode)); if (d_is_dir(dentry)) drop_nlink(inode); inode_unlock(inode); @@ -582,7 +583,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) */ root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; - root->i_atime = root->i_mtime = inode_set_ctime_current(root); + simple_inode_init_ts(root); s->s_root = d_make_root(root); if (!s->s_root) return -ENOMEM; @@ -638,8 +639,8 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den { struct inode *inode = d_inode(old_dentry); - dir->i_mtime = inode_set_ctime_to_ts(dir, -inode_set_ctime_current(inode)); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inc_nlink(inode); ihold(inode); dget(dentry); @@ -673,8 +674,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); - dir->i_mtime = inode_set_ctime_to_ts(dir, -inode_set_ctime_current(inode)); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); dput(dentry); return 0; @@ -709,9 +710,10 @@ void simple_rename_timestamp(struct inode *old_dir, struct 
dentry *old_dentry, { struct inode *newino = d_inode(new_dentry); - old_dir->i_mtime = inode_set_ctime_current(old_dir); + inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir)); if (new_dir != old_dir) - new_dir->i_mtime = inode_set_ctime_current(new_dir); + inode_set_mtime_to_ts(new_dir, + inode_set_ctime_current(new_dir)); inode_set_ctime_current(d_inode(old_dentry)); if (newino) inode_set_ctime_current(newino); @@ -926,7 +928,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, */ inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); @@ -952,7 +954,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, goto out; } inode->i_mode = S_IFREG | files->mode; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_fop = files->ops; inode->i_ino = i; d_add(dentry, inode); @@ -1520,7 +1522,7 @@ struct inode *alloc_anon_inode(struct super_block *s) inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); return inode; } EXPORT_SYMBOL(alloc_anon_inode); @@ -1912,3 +1914,20 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, return direct_written + buffered_written; } EXPORT_SYMBOL_GPL(direct_write_fallback); + +/** + * simple_inode_init_ts - initialize the timestamps for a new inode + * @inode: inode to be initialized + * + * When a new inode is created, most filesystems set the timestamps to the + * current time. Add a helper to do this. + */ +struct timespec64 simple_inode_init_
[PATCH v2 00/89] fs: new accessor methods for inode atime and mtime
v2: - bugfix in mtime handling - incorporate _sec and _nsec accessor functions (Chuck Lever) - move i_generation to plug hole after changing timestamps (Amir Goldstein) While working on the multigrain timestamp changes, Linus suggested adding wrappers for accessing the atime and mtime, similar to those that we already have for the ctime. With that, we could then move to using discrete integers instead of struct timespec64 in struct inode and shrink it. This patchset implements this. Linus suggested using macros for the new accessors, but the existing ctime wrappers were static inlines and since there are only 3 different timestamps, I didn't see that trying to fiddle with macros would gain us anything (other than less verbosity in fs.h). The second to last patch makes the conversion to discrete integers, which shaves 8 bytes off of struct inode on my x86_64 kernel. The last patch reshuffles things a little to keep the i_lock in the same cacheline as the fields it protects. About 75% of this conversion was done with coccinelle, with the rest done by hand. I think we probably ought to try to get everything but the last two patches into v6.7 (though we could consider those too if we're feeling lucky). 
Jeff Layton (89): fs: new accessor methods for atime and mtime fs: convert core infrastructure to new timestamp accessors spufs: convert to new timestamp accessors hypfs: convert to new timestamp accessors android: convert to new timestamp accessors char: convert to new timestamp accessors qib: convert to new timestamp accessors ibmasm: convert to new timestamp accessors misc: convert to new timestamp accessors x86: convert to new timestamp accessors tty: convert to new timestamp accessors function: convert to new timestamp accessors legacy: convert to new timestamp accessors usb: convert to new timestamp accessors 9p: convert to new timestamp accessors adfs: convert to new timestamp accessors affs: convert to new timestamp accessors afs: convert to new timestamp accessors autofs: convert to new timestamp accessors bcachefs: convert to new timestamp accessors befs: convert to new timestamp accessors bfs: convert to new timestamp accessors btrfs: convert to new timestamp accessors ceph: convert to new timestamp accessors coda: convert to new timestamp accessors configfs: convert to new timestamp accessors cramfs: convert to new timestamp accessors debugfs: convert to new timestamp accessors devpts: convert to new timestamp accessors efivarfs: convert to new timestamp accessors efs: convert to new timestamp accessors erofs: convert to new timestamp accessors exfat: convert to new timestamp accessors ext2: convert to new timestamp accessors ext4: convert to new timestamp accessors f2fs: convert to new timestamp accessors fat: convert to new timestamp accessors freevxfs: convert to new timestamp accessors fuse: convert to new timestamp accessors gfs2: convert to new timestamp accessors hfs: convert to new timestamp accessors hfsplus: convert to new timestamp accessors hostfs: convert to new timestamp accessors hpfs: convert to new timestamp accessors hugetlbfs: convert to new timestamp accessors isofs: convert to new timestamp accessors jffs2: convert to new timestamp 
accessors jfs: convert to new timestamp accessors kernfs: convert to new timestamp accessors minix: convert to new timestamp accessors nfs: convert to new timestamp accessors nfsd: convert to new timestamp accessors nilfs2: convert to new timestamp accessors ntfs: convert to new timestamp accessors ntfs3: convert to new timestamp accessors ocfs2: convert to new timestamp accessors omfs: convert to new timestamp accessors openpromfs: convert to new timestamp accessors orangefs: convert to new timestamp accessors overlayfs: convert to new timestamp accessors proc: convert to new timestamp accessors pstore: convert to new timestamp accessors qnx4: convert to new timestamp accessors qnx6: convert to new timestamp accessors ramfs: convert to new timestamp accessors reiserfs: convert to new timestamp accessors romfs: convert to new timestamp accessors client: convert to new timestamp accessors server: convert to new timestamp accessors squashfs: convert to new timestamp accessors sysv: convert to new timestamp accessors tracefs: convert to new timestamp accessors ubifs: convert to new timestamp accessors udf: convert to new timestamp accessors ufs: convert to new timestamp accessors vboxsf: convert to new timestamp accessors xfs: convert to new timestamp accessors zonefs: convert to new timestamp accessors linux: convert to new timestamp accessors ipc: convert to new timestamp accessors bpf: convert to new timestamp accessors mm: convert to new timestamp accessors sunrpc: convert to new timestamp accessors apparmor: convert to new timestamp accessors selinux: convert to new timestamp accessors
[PATCH v2 03/89] spufs: convert to new timestamp accessors
Convert to using the new inode timestamp accessor functions. Signed-off-by: Jeff Layton --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 38c5be34c895..10c1320adfd0 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); out: return inode; } -- 2.41.0
Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers
On Fri, 2023-09-29 at 11:44 +0200, Christian Brauner wrote: > > It is a lot of churn though. > > I think that i_{a,c,m}time shouldn't be accessed directly by > filesystems same as no filesystem should really access i_{g,u}id which > we also provide i_{g,u}id_{read,write}() accessors for. The mode is > another example where one really most often should use helpers because of all > the set*id stripping that we need to do (and the bugs that we had > because of this...). > > The interdependency between ctime and mtime is enough to hide this in > accessors. The other big advantage is simply grepability. So really I > would like to see this change even without the type switch. > > In other words, there's no need to lump the two changes together. Do the > conversion part and we can argue about the switch to discrete integers > separately. > > The other advantage is that we have a cycle to see any possible > regression from the conversion. > > Thoughts anyone? That works for me, and is sort of what I was planning anyway. I mostly just did the change to timestamp storage to see what it would look like afterward. FWIW, I'm planning to do a v2 patchbomb early next week, with the changes that Chuck suggested (specific helpers for fetching the _sec and _nsec fields). For now, I'll drop the change from timespec64 to discrete fields. We can do that in a separate follow-on set. -- Jeff Layton
Re: [PATCH 87/87] fs: move i_blocks up a few places in struct inode
On Thu, 2023-09-28 at 10:41 -0700, Linus Torvalds wrote: > On Thu, 28 Sept 2023 at 04:06, Jeff Layton wrote: > > > > Move i_blocks up above the i_lock, which moves the new 4 byte hole to > > just after the timestamps, without changing the size of the structure. > > I'm sure others have mentioned this, but 'struct inode' is marked with > __randomize_layout, so the actual layout may end up being very > different. > > I'm personally not convinced the whole structure randomization is > worth it - it's easy enough to figure out for any distro kernel since > the seed has to be the same across machines for modules to work, so > even if the seed isn't "public", any layout is bound to be fairly > easily discoverable. > > So the whole randomization only really works for private kernel > builds, and it adds this kind of pain where "optimizing" the structure > layout is kind of pointless depending on various options. > > I certainly *hope* no distro enables that pointless thing, but it's a worry. > I've never enabled struct randomization and don't know anyone who does. I figure if you turn that on, you get to keep all of the pieces when you start seeing weird performance problems. I think that we have to optimize for that being disabled. Even without that though, turning on and off options can change the layout...and then there are different arches, etc. I'm using a config derived from the Fedora x86_64 kernel images and hope that represents a reasonably common configuration. The only conditional members before the timestamps are based on CONFIG_FS_POSIX_ACL and CONFIG_SECURITY, which are almost always turned on with most distros. -- Jeff Layton
Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers
On Thu, 2023-09-28 at 10:19 -0700, Darrick J. Wong wrote: > On Thu, Sep 28, 2023 at 01:06:03PM -0400, Jeff Layton wrote: > > On Thu, 2023-09-28 at 11:48 -0400, Arnd Bergmann wrote: > > > On Thu, Sep 28, 2023, at 07:05, Jeff Layton wrote: > > > > This shaves 8 bytes off struct inode, according to pahole. > > > > > > > > Signed-off-by: Jeff Layton > > > > > > FWIW, this is similar to the approach that Deepa suggested > > > back in 2016: > > > > > > https://lore.kernel.org/lkml/1452144972-15802-3-git-send-email-deepa.ker...@gmail.com/ > > > > > > It was NaKed at the time because of the added complexity, > > > though it would have been much easier to do it then, > > > as we had to touch all the timespec references anyway. > > > > > > The approach still seems ok to me, but I'm not sure it's worth > > > doing it now if we didn't do it then. > > > > > > > I remember seeing those patches go by. I don't remember that change > > being NaK'ed, but I wasn't paying close attention at the time > > > > Looking at it objectively now, I think it's worth it to recover 8 bytes > > per inode and open a 4 byte hole that Amir can use to grow the > > i_fsnotify_mask. We might even able to shave off another 12 bytes > > eventually if we can move to a single 64-bit word per timestamp. > > I don't think you can, since btrfs timestamps utilize s64 seconds > counting in both directions from the Unix epoch. They also support ns > resolution: > > struct btrfs_timespec { > __le64 sec; > __le32 nsec; > } __attribute__ ((__packed__)); > Correct. We'd lose some fidelity in currently stored timestamps, but as Linus and Ted pointed out, anything below ~100ns granularity is effectively just noise, as that's the floor overhead for calling into the kernel. It's hard to argue that any application needs that sort of timestamp resolution, at least with contemporary hardware. 
Doing that would mean that tests that store specific values in the atime/mtime and expect to be able to fetch exactly that value back would break though, so we'd have to be OK with that if we want to try it. The good news is that it's relatively easy to experiment with new ways to store timestamps with these wrappers in place. -- Jeff Layton
Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers
On Thu, 2023-09-28 at 07:05 -0400, Jeff Layton wrote: > This shaves 8 bytes off struct inode, according to pahole. > > Signed-off-by: Jeff Layton > --- > include/linux/fs.h | 32 +++- > 1 file changed, 23 insertions(+), 9 deletions(-) > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 831657011036..de902ff2938b 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -671,9 +671,12 @@ struct inode { > }; > dev_t i_rdev; > loff_t i_size; > - struct timespec64 __i_atime; /* use inode_*_atime accessors */ > - struct timespec64 __i_mtime; /* use inode_*_mtime accessors */ > - struct timespec64 __i_ctime; /* use inode_*_ctime accessors */ > + time64_t i_atime_sec; > + time64_t i_mtime_sec; > + time64_t i_ctime_sec; > + u32 i_atime_nsec; > + u32 i_mtime_nsec; > + u32 i_ctime_nsec; > spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ > unsigned short i_bytes; > u8 i_blkbits; > @@ -1519,7 +1522,9 @@ struct timespec64 inode_set_ctime_current(struct inode > *inode); > */ > static inline struct timespec64 inode_get_ctime(const struct inode *inode) > { > - return inode->__i_ctime; > + struct timespec64 ts = { .tv_sec = inode->i_ctime_sec, > + .tv_nsec = inode->i_ctime_nsec }; > + return ts; > } > > > > /** > @@ -1532,7 +1537,8 @@ static inline struct timespec64 inode_get_ctime(const > struct inode *inode) > static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, > struct timespec64 ts) > { > - inode->__i_ctime = ts; > + inode->i_ctime_sec = ts.tv_sec; > + inode->i_ctime_nsec = ts.tv_sec; Bug above and in the other inode_set_?time_to_ts() functions. This isn't setting the nsec field correctly. 
> return ts; > } > > > > @@ -1555,13 +1561,17 @@ static inline struct timespec64 > inode_set_ctime(struct inode *inode, > > static inline struct timespec64 inode_get_atime(const struct inode *inode) > { > - return inode->__i_atime; > + struct timespec64 ts = { .tv_sec = inode->i_atime_sec, > + .tv_nsec = inode->i_atime_nsec }; > + > + return ts; > } > > static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, > struct timespec64 ts) > { > - inode->__i_atime = ts; > + inode->i_atime_sec = ts.tv_sec; > + inode->i_atime_nsec = ts.tv_sec; > return ts; > } > > @@ -1575,13 +1585,17 @@ static inline struct timespec64 > inode_set_atime(struct inode *inode, > > static inline struct timespec64 inode_get_mtime(const struct inode *inode) > { > - return inode->__i_mtime; > + struct timespec64 ts = { .tv_sec = inode->i_mtime_sec, > + .tv_nsec = inode->i_mtime_nsec }; > + > + return ts; > } > > static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, > struct timespec64 ts) > { > - inode->__i_mtime = ts; > + inode->i_atime_sec = ts.tv_sec; > + inode->i_atime_nsec = ts.tv_sec; Doh! s/atime/mtime/ in the above lines. > return ts; > } > Both bugs are fixed in my tree. -- Jeff Layton
Re: [PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers
On Thu, 2023-09-28 at 11:48 -0400, Arnd Bergmann wrote: > On Thu, Sep 28, 2023, at 07:05, Jeff Layton wrote: > > This shaves 8 bytes off struct inode, according to pahole. > > > > Signed-off-by: Jeff Layton > > FWIW, this is similar to the approach that Deepa suggested > back in 2016: > > https://lore.kernel.org/lkml/1452144972-15802-3-git-send-email-deepa.ker...@gmail.com/ > > It was NaKed at the time because of the added complexity, > though it would have been much easier to do it then, > as we had to touch all the timespec references anyway. > > The approach still seems ok to me, but I'm not sure it's worth > doing it now if we didn't do it then. > I remember seeing those patches go by. I don't remember that change being NaK'ed, but I wasn't paying close attention at the time. Looking at it objectively now, I think it's worth it to recover 8 bytes per inode and open a 4 byte hole that Amir can use to grow the i_fsnotify_mask. We might even be able to shave off another 12 bytes eventually if we can move to a single 64-bit word per timestamp. It is a lot of churn though. -- Jeff Layton
Re: [PATCH 87/87] fs: move i_blocks up a few places in struct inode
On Thu, 2023-09-28 at 14:35 +0300, Amir Goldstein wrote: > On Thu, Sep 28, 2023 at 2:06 PM Jeff Layton wrote: > > > > The recent change to use discrete integers instead of struct timespec64 > > in struct inode shaved 8 bytes off of it, but it also moves the i_lock > > into the previous cacheline, away from the fields that it protects. > > > > Move i_blocks up above the i_lock, which moves the new 4 byte hole to > > just after the timestamps, without changing the size of the structure. > > > > Instead of creating an implicit hole, can you please move i_generation > to fill the 4 bytes hole. > > It makes sense in the same cache line with i_ino and I could > use the vacant 4 bytes hole above i_fsnotify_mask to expand the > mask to 64bit (the 32bit event mask space is running out). > > Thanks, > Amir. > Sounds like a plan. Resulting struct inode size is the same (616 bytes with my kdevops kconfig). BTW: all of these changes are in my "amtime" branch if anyone wants to pull them down. -- Jeff Layton
[PATCH 87/87] fs: move i_blocks up a few places in struct inode
The recent change to use discrete integers instead of struct timespec64 in struct inode shaved 8 bytes off of it, but it also moves the i_lock into the previous cacheline, away from the fields that it protects. Move i_blocks up above the i_lock, which moves the new 4 byte hole to just after the timestamps, without changing the size of the structure. Signed-off-by: Jeff Layton --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index de902ff2938b..3e0fe0f52e7c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -677,11 +677,11 @@ struct inode { u32 i_atime_nsec; u32 i_mtime_nsec; u32 i_ctime_nsec; + blkcnt_t i_blocks; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; u8 i_write_hint; - blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; -- 2.41.0
[PATCH 86/87] fs: switch timespec64 fields in inode to discrete integers
This shaves 8 bytes off struct inode, according to pahole. Signed-off-by: Jeff Layton --- include/linux/fs.h | 32 +++- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 831657011036..de902ff2938b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -671,9 +671,12 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 __i_atime; /* use inode_*_atime accessors */ - struct timespec64 __i_mtime; /* use inode_*_mtime accessors */ - struct timespec64 __i_ctime; /* use inode_*_ctime accessors */ + time64_t i_atime_sec; + time64_t i_mtime_sec; + time64_t i_ctime_sec; + u32 i_atime_nsec; + u32 i_mtime_nsec; + u32 i_ctime_nsec; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -1519,7 +1522,9 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->__i_ctime; + struct timespec64 ts = { .tv_sec = inode->i_ctime_sec, +.tv_nsec = inode->i_ctime_nsec }; + return ts; } /** @@ -1532,7 +1537,8 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode) static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_ctime = ts; + inode->i_ctime_sec = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_sec; return ts; } @@ -1555,13 +1561,17 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode, static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->__i_atime; + struct timespec64 ts = { .tv_sec = inode->i_atime_sec, +.tv_nsec = inode->i_atime_nsec }; + + return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_atime = ts; + inode->i_atime_sec = ts.tv_sec; + inode->i_atime_nsec = ts.tv_sec; return ts; } @@ -1575,13 +1585,17 @@ static inline struct timespec64 inode_set_atime(struct 
inode *inode, static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->__i_mtime; + struct timespec64 ts = { .tv_sec = inode->i_mtime_sec, +.tv_nsec = inode->i_mtime_nsec }; + + return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_mtime = ts; + inode->i_mtime_sec = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; return ts; } -- 2.41.0
[PATCH 85/87] fs: rename i_atime and i_mtime fields to __i_atime and __i_mtime
Make it clear that these fields are private now, and that the accessors should be used instead. Signed-off-by: Jeff Layton --- include/linux/fs.h | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 12d247b82aa0..831657011036 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -671,9 +671,9 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 i_atime; - struct timespec64 i_mtime; - struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ + struct timespec64 __i_atime; /* use inode_*_atime accessors */ + struct timespec64 __i_mtime; /* use inode_*_mtime accessors */ + struct timespec64 __i_ctime; /* use inode_*_ctime accessors */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -1555,13 +1555,13 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode, static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->i_atime; + return inode->__i_atime; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->i_atime = ts; + inode->__i_atime = ts; return ts; } @@ -1575,13 +1575,13 @@ static inline struct timespec64 inode_set_atime(struct inode *inode, static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->i_mtime; + return inode->__i_mtime; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->i_mtime = ts; + inode->__i_mtime = ts; return ts; } -- 2.41.0
[PATCH 01/87] fs: new accessor methods for atime and mtime
Recently, we converted the ctime accesses in the kernel to use new accessor functions. Linus recently pointed out though that if we add accessors for the atime and mtime, then that would allow us to seamlessly change how these timestamps are stored in the inode. Add new accessor functions for the atime and mtime that mirror the accessors for the ctime. Signed-off-by: Jeff Layton --- fs/libfs.c | 13 + include/linux/fs.h | 42 ++ 2 files changed, 55 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index 37f2d34ee090..f5cdc7f7f5b5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1912,3 +1912,16 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, return direct_written + buffered_written; } EXPORT_SYMBOL_GPL(direct_write_fallback); + +/** + * simple_inode_init_ts - initialize the timestamps for a new inode + * @inode: inode to be initialized + * + * When a new inode is created, most filesystems set the timestamps to the + * current time. Add a helper to do this. + */ +struct timespec64 simple_inode_init_ts(struct inode *inode) +{ + return inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); +} +EXPORT_SYMBOL(simple_inode_init_ts); diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..12d247b82aa0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1553,6 +1553,48 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode, return inode_set_ctime_to_ts(inode, ts); } +static inline struct timespec64 inode_get_atime(const struct inode *inode) +{ + return inode->i_atime; +} + +static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->i_atime = ts; + return ts; +} + +static inline struct timespec64 inode_set_atime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, +.tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); +} + +static inline struct timespec64 inode_get_mtime(const struct inode *inode) +{
+ return inode->i_mtime; +} + +static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->i_mtime = ts; + return ts; +} + +static inline struct timespec64 inode_set_mtime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, +.tv_nsec = nsec }; + return inode_set_mtime_to_ts(inode, ts); +} + +struct timespec64 simple_inode_init_ts(struct inode *inode); + /* * Snapshotting support. */ -- 2.41.0
[PATCH 00/87] fs: new accessor methods for atime and mtime
While working on the multigrain timestamp changes, Linus suggested adding some similar wrappers for accessing the atime and mtime that we have for the ctime. With that, we could then move to using discrete integers instead of timespec64 in struct inode, and shrink it. Linus suggested using macros for the new accessors, but the existing ctime wrappers were static inlines and since there are only 3 different timestamps, I didn't see that trying to fiddle with macros would gain us anything. The first patches start with some new infrastructure, and then we move to converting different subsystems to use it. The second to last patch makes the conversion to discrete integers, which shaves 8 bytes off of struct inode on my x86_64 kernel. The last patch reshuffles things a bit more, to keep the i_lock in the same cacheline as the fields it protects (at least on x86_64). About 75% of this conversion was done with coccinelle, with the rest done by hand with vim. Jeff Layton (87): fs: new accessor methods for atime and mtime fs: convert core infrastructure to new {a,m}time accessors arch/powerpc/platforms/cell/spufs: convert to new inode {a,m}time accessors arch/s390/hypfs: convert to new inode {a,m}time accessors drivers/android: convert to new inode {a,m}time accessors drivers/char: convert to new inode {a,m}time accessors drivers/infiniband/hw/qib: convert to new inode {a,m}time accessors drivers/misc/ibmasm: convert to new inode {a,m}time accessors drivers/misc: convert to new inode {a,m}time accessors drivers/platform/x86: convert to new inode {a,m}time accessors drivers/tty: convert to new inode {a,m}time accessors drivers/usb/core: convert to new inode {a,m}time accessors drivers/usb/gadget/function: convert to new inode {a,m}time accessors drivers/usb/gadget/legacy: convert to new inode {a,m}time accessors fs/9p: convert to new inode {a,m}time accessors fs/adfs: convert to new inode {a,m}time accessors fs/affs: convert to new inode {a,m}time accessors fs/afs: convert 
to new inode {a,m}time accessors fs/autofs: convert to new inode {a,m}time accessors fs/befs: convert to new inode {a,m}time accessors fs/bfs: convert to new inode {a,m}time accessors fs/btrfs: convert to new inode {a,m}time accessors fs/ceph: convert to new inode {a,m}time accessors fs/coda: convert to new inode {a,m}time accessors fs/configfs: convert to new inode {a,m}time accessors fs/cramfs: convert to new inode {a,m}time accessors fs/debugfs: convert to new inode {a,m}time accessors fs/devpts: convert to new inode {a,m}time accessors fs/efivarfs: convert to new inode {a,m}time accessors fs/efs: convert to new inode {a,m}time accessors fs/erofs: convert to new inode {a,m}time accessors fs/exfat: convert to new inode {a,m}time accessors fs/ext2: convert to new inode {a,m}time accessors fs/ext4: convert to new inode {a,m}time accessors fs/f2fs: convert to new inode {a,m}time accessors fs/fat: convert to new inode {a,m}time accessors fs/freevxfs: convert to new inode {a,m}time accessors fs/fuse: convert to new inode {a,m}time accessors fs/gfs2: convert to new inode {a,m}time accessors fs/hfs: convert to new inode {a,m}time accessors fs/hfsplus: convert to new inode {a,m}time accessors fs/hostfs: convert to new inode {a,m}time accessors fs/hpfs: convert to new inode {a,m}time accessors fs/hugetlbfs: convert to new inode {a,m}time accessors fs/isofs: convert to new inode {a,m}time accessors fs/jffs2: convert to new inode {a,m}time accessors fs/jfs: convert to new inode {a,m}time accessors fs/kernfs: convert to new inode {a,m}time accessors fs/minix: convert to new inode {a,m}time accessors fs/nfs: convert to new inode {a,m}time accessors fs/nfsd: convert to new inode {a,m}time accessors fs/nilfs2: convert to new inode {a,m}time accessors fs/ntfs: convert to new inode {a,m}time accessors fs/ntfs3: convert to new inode {a,m}time accessors fs/ocfs2: convert to new inode {a,m}time accessors fs/omfs: convert to new inode {a,m}time accessors fs/openpromfs: convert to new 
inode {a,m}time accessors fs/orangefs: convert to new inode {a,m}time accessors fs/overlayfs: convert to new inode {a,m}time accessors fs/proc: convert to new inode {a,m}time accessors fs/pstore: convert to new inode {a,m}time accessors fs/qnx4: convert to new inode {a,m}time accessors fs/qnx6: convert to new inode {a,m}time accessors fs/ramfs: convert to new inode {a,m}time accessors fs/reiserfs: convert to new inode {a,m}time accessors fs/romfs: convert to new inode {a,m}time accessors fs/smb/client: convert to new inode {a,m}time accessors fs/smb/server: convert to new inode {a,m}time accessors fs/squashfs: convert to new inode {a,m}time accessors fs/sysv: convert to new inode {a,m}time accessors fs/tracefs: convert to new inode {a,m}time accessors fs/ubifs: convert to new inode {a,m}time accessors fs/udf: convert to new inode {a,m
[PATCH 03/87] arch/powerpc/platforms/cell/spufs: convert to new inode {a,m}time accessors
Signed-off-by: Jeff Layton --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 38c5be34c895..10c1320adfd0 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); out: return inode; } -- 2.41.0
Re: [PATCH v2 08/92] fs: new helper: simple_rename_timestamp
On Wed, 2023-08-30 at 01:19 +0100, Al Viro wrote: > On Wed, Jul 05, 2023 at 02:58:11PM -0400, Jeff Layton wrote: > > > + * POSIX mandates that the old and new parent directories have their ctime > > and > > + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), > > have > > + * their ctime updated. > > APPLICATION USAGE > Some implementations mark for update the last file status change timestamp > of renamed files and some do not. Applications which make use of the > last file status change timestamp may behave differently with respect > to renamed files unless they are designed to allow for either behavior. > > So for children POSIX permits rather than mandates. Doesn't really matter; > Linux behaviour had been to touch ctime on children since way back, if > not since the very beginning. Mea culpa. You're quite correct. I'll plan to roll a small patch to update the comment over this function. Thanks! -- Jeff Layton
Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime
On Mon, 2023-07-10 at 14:35 +0200, Christian Brauner wrote: > On Fri, Jul 07, 2023 at 08:42:31AM -0400, Jeff Layton wrote: > > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote: > > > v2: > > > - prepend patches to add missing ctime updates > > > - add simple_rename_timestamp helper function > > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > > > - drop individual inode_ctime_set_{sec,nsec} helpers > > > > > > > After review by Jan and others, and Jan's ext4 rework, the diff on top > > of the series I posted a couple of days ago is below. I don't really > > want to spam everyone with another ~100 patch v3 series, but I can if > > you think that's best. > > > > Christian, what would you like me to do here? > > I picked up the series from the list and folded the fixups you posted > here into the respective fs conversion patches. I hope that helps you > avoid a resend. You should have received a separate "thank you" mail for > all of this. > > To each patch that I folded one of the fixlets from below into I added a > git note that records a link to your mail here and the respective patch > hunk from this mail that I folded into the patch. git.kernel.org will > show notes by default. For example, > https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git/commit/?h=vfs.ctime&id=8b0e3c2e99004609a16ba145bcbdfdddb78e220e > should show you the note I added. You can also fetch them via > git fetch $remote refs/notes/*:refs/notes/* > (You probably know that ofc but jic.) if you're interested. > > Based on v6.5-rc1 as of today. > Many thanks!!! I'll get to work rebasing the multigrain timestamp series on top of that. > Btw, both b4 and patchwork somehow treat the series in weird ways.
> IOW, based on the message id of the cover letter I was able to pull most > messages except for: > > [07/92] fs: add ctime accessors infrastructure > [08/92] fs: new helper: simple_rename_timestamp > [92/92] fs: rename i_ctime field to __i_ctime > > which I pulled in separately. Not sure what the cause of > > this is. Good to know. I ended up doing the send in two phases: one for the cover letter and infrastructure patches that went to everyone, and one for the per-subsystem patches that went to individual maintainers and lists. I suspect that screwed up the message IDs somehow. Hopefully I won't need to do a posting like that again soon, but I'll pay closer attention to the message id handling next time. Thanks again! -- Jeff Layton
Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime
On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote: > v2: > - prepend patches to add missing ctime updates > - add simple_rename_timestamp helper function > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > - drop individual inode_ctime_set_{sec,nsec} helpers > After review by Jan and others, and Jan's ext4 rework, the diff on top of the series I posted a couple of days ago is below. I don't really want to spam everyone with another ~100 patch v3 series, but I can if you think that's best. Christian, what would you like me to do here? diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bcdb1a0beccf..5f6e93714f5a 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -699,8 +699,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, if (ci->i_version == 0 || timespec64_compare(ctime, &ictime) > 0) { dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", -inode_get_ctime(inode).tv_sec, -inode_get_ctime(inode).tv_nsec, +ictime.tv_sec, ictime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode_set_ctime_to_ts(inode, *ctime); } diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 806374d866d1..567c0d305ea4 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -175,10 +175,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, vi->chunkbits = sb->s_blocksize_bits + (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); } - inode->i_mtime.tv_sec = inode_get_ctime(inode).tv_sec; - inode->i_atime.tv_sec = inode_get_ctime(inode).tv_sec; - inode->i_mtime.tv_nsec = inode_get_ctime(inode).tv_nsec; - inode->i_atime.tv_nsec = inode_get_ctime(inode).tv_nsec; + inode->i_mtime = inode->i_atime = inode_get_ctime(inode); inode->i_flags &= ~S_DAX; if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index c007de6ac1c7..1b9f587f6cca 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -1351,7 +1351,7 @@ static int exfat_rename(struct mnt_idmap *idmap, exfat_warn(sb, "abnormal access to an inode dropped");
WARN_ON(new_inode->i_nlink == 0); } - EXFAT_I(new_inode)->i_crtime = inode_set_ctime_current(new_inode); + EXFAT_I(new_inode)->i_crtime = current_time(new_inode); } unlock: diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d502b930431b..d63543187359 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -868,64 +868,63 @@ struct ext4_inode { * affected filesystem before 2242. */ -static inline __le32 ext4_encode_extra_time(struct timespec64 *time) +static inline __le32 ext4_encode_extra_time(struct timespec64 ts) { - u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK; - return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); + u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK; + return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS)); } -static inline void ext4_decode_extra_time(struct timespec64 *time, - __le32 extra) +static inline struct timespec64 ext4_decode_extra_time(__le32 base, + __le32 extra) { + struct timespec64 ts = { .tv_sec = le32_to_cpu(base) }; + if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) - time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; + ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + return ts; } -#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts) \ do { \ - if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\ - (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ - (raw_inode)->xtime ## _extra = \ - ext4_encode_extra_time(&(inode)->xtime); \ - } \ - else\ - (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec,
Re: [apparmor] [PATCH v2 08/92] fs: new helper: simple_rename_timestamp
On Thu, 2023-07-06 at 21:02 +, Seth Arnold wrote: > On Wed, Jul 05, 2023 at 08:04:41PM -0400, Jeff Layton wrote: > > > > I don't believe it's an issue. I've seen nothing in the POSIX spec that > > mandates that timestamp updates to different inodes involved in an > > operation be set to the _same_ value. It just says they must be updated. > > > > It's also hard to believe that any software would depend on this either, > > given that it's very inconsistent across filesystems today. AFAICT, this > > was mostly done in the past just as a matter of convenience. > > I've seen this assumption in several programs: > Thanks for looking into this! To be clear, POSIX doesn't require that _different_ inodes ever be set to the same timestamp value. IOW, it certainly doesn't require that the source and target directories on a rename() end up with the exact same timestamp value. Granted, POSIX is rather vague on timestamps in general, but most of the examples below involve comparing different timestamps on the _same_ inode. > mutt buffy.c > https://sources.debian.org/src/mutt/2.2.9-1/buffy.c/?hl=625#L625 > > if (mailbox->newly_created && > (sb->st_ctime != sb->st_mtime || sb->st_ctime != sb->st_atime)) > mailbox->newly_created = 0; > This should be fine with this patchset. Note that this is comparing a/c/mtime on the same inode, and our usual pattern on inode instantiation is: inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); ...which should result in all of inode's timestamps being synchronized. > > neomutt mbox/mbox.c > https://sources.debian.org/src/neomutt/20220429+dfsg1-4.1/mbox/mbox.c/?hl=1820#L1820 > > if (m->newly_created && ((st.st_ctime != st.st_mtime) || (st.st_ctime != > st.st_atime))) > m->newly_created = false; > Ditto here. > > screen logfile.c > https://sources.debian.org/src/screen/4.9.0-4/logfile.c/?hl=130#L130 > > if ((!s->st_dev && !s->st_ino) || /* stat failed, that's new! 
*/ > !s->st_nlink || /* red alert: file unlinked */ > (s->st_size < o.st_size) || /* file truncated */ > (s->st_mtime != o.st_mtime) ||/*file modified */ > ((s->st_ctime != o.st_ctime) && /* file changed (moved) */ >!(s->st_mtime == s->st_ctime && /* and it was not a change */ > o.st_ctime < s->st_ctime)))/* due to delayed nfs write */ > { > This one is really weird. You have two different struct stat's, "o" and "s". I assume though that these should be stat values from the same inode, because otherwise this comparison would make no sense: ((s->st_ctime != o.st_ctime) && /* file changed (moved) */ In general, we can never contrive to ensure that the ctime of two different inodes are the same, since that is always set by the kernel to the current time, and you'd have to ensure that they were created within the same jiffy (at least with today's code). > nemo libnemo-private/nemo-vfs-file.c > https://sources.debian.org/src/nemo/5.6.5-1/libnemo-private/nemo-vfs-file.c/?hl=344#L344 > > /* mtime is when the contents changed; ctime is when the >* contents or the permissions (inc. owner/group) changed. >* So we can only know when the permissions changed if mtime >* and ctime are different. >*/ > if (file->details->mtime == file->details->ctime) { > return FALSE; > } > Ditto here with the first examples. This involves comparing timestamps on the same inode, which should be fine. > > While looking for more examples, I found a perl test that seems to suggest > that at least Solaris, AFS, AmigaOS, DragonFly BSD do as you suggest: > https://sources.debian.org/src/perl/5.36.0-7/t/op/stat.t/?hl=158#L140 > (I kinda miss Perl. I wrote a bunch of stuff in it in the 90's and early naughties) I think this test is supposed to be testing whether the mtime changes on link() ? -8< my($nlink, $mtime, $ctime) = (stat($tmpfile))[$NLINK, $MTIME, $CTIME]; [...] 
skip "Solaris tmpfs has different mtime/ctime link semantics", 2 if $Is_Solaris and $cwd =~ m#^/tmp# and $mtime && $mtime == $ctime; -8< ...again, I think this would be ok too since it's just comparing the mtime and ctime of the same inode. Granted this is a Solaris-specific test, but Linux would be fine here too. So in conclusion, I don't think this patchset will cause problems with any of the above code. -- Jeff Layton
Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime
On Thu, 2023-07-06 at 10:16 -0500, Eric W. Biederman wrote: > Jeff Layton writes: > > > On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote: > > > v2: > > > - prepend patches to add missing ctime updates > > > - add simple_rename_timestamp helper function > > > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > > > - drop individual inode_ctime_set_{sec,nsec} helpers > > > > > > I've been working on a patchset to change how the inode->i_ctime is > > > accessed in order to give us conditional, high-res timestamps for the > > > ctime and mtime. struct timespec64 has unused bits in it that we can use > > > to implement this. In order to do that however, we need to wrap all > > > accesses of inode->i_ctime to ensure that bits used as flags are > > > appropriately handled. > > > > > > The patchset starts with reposts of some missing ctime updates that I > > > spotted in the tree. It then adds a new helper function for updating the > > > timestamp after a successful rename, and new ctime accessor > > > infrastructure. > > > > > > The bulk of the patchset is individual conversions of different > > > subsysteme to use the new infrastructure. Finally, the patchset renames > > > the i_ctime field to __i_ctime to help ensure that I didn't miss > > > anything. > > > > > > This should apply cleanly to linux-next as of this morning. > > > > > > Most of this conversion was done via 5 different coccinelle scripts, run > > > in succession, with a large swath of by-hand conversions to clean up the > > > remainder. > > > > > > > A couple of other things I should note: > > > > If you sent me an Acked-by or Reviewed-by in the previous set, then I > > tried to keep it on the patch here, since the respun patches are mostly > > just renaming stuff from v1. Let me know if I've missed any. 
> > > > I've also pushed the pile to my tree as this tag: > > > > > > https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/tag/?h=ctime.20230705 > > > > In case that's easier to work with. > > Are there any preliminary patches showing what you want your introduced > accessors to turn into? It is hard to judge the sanity of the > introduction of wrappers without seeing what the wrappers are ultimately > going to do. > > Eric I have a draft version of the multigrain patches on top of the wrapper conversion I've already posted in my "mgctime-experimental" branch: https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/log/?h=mgctime-experimental The rationale is best explained in this changelog: https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/commit/?h=mgctime-experimental&id=face437a144d3375afb7f70c233b0644b4edccba The idea will be to enable this on a per-fs basis. -- Jeff Layton
Re: [PATCH v2 08/92] fs: new helper: simple_rename_timestamp
On Thu, 2023-07-06 at 08:19 +0900, Damien Le Moal wrote: > On 7/6/23 03:58, Jeff Layton wrote: > > A rename potentially involves updating 4 different inode timestamps. Add > > a function that handles the details sanely, and convert the libfs.c > > callers to use it. > > > > Signed-off-by: Jeff Layton > > --- > > fs/libfs.c | 36 +++- > > include/linux/fs.h | 2 ++ > > 2 files changed, 29 insertions(+), 9 deletions(-) > > > > diff --git a/fs/libfs.c b/fs/libfs.c > > index a7e56baf8bbd..9ee79668c909 100644 > > --- a/fs/libfs.c > > +++ b/fs/libfs.c > > @@ -692,6 +692,31 @@ int simple_rmdir(struct inode *dir, struct dentry > > *dentry) > > } > > EXPORT_SYMBOL(simple_rmdir); > > > > +/** > > + * simple_rename_timestamp - update the various inode timestamps for rename > > + * @old_dir: old parent directory > > + * @old_dentry: dentry that is being renamed > > + * @new_dir: new parent directory > > + * @new_dentry: target for rename > > + * > > + * POSIX mandates that the old and new parent directories have their ctime > > and > > + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), > > have > > + * their ctime updated. 
> > + */ > > +void simple_rename_timestamp(struct inode *old_dir, struct dentry > > *old_dentry, > > +struct inode *new_dir, struct dentry *new_dentry) > > +{ > > + struct inode *newino = d_inode(new_dentry); > > + > > + old_dir->i_mtime = inode_set_ctime_current(old_dir); > > + if (new_dir != old_dir) > > + new_dir->i_mtime = inode_set_ctime_current(new_dir); > > + inode_set_ctime_current(d_inode(old_dentry)); > > + if (newino) > > + inode_set_ctime_current(newino); > > +} > > +EXPORT_SYMBOL_GPL(simple_rename_timestamp); > > + > > int simple_rename_exchange(struct inode *old_dir, struct dentry > > *old_dentry, > >struct inode *new_dir, struct dentry *new_dentry) > > { > > @@ -707,11 +732,7 @@ int simple_rename_exchange(struct inode *old_dir, > > struct dentry *old_dentry, > > inc_nlink(old_dir); > > } > > } > > - old_dir->i_ctime = old_dir->i_mtime = > > - new_dir->i_ctime = new_dir->i_mtime = > > - d_inode(old_dentry)->i_ctime = > > - d_inode(new_dentry)->i_ctime = current_time(old_dir); > > - > > + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); > > This is somewhat changing the current behavior: before the patch, the mtime > and > ctime of old_dir, new_dir and the inodes associated with the dentries are > always > equal. But given that simple_rename_timestamp() calls > inode_set_ctime_current() > 4 times, the times could potentially be different. > > I am not sure if that is an issue, but it seems that calling > inode_set_ctime_current() once, recording the "now" time it sets and using > that > value to set all times may be more efficient and preserve the existing > behavior. > I don't believe it's an issue. I've seen nothing in the POSIX spec that mandates that timestamp updates to different inodes involved in an operation be set to the _same_ value. It just says they must be updated. It's also hard to believe that any software would depend on this either, given that it's very inconsistent across filesystems today. 
AFAICT, this was mostly done in the past just as a matter of convenience. The other problem with doing it that way is that it assumes that current_time(inode) should always return the same value when given different inodes. Is it really correct to do this? inode_set_ctime(dir, inode_set_ctime_current(inode)); "dir" and "inode" are different inodes, after all, and you're setting dir's timestamp to "inode"'s value. It's not a big deal today since they're always on the same sb, but the ultimate goal of these changes is to implement multigrain timestamps. That will mean fetching a fine-grained timestamp for an update when the existing mtime or ctime value has been queried via getattr. With that change, I think it's best that we treat updates to different inodes individually, as some of them may require updating with a fine-grained timestamp and some may not. > > return 0; > > } > > EXPORT_SYMBOL_GPL(simple_rename_exchange); > > @@ -720,7 +741,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode > > *old_dir, > > struct dentry *old_dentry, struct inode *new_dir, &g
Re: [PATCH v2 00/89] fs: new accessors for inode->i_ctime
On Wed, 2023-07-05 at 14:58 -0400, Jeff Layton wrote: > v2: > - prepend patches to add missing ctime updates > - add simple_rename_timestamp helper function > - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* > - drop individual inode_ctime_set_{sec,nsec} helpers > > I've been working on a patchset to change how the inode->i_ctime is > accessed in order to give us conditional, high-res timestamps for the > ctime and mtime. struct timespec64 has unused bits in it that we can use > to implement this. In order to do that however, we need to wrap all > accesses of inode->i_ctime to ensure that bits used as flags are > appropriately handled. > > The patchset starts with reposts of some missing ctime updates that I > spotted in the tree. It then adds a new helper function for updating the > timestamp after a successful rename, and new ctime accessor > infrastructure. > > The bulk of the patchset is individual conversions of different > subsysteme to use the new infrastructure. Finally, the patchset renames > the i_ctime field to __i_ctime to help ensure that I didn't miss > anything. > > This should apply cleanly to linux-next as of this morning. > > Most of this conversion was done via 5 different coccinelle scripts, run > in succession, with a large swath of by-hand conversions to clean up the > remainder. > A couple of other things I should note: If you sent me an Acked-by or Reviewed-by in the previous set, then I tried to keep it on the patch here, since the respun patches are mostly just renaming stuff from v1. Let me know if I've missed any. I've also pushed the pile to my tree as this tag: https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/tag/?h=ctime.20230705 In case that's easier to work with. Cheers, -- Jeff Layton
[PATCH v2 92/92] fs: rename i_ctime field to __i_ctime
Now that everything in-tree is converted to use the accessor functions, rename the i_ctime field in the inode to discourage direct access. Signed-off-by: Jeff Layton --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 14e38bd900f1..b66442f91835 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -642,7 +642,7 @@ struct inode { loff_t i_size; struct timespec64 i_atime; struct timespec64 i_mtime; - struct timespec64 i_ctime; + struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -1485,7 +1485,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->i_ctime; + return inode->__i_ctime; } /** @@ -1498,7 +1498,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode) static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->i_ctime = ts; + inode->__i_ctime = ts; return ts; } -- 2.41.0
[PATCH v2 08/92] fs: new helper: simple_rename_timestamp
A rename potentially involves updating 4 different inode timestamps. Add a function that handles the details sanely, and convert the libfs.c callers to use it. Signed-off-by: Jeff Layton --- fs/libfs.c | 36 +++- include/linux/fs.h | 2 ++ 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index a7e56baf8bbd..9ee79668c909 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -692,6 +692,31 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL(simple_rmdir); +/** + * simple_rename_timestamp - update the various inode timestamps for rename + * @old_dir: old parent directory + * @old_dentry: dentry that is being renamed + * @new_dir: new parent directory + * @new_dentry: target for rename + * + * POSIX mandates that the old and new parent directories have their ctime and + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have + * their ctime updated. + */ +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, +struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *newino = d_inode(new_dentry); + + old_dir->i_mtime = inode_set_ctime_current(old_dir); + if (new_dir != old_dir) + new_dir->i_mtime = inode_set_ctime_current(new_dir); + inode_set_ctime_current(d_inode(old_dentry)); + if (newino) + inode_set_ctime_current(newino); +} +EXPORT_SYMBOL_GPL(simple_rename_timestamp); + int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { @@ -707,11 +732,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, inc_nlink(old_dir); } } - old_dir->i_ctime = old_dir->i_mtime = - new_dir->i_ctime = new_dir->i_mtime = - d_inode(old_dentry)->i_ctime = - d_inode(new_dentry)->i_ctime = current_time(old_dir); - + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL_GPL(simple_rename_exchange); @@ -720,7 +741,6 @@ int simple_rename(struct 
mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct inode *inode = d_inode(old_dentry); int they_are_dirs = d_is_dir(old_dentry); if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) @@ -743,9 +763,7 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir, inc_nlink(new_dir); } - old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = - new_dir->i_mtime = inode->i_ctime = current_time(old_dir); - + simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); return 0; } EXPORT_SYMBOL(simple_rename); diff --git a/include/linux/fs.h b/include/linux/fs.h index bdfbd11a5811..14e38bd900f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2979,6 +2979,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, +struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, -- 2.41.0
[PATCH v2 07/92] fs: add ctime accessors infrastructure
struct timespec64 has unused bits in the tv_nsec field that can be used for other purposes. In future patches, we're going to change how the inode->i_ctime is accessed in certain inodes in order to make use of them. In order to do that safely though, we'll need to eradicate raw accesses of the inode->i_ctime field from the kernel. Add new accessor functions for the ctime that we use to replace them. Reviewed-by: Jan Kara Reviewed-by: Luis Chamberlain Signed-off-by: Jeff Layton --- fs/inode.c | 16 include/linux/fs.h | 45 - 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index d37fad91c8da..21b026d95b51 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode *inode) } EXPORT_SYMBOL(current_time); +/** + * inode_set_ctime_current - set the ctime to current_time + * @inode: inode + * + * Set the inode->i_ctime to the current value for the inode. Returns + * the current value that was assigned to i_ctime. + */ +struct timespec64 inode_set_ctime_current(struct inode *inode) +{ + struct timespec64 now = current_time(inode); + + inode_set_ctime(inode, now.tv_sec, now.tv_nsec); + return now; +} +EXPORT_SYMBOL(inode_set_ctime_current); + /** * in_group_or_capable - check whether caller is CAP_FSETID privileged * @idmap: idmap of the mount @inode was found from diff --git a/include/linux/fs.h b/include/linux/fs.h index 824accb89a91..bdfbd11a5811 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1474,7 +1474,50 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -extern struct timespec64 current_time(struct inode *inode); +struct timespec64 current_time(struct inode *inode); +struct timespec64 inode_set_ctime_current(struct inode *inode); + +/** + * inode_get_ctime - fetch the current ctime from the inode + * @inode: inode from which to fetch ctime + * + * Grab the current ctime from the inode and return it. 
+ */ +static inline struct timespec64 inode_get_ctime(const struct inode *inode) +{ + return inode->i_ctime; +} + +/** + * inode_set_ctime_to_ts - set the ctime in the inode + * @inode: inode in which to set the ctime + * @ts: value to set in the ctime field + * + * Set the ctime in @inode to @ts + */ +static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->i_ctime = ts; + return ts; +} + +/** + * inode_set_ctime - set the ctime in the inode + * @inode: inode in which to set the ctime + * @sec: tv_sec value to set + * @nsec: tv_nsec value to set + * + * Set the ctime in @inode to { @sec, @nsec } + */ +static inline struct timespec64 inode_set_ctime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, +.tv_nsec = nsec }; + + return inode_set_ctime_to_ts(inode, ts); +} /* * Snapshotting support. -- 2.41.0
[PATCH v2 00/89] fs: new accessors for inode->i_ctime
v2: - prepend patches to add missing ctime updates - add simple_rename_timestamp helper function - rename ctime accessor functions as inode_get_ctime/inode_set_ctime_* - drop individual inode_ctime_set_{sec,nsec} helpers I've been working on a patchset to change how the inode->i_ctime is accessed in order to give us conditional, high-res timestamps for the ctime and mtime. struct timespec64 has unused bits in it that we can use to implement this. In order to do that however, we need to wrap all accesses of inode->i_ctime to ensure that bits used as flags are appropriately handled. The patchset starts with reposts of some missing ctime updates that I spotted in the tree. It then adds a new helper function for updating the timestamp after a successful rename, and new ctime accessor infrastructure. The bulk of the patchset is individual conversions of different subsystems to use the new infrastructure. Finally, the patchset renames the i_ctime field to __i_ctime to help ensure that I didn't miss anything. This should apply cleanly to linux-next as of this morning. Most of this conversion was done via 5 different coccinelle scripts, run in succession, with a large swath of by-hand conversions to clean up the remainder. The coccinelle scripts that were used are below: :: cocci/ctime1.cocci :: // convert as much to use inode_set_ctime_current as possible @@ identifier timei; struct inode *inode; expression E1, E2; @@ ( - inode->i_ctime = E1 = E2 = current_time(timei) + E1 = E2 = inode_set_ctime_current(inode) | - inode->i_ctime = E1 = current_time(timei) + E1 = inode_set_ctime_current(inode) | - E1 = inode->i_ctime = current_time(timei) + E1 = inode_set_ctime_current(inode) | - inode->i_ctime = current_time(timei) + inode_set_ctime_current(inode) ) @@ struct inode *inode; expression E1, E2, E3; @@ ( - E1 = current_time(inode) + E1 = inode_set_ctime_current(inode) | - E1 = current_time(E3) + E1 = inode_set_ctime_current(inode) ) ... 
( - inode->i_ctime = E1; | - E2 = inode->i_ctime = E1; + E2 = E1; ) :: cocci/ctime2.cocci :: // get the places that set individual timespec64 fields @@ struct inode *inode; expression val, val2; @@ - inode->i_ctime.tv_sec = val + inode_set_ctime(inode, val, val2) ... - inode->i_ctime.tv_nsec = val2; // get places that just set the tv_sec @@ struct inode *inode; expression sec, E1, E2, E3; @@ ( - E3 = inode->i_ctime.tv_sec = sec + E3 = inode_set_ctime(inode, sec, 0).tv_sec | - inode->i_ctime.tv_sec = sec + inode_set_ctime(inode, sec, 0) ) <... ( - inode->i_ctime.tv_nsec = 0; | - E1 = inode->i_ctime.tv_nsec = 0 + E1 = 0 | - inode->i_ctime.tv_nsec = E1 = 0 + E1 = 0 | - inode->i_ctime.tv_nsec = E1 = E2 = 0 + E1 = E2 = 0 ) ...> :: cocci/ctime3.cocci :: // convert places that set i_ctime to a timespec64 directly @@ struct inode *inode; expression ts, E1, E2; @@ ( - inode->i_ctime = E1 = E2 = ts + E1 = E2 = inode_set_ctime_to_ts(inode, ts) | - inode->i_ctime = E1 = ts + E1 = inode_set_ctime_to_ts(inode, ts) | - inode->i_ctime = ts + inode_set_ctime_to_ts(inode, ts) ) :: cocci/ctime4.cocci :: // catch places that set the i_ctime in an inode embedded in another structure @@ expression E1, E2, E3; @@ ( - E3.i_ctime = E1 = E2 = current_time() + E1 = E2 = inode_set_ctime_current() | - E3.i_ctime = E1 = current_time() + E1 = inode_set_ctime_current() | - E1 = E3.i_ctime = current_time() + E1 = inode_set_ctime_current() | - E3.i_ctime = current_time() + inode_set_ctime_current() ) :: cocci/ctime5.cocci :: // convert the remaining i_ctime accesses @@ struct inode *inode; @@ - inode->i_ctime + inode_get_ctime(inode) Jeff Layton (92): ibmvmc: update ctime in conjunction with mtime on write bfs: update ctime in addition to mtime when adding entries efivarfs: update ctime when mtime changes on a write exfat: ensure that ctime is updated whenever the mtime is apparmor: update ctime whenever the mtime changes on an inode cifs: update the ctime on a partial page write fs: add ctime 
accessors infrastructure fs: new helper: simple_rename_timestamp btrfs: convert to simple_rename_timestamp ubifs: convert to simple_rename_timestamp shmem: convert to simple_rename_timestamp exfat: convert to simple_rename_timestamp ntfs3: convert to simple_rename_timestamp reiserfs: convert to simple_rename_timestamp spufs: convert to ctime accessor functions s390: convert to ctime accessor functions binderfs: convert to ctime accessor functions infiniband: convert to ctime accessor functions ibm: convert to ctime accessor functions usb: convert to ctime accessor functions 9p: convert to ctime accessor functions adfs: convert to ctime accessor functions affs: convert to ctime accessor functions afs: convert to ctime accessor functions fs: convert to ctime accessor functions au
[PATCH v2 15/92] spufs: convert to ctime accessor functions
In later patches, we're going to change how the inode's ctime field is used. Switch to using accessor functions instead of raw accesses of inode->i_ctime. Acked-by: Jeremy Kerr Reviewed-by: Jan Kara Signed-off-by: Jeff Layton --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index ea807aa0c31a..38c5be34c895 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); out: return inode; } -- 2.41.0
Re: [PATCH 01/79] fs: add ctime accessors infrastructure
On Thu, 2023-06-22 at 09:46 +0900, Damien Le Moal wrote: > On 6/21/23 23:45, Jeff Layton wrote: > > struct timespec64 has unused bits in the tv_nsec field that can be used > > for other purposes. In future patches, we're going to change how the > > inode->i_ctime is accessed in certain inodes in order to make use of > > them. In order to do that safely though, we'll need to eradicate raw > > accesses of the inode->i_ctime field from the kernel. > > > > Add new accessor functions for the ctime that we can use to replace them. > > > > Signed-off-by: Jeff Layton > > [...] > > > +/** > > + * inode_ctime_peek - fetch the current ctime from the inode > > + * @inode: inode from which to fetch ctime > > + * > > + * Grab the current ctime from the inode and return it. > > + */ > > +static inline struct timespec64 inode_ctime_peek(const struct inode *inode) > > To be consistent with inode_ctime_set(), why not call this one > inode_ctime_get() In later patches fetching the ctime for presentation may have side effects on certain filesystems. Using "peek" here is a hint that we want to avoid those side effects in these calls. > ? Also, inode_set_ctime() & inode_get_ctime() may be a little more natural. > But > no strong opinion about that though. > I like the consistency of the inode_ctime_* prefix. It makes it simpler to find these calls when grepping, etc. That said, my opinions on naming are pretty loosely-held, so if the consensus is that the names should be as you suggest, I'll go along with it. -- Jeff Layton
Re: [PATCH 00/79] fs: new accessors for inode->i_ctime
On Wed, 2023-06-21 at 15:21 -0400, Steven Rostedt wrote: > On Wed, 21 Jun 2023 10:45:05 -0400 > Jeff Layton wrote: > > > Most of this conversion was done via coccinelle, with a few of the more > > non-standard accesses done by hand. There should be no behavioral > > changes with this set. That will come later, as we convert individual > > filesystems to use multigrain timestamps. > > BTW, Linus has suggested to me that whenever a coccinelle script is used, > it should be included in the change log. > Ok, here's what I have. I note again that my usage of coccinelle is pretty primitive, so I ended up doing a fair bit of by-hand fixing after applying these. Given the way that this change is broken up into 77 patches by subsystem, to which changelogs should I add it? I could add it to the "infrastructure" patch, but that's the one where I _didn't_ use it. Maybe to patch #79 (the one that renames i_ctime)? 8<-- @@ expression inode; @@ - inode->i_ctime = current_time(inode) + inode_set_current_ctime(inode) @@ expression inode; @@ - inode->i_ctime = inode->i_mtime = current_time(inode) + inode->i_mtime = inode_set_current_ctime(inode) @@ struct inode *inode; expression value; @@ - inode->i_ctime = value; + inode_set_ctime(inode, value); @@ struct inode *inode; expression val; @@ - inode->i_ctime.tv_sec = val + inode_set_ctime_sec(inode, val) @@ struct inode *inode; expression val; @@ - inode->i_ctime.tv_nsec = val + inode_set_ctime_nsec(inode, val) @@ struct inode *inode; @@ - inode->i_ctime + inode_ctime_peek(inode)
Re: [PATCH 01/79] fs: add ctime accessors infrastructure
On Wed, 2023-06-21 at 14:19 -0400, Tom Talpey wrote: > On 6/21/2023 2:01 PM, Jeff Layton wrote: > > On Wed, 2023-06-21 at 13:29 -0400, Tom Talpey wrote: > > > On 6/21/2023 10:45 AM, Jeff Layton wrote: > > > > struct timespec64 has unused bits in the tv_nsec field that can be used > > > > for other purposes. In future patches, we're going to change how the > > > > inode->i_ctime is accessed in certain inodes in order to make use of > > > > them. In order to do that safely though, we'll need to eradicate raw > > > > accesses of the inode->i_ctime field from the kernel. > > > > > > > > Add new accessor functions for the ctime that we can use to replace > > > > them. > > > > > > > > Signed-off-by: Jeff Layton > > > > --- > > > >fs/inode.c | 16 ++ > > > >include/linux/fs.h | 53 > > > > +- > > > >2 files changed, 68 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/fs/inode.c b/fs/inode.c > > > > index d37fad91c8da..c005e7328fbb 100644 > > > > --- a/fs/inode.c > > > > +++ b/fs/inode.c > > > > @@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode > > > > *inode) > > > >} > > > >EXPORT_SYMBOL(current_time); > > > > > > > > +/** > > > > + * inode_ctime_set_current - set the ctime to current_time > > > > + * @inode: inode > > > > + * > > > > + * Set the inode->i_ctime to the current value for the inode. Returns > > > > + * the current value that was assigned to i_ctime. 
> > > > + */ > > > > +struct timespec64 inode_ctime_set_current(struct inode *inode) > > > > +{ > > > > + struct timespec64 now = current_time(inode); > > > > + > > > > + inode_set_ctime(inode, now); > > > > + return now; > > > > +} > > > > +EXPORT_SYMBOL(inode_ctime_set_current); > > > > + > > > >/** > > > > * in_group_or_capable - check whether caller is CAP_FSETID > > > > privileged > > > > * @idmap: idmap of the mount @inode was found from > > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > > index 6867512907d6..9afb30606373 100644 > > > > --- a/include/linux/fs.h > > > > +++ b/include/linux/fs.h > > > > @@ -1474,7 +1474,58 @@ static inline bool fsuidgid_has_mapping(struct > > > > super_block *sb, > > > >kgid_has_mapping(fs_userns, kgid); > > > >} > > > > > > > > -extern struct timespec64 current_time(struct inode *inode); > > > > +struct timespec64 current_time(struct inode *inode); > > > > +struct timespec64 inode_ctime_set_current(struct inode *inode); > > > > + > > > > +/** > > > > + * inode_ctime_peek - fetch the current ctime from the inode > > > > + * @inode: inode from which to fetch ctime > > > > + * > > > > + * Grab the current ctime from the inode and return it. > > > > + */ > > > > +static inline struct timespec64 inode_ctime_peek(const struct inode > > > > *inode) > > > > +{ > > > > + return inode->i_ctime; > > > > +} > > > > + > > > > +/** > > > > + * inode_ctime_set - set the ctime in the inode to the given value > > > > + * @inode: inode in which to set the ctime > > > > + * @ts: timespec value to set the ctime > > > > + * > > > > + * Set the ctime in @inode to @ts. 
> > > > + */ > > > > +static inline struct timespec64 inode_ctime_set(struct inode *inode, > > > > struct timespec64 ts) > > > > +{ > > > > + inode->i_ctime = ts; > > > > + return ts; > > > > +} > > > > + > > > > +/** > > > > + * inode_ctime_set_sec - set only the tv_sec field in the inode ctime > > > > > > I'm curious about why you choose to split the tv_sec and tv_nsec > > > set_ functions. Do any callers not set them both? Wouldn't a > > > single call enable a more atomic behavior someday? > > > > > > inode_ctime_set_sec_nsec(struct inode *, time64_t, time64_t) > > > > > > (or simply initialize a timespec64 and use inode_ctime_spec() ) &
Re: [PATCH 01/79] fs: add ctime accessors infrastructure
On Wed, 2023-06-21 at 13:29 -0400, Tom Talpey wrote: > On 6/21/2023 10:45 AM, Jeff Layton wrote: > > struct timespec64 has unused bits in the tv_nsec field that can be used > > for other purposes. In future patches, we're going to change how the > > inode->i_ctime is accessed in certain inodes in order to make use of > > them. In order to do that safely though, we'll need to eradicate raw > > accesses of the inode->i_ctime field from the kernel. > > > > Add new accessor functions for the ctime that we can use to replace them. > > > > Signed-off-by: Jeff Layton > > --- > > fs/inode.c | 16 ++ > > include/linux/fs.h | 53 +- > > 2 files changed, 68 insertions(+), 1 deletion(-) > > > > diff --git a/fs/inode.c b/fs/inode.c > > index d37fad91c8da..c005e7328fbb 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode *inode) > > } > > EXPORT_SYMBOL(current_time); > > > > +/** > > + * inode_ctime_set_current - set the ctime to current_time > > + * @inode: inode > > + * > > + * Set the inode->i_ctime to the current value for the inode. Returns > > + * the current value that was assigned to i_ctime. 
> > + */ > > +struct timespec64 inode_ctime_set_current(struct inode *inode) > > +{ > > + struct timespec64 now = current_time(inode); > > + > > + inode_set_ctime(inode, now); > > + return now; > > +} > > +EXPORT_SYMBOL(inode_ctime_set_current); > > + > > /** > >* in_group_or_capable - check whether caller is CAP_FSETID privileged > >* @idmap:idmap of the mount @inode was found from > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > index 6867512907d6..9afb30606373 100644 > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -1474,7 +1474,58 @@ static inline bool fsuidgid_has_mapping(struct > > super_block *sb, > >kgid_has_mapping(fs_userns, kgid); > > } > > > > -extern struct timespec64 current_time(struct inode *inode); > > +struct timespec64 current_time(struct inode *inode); > > +struct timespec64 inode_ctime_set_current(struct inode *inode); > > + > > +/** > > + * inode_ctime_peek - fetch the current ctime from the inode > > + * @inode: inode from which to fetch ctime > > + * > > + * Grab the current ctime from the inode and return it. > > + */ > > +static inline struct timespec64 inode_ctime_peek(const struct inode *inode) > > +{ > > + return inode->i_ctime; > > +} > > + > > +/** > > + * inode_ctime_set - set the ctime in the inode to the given value > > + * @inode: inode in which to set the ctime > > + * @ts: timespec value to set the ctime > > + * > > + * Set the ctime in @inode to @ts. > > + */ > > +static inline struct timespec64 inode_ctime_set(struct inode *inode, > > struct timespec64 ts) > > +{ > > + inode->i_ctime = ts; > > + return ts; > > +} > > + > > +/** > > + * inode_ctime_set_sec - set only the tv_sec field in the inode ctime > > I'm curious about why you choose to split the tv_sec and tv_nsec > set_ functions. Do any callers not set them both? Wouldn't a > single call enable a more atomic behavior someday? 
> >inode_ctime_set_sec_nsec(struct inode *, time64_t, time64_t) > > (or simply initialize a timespec64 and use inode_ctime_spec() ) > Yes, quite a few places set the fields individually. For example, when loading a value from disk that doesn't have sufficient granularity to set the nsecs field to anything but 0. Could I have done it by declaring a local timespec64 variable and just use the inode_ctime_set function in these places? Absolutely. That's a bit more difficult to handle with coccinelle though. If someone wants to suggest a way to do that without having to change all of these call sites manually, then I'm open to redoing the set. That might be better left for a later cleanup though. > > + * @inode: inode in which to set the ctime > > + * @sec: value to set the tv_sec field > > + * > > + * Set the sec field in the ctime. Returns @sec. > > + */ > > +static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t > > sec) > > +{ > > + inode->i_ctime.tv_sec = sec; > > + return sec; > > +} > > + > > +/** > > + * inode_ctime_set_nsec - set only the tv_nsec field in the inode ctime > > + * @inode: inode in which to set the ctime > > + * @nsec: value to set the tv_nsec field > > + * > > + * Set the nsec field in the ctime. Returns @nsec. > > + */ > > +static inline long inode_ctime_set_nsec(struct inode *inode, long nsec) > > +{ > > + inode->i_ctime.tv_nsec = nsec; > > + return nsec; > > +} > > > > /* > >* Snapshotting support. -- Jeff Layton
[PATCH 79/79] fs: rename i_ctime field to __i_ctime
Now that everything in-tree is converted to use the accessor functions, rename the i_ctime field in the inode to make its accesses more self-documenting. Signed-off-by: Jeff Layton --- include/linux/fs.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9afb30606373..2ca46c532b49 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -642,7 +642,7 @@ struct inode { loff_t i_size; struct timespec64 i_atime; struct timespec64 i_mtime; - struct timespec64 i_ctime; + struct timespec64 __i_ctime; /* use inode_ctime_* accessors! */ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -1485,7 +1485,7 @@ struct timespec64 inode_ctime_set_current(struct inode *inode); */ static inline struct timespec64 inode_ctime_peek(const struct inode *inode) { - return inode->i_ctime; + return inode->__i_ctime; } /** @@ -1497,7 +1497,7 @@ static inline struct timespec64 inode_ctime_peek(const struct inode *inode) */ static inline struct timespec64 inode_ctime_set(struct inode *inode, struct timespec64 ts) { - inode->i_ctime = ts; + inode->__i_ctime = ts; return ts; } @@ -1510,7 +1510,7 @@ static inline struct timespec64 inode_ctime_set(struct inode *inode, struct time */ static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t sec) { - inode->i_ctime.tv_sec = sec; + inode->__i_ctime.tv_sec = sec; return sec; } @@ -1523,7 +1523,7 @@ static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t sec) */ static inline long inode_ctime_set_nsec(struct inode *inode, long nsec) { - inode->i_ctime.tv_nsec = nsec; + inode->__i_ctime.tv_nsec = nsec; return nsec; } -- 2.41.0
[PATCH 01/79] fs: add ctime accessors infrastructure
struct timespec64 has unused bits in the tv_nsec field that can be used for other purposes. In future patches, we're going to change how the inode->i_ctime is accessed in certain inodes in order to make use of them. In order to do that safely though, we'll need to eradicate raw accesses of the inode->i_ctime field from the kernel. Add new accessor functions for the ctime that we can use to replace them. Signed-off-by: Jeff Layton --- fs/inode.c | 16 ++ include/linux/fs.h | 53 +- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index d37fad91c8da..c005e7328fbb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2499,6 +2499,22 @@ struct timespec64 current_time(struct inode *inode) } EXPORT_SYMBOL(current_time); +/** + * inode_ctime_set_current - set the ctime to current_time + * @inode: inode + * + * Set the inode->i_ctime to the current value for the inode. Returns + * the current value that was assigned to i_ctime. + */ +struct timespec64 inode_ctime_set_current(struct inode *inode) +{ + struct timespec64 now = current_time(inode); + + inode_set_ctime(inode, now); + return now; +} +EXPORT_SYMBOL(inode_ctime_set_current); + /** * in_group_or_capable - check whether caller is CAP_FSETID privileged * @idmap: idmap of the mount @inode was found from diff --git a/include/linux/fs.h b/include/linux/fs.h index 6867512907d6..9afb30606373 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1474,7 +1474,58 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -extern struct timespec64 current_time(struct inode *inode); +struct timespec64 current_time(struct inode *inode); +struct timespec64 inode_ctime_set_current(struct inode *inode); + +/** + * inode_ctime_peek - fetch the current ctime from the inode + * @inode: inode from which to fetch ctime + * + * Grab the current ctime from the inode and return it. 
+ */ +static inline struct timespec64 inode_ctime_peek(const struct inode *inode) +{ + return inode->i_ctime; +} + +/** + * inode_ctime_set - set the ctime in the inode to the given value + * @inode: inode in which to set the ctime + * @ts: timespec value to set the ctime + * + * Set the ctime in @inode to @ts. + */ +static inline struct timespec64 inode_ctime_set(struct inode *inode, struct timespec64 ts) +{ + inode->i_ctime = ts; + return ts; +} + +/** + * inode_ctime_set_sec - set only the tv_sec field in the inode ctime + * @inode: inode in which to set the ctime + * @sec: value to set the tv_sec field + * + * Set the sec field in the ctime. Returns @sec. + */ +static inline time64_t inode_ctime_set_sec(struct inode *inode, time64_t sec) +{ + inode->i_ctime.tv_sec = sec; + return sec; +} + +/** + * inode_ctime_set_nsec - set only the tv_nsec field in the inode ctime + * @inode: inode in which to set the ctime + * @nsec: value to set the tv_nsec field + * + * Set the nsec field in the ctime. Returns @nsec. + */ +static inline long inode_ctime_set_nsec(struct inode *inode, long nsec) +{ + inode->i_ctime.tv_nsec = nsec; + return nsec; +} /* * Snapshotting support. -- 2.41.0
[PATCH 00/79] fs: new accessors for inode->i_ctime
I've been working on a patchset to change how the inode->i_ctime is accessed in order to give us conditional, high-res timestamps for the ctime and mtime. struct timespec64 has unused bits in it that we can use to implement this. In order to do that however, we need to wrap all accesses of inode->i_ctime to ensure that bits used as flags are appropriately handled. This patchset first adds some new inode_ctime_* accessor functions. It then converts all in-tree accesses of inode->i_ctime to use those new functions and then renames the i_ctime field to __i_ctime to help ensure the use of the accessors. Most of this conversion was done via coccinelle, with a few of the more non-standard accesses done by hand. There should be no behavioral changes with this set. That will come later, as we convert individual filesystems to use multigrain timestamps. Some of these patches depend on the set I sent recently to add missing ctime updates in various subsystems: https://lore.kernel.org/linux-fsdevel/20230612104524.17058-1-jlay...@kernel.org/T/#m25399f903cc9526e46b2e0f5a35713c80b52fde9 Since this patchset is so large, I'm only going to send individual conversion patches to the appropriate maintainers. Please send Acked-by's or Reviewed-by's if you can. The intent is to merge these as a set (probably in v6.6). Let me know if that causes conflicts though, and we can work it out. This is based on top of linux-next as of yesterday. 
Jeff Layton (79): fs: add ctime accessors infrastructure spufs: switch to new ctime accessors s390: switch to new ctime accessors binderfs: switch to new ctime accessors qib_fs: switch to new ctime accessors ibm: switch to new ctime accessors usb: switch to new ctime accessors 9p: switch to new ctime accessors adfs: switch to new ctime accessors affs: switch to new ctime accessors afs: switch to new ctime accessors fs: switch to new ctime accessors autofs: switch to new ctime accessors befs: switch to new ctime accessors bfs: switch to new ctime accessors btrfs: switch to new ctime accessors ceph: switch to new ctime accessors coda: switch to new ctime accessors configfs: switch to new ctime accessors cramfs: switch to new ctime accessors debugfs: switch to new ctime accessors devpts: switch to new ctime accessors ecryptfs: switch to new ctime accessors efivarfs: switch to new ctime accessors efs: switch to new ctime accessors erofs: switch to new ctime accessors exfat: switch to new ctime accessors ext2: switch to new ctime accessors ext4: switch to new ctime accessors f2fs: switch to new ctime accessors fat: switch to new ctime accessors freevxfs: switch to new ctime accessors fuse: switch to new ctime accessors gfs2: switch to new ctime accessors hfs: switch to new ctime accessors hfsplus: switch to new ctime accessors hostfs: switch to new ctime accessors hpfs: switch to new ctime accessors hugetlbfs: switch to new ctime accessors isofs: switch to new ctime accessors jffs2: switch to new ctime accessors jfs: switch to new ctime accessors kernfs: switch to new ctime accessors minix: switch to new ctime accessors nfs: switch to new ctime accessors nfsd: switch to new ctime accessors nilfs2: switch to new ctime accessors ntfs: switch to new ctime accessors ntfs3: switch to new ctime accessors ocfs2: switch to new ctime accessors omfs: switch to new ctime accessors openpromfs: switch to new ctime accessors orangefs: switch to new ctime accessors overlayfs: switch 
to new ctime accessors proc: switch to new ctime accessors pstore: switch to new ctime accessors qnx4: switch to new ctime accessors qnx6: switch to new ctime accessors ramfs: switch to new ctime accessors reiserfs: switch to new ctime accessors romfs: switch to new ctime accessors smb: switch to new ctime accessors squashfs: switch to new ctime accessors sysv: switch to new ctime accessors tracefs: switch to new ctime accessors ubifs: switch to new ctime accessors udf: switch to new ctime accessors ufs: switch to new ctime accessors vboxsf: switch to new ctime accessors xfs: switch to new ctime accessors zonefs: switch to new ctime accessors mqueue: switch to new ctime accessors bpf: switch to new ctime accessors shmem: switch to new ctime accessors rpc_pipefs: switch to new ctime accessors apparmor: switch to new ctime accessors security: switch to new ctime accessors selinux: switch to new ctime accessors fs: rename i_ctime field to __i_ctime arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/s390/hypfs/inode.c | 4 +- drivers/android/binderfs.c| 8 +-- drivers/infiniband/hw/qib/qib_fs.c| 4 +- drivers/misc/ibmasm/ibmasmfs.c| 2 +- drivers/misc/ibmvmc.c | 2 +- drivers/usb/core/devio.c | 16 +++--- drivers/usb/gadget/function/f_fs.c| 6 +-- dr
[PATCH 02/79] spufs: switch to new ctime accessors
In later patches, we're going to change how the ctime.tv_nsec field is utilized. Switch to using accessor functions instead of raw accesses of inode->i_ctime. Signed-off-by: Jeff Layton --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index ea807aa0c31a..55418395bd9a 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_ctime_set_current(inode); out: return inode; } -- 2.41.0
Re: [PATCH 4/5] uapi: always define F_GETLK64/F_SETLK64/F_SETLKW64 in fcntl.h
On Wed, 2022-01-12 at 09:28 +0100, Arnd Bergmann wrote: > On Wed, Jan 12, 2022 at 8:56 AM Christoph Hellwig wrote: > > > > On Tue, Jan 11, 2022 at 04:33:30PM +0100, Arnd Bergmann wrote: > > > This is a very subtle change to the exported UAPI header contents: > > > On 64-bit architectures, the three unusable numbers are now always > > > shown, rather than depending on a user-controlled symbol. > > > > Well, the change is bigger and less subtle. Before this change the > > constants were never visible to userspace at all (except on mips), > > because the #ifdef CONFIG_64BIT is never set for userspace builds. > > I suppose you mean /always/ visible here, with that ifndef. > > > > This is probably what we want here for compatibility reasons, but I think > > > it should be explained in the changelog text, and I'd like Jeff or Bruce > > > to comment on it as well: the alternative here would be to make the > > > uapi definition depend on __BITS_PER_LONG==32, which is > > > technically the right thing to do but more of a change. > > > > I can change this to #if __BITS_PER_LONG==32 || defined(__KERNEL__), > > but it will still be a change in what userspace sees. > > Exactly, that is the tradeoff, which is why I'd like the flock maintainers > to say which way they prefer. We can either do it more correctly (hiding > the constants from user space when they are not usable), or with less > change (removing the incorrect #ifdef). Either way sounds reasonable > to me, I mainly care that this is explained in the changelog and that the > maintainers are aware of the two options. > I don't have a strong opinion here. If we were taking symbols away that were previously visible to userland it would be one thing, but since we're just adding symbols that may not have been there before, this seems less likely to break anything. I probably lean toward Christoph's original solution instead of keeping the conditional definitions. 
It's hard to imagine there are many programs that care whether these other symbols are defined or not. You can add this to the original patch: Acked-by: Jeff Layton
Re: [PATCH 4/7] arch: Remove leftovers from mandatory file locking
On Fri, 2021-11-05 at 16:43 +0100, Alexandre Ghiti wrote: > This config was removed so remove all references to it. > > Fixes: f7e33bdbd6d1 ("fs: remove mandatory file locking support") > Signed-off-by: Alexandre Ghiti > --- > arch/mips/configs/decstation_64_defconfig | 1 - > arch/mips/configs/decstation_defconfig | 1 - > arch/mips/configs/decstation_r4k_defconfig | 1 - > 3 files changed, 3 deletions(-) > > diff --git a/arch/mips/configs/decstation_64_defconfig > b/arch/mips/configs/decstation_64_defconfig > index 85f1955b4b00..e2ed105f8c97 100644 > --- a/arch/mips/configs/decstation_64_defconfig > +++ b/arch/mips/configs/decstation_64_defconfig > @@ -144,7 +144,6 @@ CONFIG_EXT2_FS_SECURITY=y > CONFIG_EXT3_FS=y > CONFIG_EXT3_FS_POSIX_ACL=y > CONFIG_EXT3_FS_SECURITY=y > -# CONFIG_MANDATORY_FILE_LOCKING is not set > CONFIG_ISO9660_FS=y > CONFIG_JOLIET=y > CONFIG_PROC_KCORE=y > diff --git a/arch/mips/configs/decstation_defconfig > b/arch/mips/configs/decstation_defconfig > index 30a6eafdb1d0..7e987d6f5e34 100644 > --- a/arch/mips/configs/decstation_defconfig > +++ b/arch/mips/configs/decstation_defconfig > @@ -140,7 +140,6 @@ CONFIG_EXT2_FS_SECURITY=y > CONFIG_EXT3_FS=y > CONFIG_EXT3_FS_POSIX_ACL=y > CONFIG_EXT3_FS_SECURITY=y > -# CONFIG_MANDATORY_FILE_LOCKING is not set > CONFIG_ISO9660_FS=y > CONFIG_JOLIET=y > CONFIG_PROC_KCORE=y > diff --git a/arch/mips/configs/decstation_r4k_defconfig > b/arch/mips/configs/decstation_r4k_defconfig > index e2b58dbf4aa9..6df5f6f2ac8e 100644 > --- a/arch/mips/configs/decstation_r4k_defconfig > +++ b/arch/mips/configs/decstation_r4k_defconfig > @@ -140,7 +140,6 @@ CONFIG_EXT2_FS_SECURITY=y > CONFIG_EXT3_FS=y > CONFIG_EXT3_FS_POSIX_ACL=y > CONFIG_EXT3_FS_SECURITY=y > -# CONFIG_MANDATORY_FILE_LOCKING is not set > CONFIG_ISO9660_FS=y > CONFIG_JOLIET=y > CONFIG_PROC_KCORE=y Reviewed-by: Jeff Layton
Re: [PATCH v12 11/12] open: openat2(2) syscall
no. Also, once you want to add a new flag, then we get into the mess of how to document whether open/openat also support it. It'd be good to freeze changes on those syscalls and aim to only introduce new functionality in openat2. That would also allow us to drop some flags from openat2 that we really don't need, and maybe expand the flag space to 64 bits initially, to allow for expansion into the future. Thoughts? > + * @flags: O_* flags. > + * @mode: O_CREAT/O_TMPFILE file mode. > + * @upgrade_mask: UPGRADE_* flags (to restrict O_PATH re-opening). > + * @resolve: RESOLVE_* flags. > + */ > +struct open_how { > + __u32 flags; > + union { > + __u16 mode; > + __u16 upgrade_mask; > + }; > + __u16 resolve; > +}; > + > +#define OPEN_HOW_SIZE_VER0 8 /* sizeof first published struct */ > + Hmm, there is no version field. When you want to expand this in the future, what is the plan? Add a new flag to indicate that it's some length? > +/* how->resolve flags for openat2(2). */ > +#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings > + (includes bind-mounts). */ > +#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through > procfs-style > + "magic-links". */ > +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks > + (implies OEXT_NO_MAGICLINKS) */ > +#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like > + "..", symlinks, and absolute > + paths which escape the dirfd. */ > +#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." > + be scoped inside the dirfd > + (similar to chroot(2)). */ > + > +/* how->upgrade flags for openat2(2). */ > +/* First bit is reserved for a future UPGRADE_NOEXEC flag. */ > +#define UPGRADE_NOREAD 0x02 /* Block re-opening with MAY_READ. > */ > +#define UPGRADE_NOWRITE 0x04 /* Block re-opening with > MAY_WRITE. */ > > #endif /* _UAPI_LINUX_FCNTL_H */ -- Jeff Layton
Re: [PATCH RESEND v11 7/8] open: openat2(2) syscall
On Mon, 2019-08-26 at 19:50 +0000, sba...@catern.com wrote: > Aleksa Sarai writes: > > To this end, we introduce the openat2(2) syscall. It provides all of the > > features of openat(2) through the @how->flags argument, but also > > provides a new @how->resolve argument which exposes RESOLVE_* flags > > that map to our new LOOKUP_* flags. It also eliminates the long-standing > > ugliness of variadic-open(2) by embedding it in a struct. > > I don't like this usage of a structure in memory to pass arguments that > would fit in registers. This would be quite inconvenient for me as a > userspace developer. > > Others have brought up issues with this: the issue of seccomp, and the > issue of mismatch between the userspace interface and the kernel > interface, are the most important for me. I want to add another, > admittedly somewhat niche, concern. > > This interface requires a program to allocate memory (even on the > stack) just to pass arguments to the kernel which could be passed > without allocating that memory. That makes it more difficult and less > efficient to use this syscall in any case where memory is not so easily > allocatable: such as early program startup or assembly, where the stack > may be limited in size or not even available yet, or when injecting a > syscall while ptracing. > > A struct-passing interface was needed for clone, since we ran out of > registers; but we have not run out of registers yet for openat, so it > would be nice to avoid this if we can. We can always expand later... > We can't really expand later like you suggest. Suppose in a couple of years that we need to add some new argument to openat2 that isn't just a new flag. If all these values are passed by individual arguments, you can't add one later without adding yet another syscall. Using a struct for this allows this to be extended later, OTOH. You can extend it, and add a flag that tells the kernel that it can access the new field. No new syscall required. -- Jeff Layton
[PATCH] fs: convert a pile of fsync routines to errseq_t based reporting
From: Jeff Layton <jlay...@redhat.com> This patch converts most of the in-kernel filesystems that do writeback out of the pagecache to report errors using the errseq_t-based infrastructure that was recently added. This allows them to report errors once for each open file description. Most filesystems have a fairly straightforward fsync operation. They call filemap_write_and_wait_range to write back all of the data and wait on it, and then (sometimes) sync out the metadata. For those filesystems this is a straightforward conversion from calling filemap_write_and_wait_range in their fsync operation to calling file_write_and_wait_range. Signed-off-by: Jeff Layton <jlay...@redhat.com> --- arch/powerpc/platforms/cell/spufs/file.c | 2 +- drivers/staging/lustre/lustre/llite/file.c | 2 +- drivers/video/fbdev/core/fb_defio.c| 2 +- fs/9p/vfs_file.c | 4 ++-- fs/affs/file.c | 2 +- fs/afs/write.c | 2 +- fs/cifs/file.c | 4 ++-- fs/exofs/file.c| 2 +- fs/f2fs/file.c | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/hostfs/hostfs_kern.c| 2 +- fs/hpfs/file.c | 2 +- fs/jffs2/file.c| 2 +- fs/jfs/file.c | 2 +- fs/ncpfs/file.c| 2 +- fs/ntfs/dir.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/file.c| 2 +- fs/reiserfs/dir.c | 2 +- fs/reiserfs/file.c | 2 +- fs/ubifs/file.c| 2 +- 22 files changed, 24 insertions(+), 24 deletions(-) Rolling up all of these conversions into a single patch, as Christoph Hellwig suggested. Most of these are not tested, but the conversion here is fairly straightforward. Any maintainers who object, please let me know and I'll yank that part out of this patch. 
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index ae2f740a82f1..5ffcdeb1eb17 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1749,7 +1749,7 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id) static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file_inode(file); - int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + int err = file_write_and_wait_range(file, start, end); if (!err) { inode_lock(inode); err = spufs_mfc_flush(file, NULL); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index ab1c85c1ed38..f7d07735ac66 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -2364,7 +2364,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); - rc = filemap_write_and_wait_range(inode->i_mapping, start, end); + rc = file_write_and_wait_range(file, start, end); inode_lock(inode); /* catch async errors that were recorded back when async writeback diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 37f69c061210..487d5e336e1b 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -69,7 +69,7 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy { struct fb_info *info = file->private_data; struct inode *inode = file_inode(file); - int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + int err = file_write_and_wait_range(file, start, end); if (err) return err; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3de3b4a89d89..4802d75b3cf7 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -445,7 +445,7 @@ static int v9fs_file_fsync(struct 
file *filp, loff_t start, loff_t end, struct p9_wstat wstat; int retval; - retval = filemap_write_and_wait_range(inode->i_mapping, start, end); + retval = file_write_and_wait_range(filp, start, end); if (retval) return retval; @@ -468,7 +468,7 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, struct inode *inode = filp->f_mapping->host; int retval; - retval = filemap_write_and_wait_range(inode->i_mapping, start, end); + retval = file_write_and_wait_range(filp, start, end); if (retval) return retval; d