On Wed, Feb 18, 2026 at 10:01 AM Jan Kara <[email protected]> wrote: > > On Tue 17-02-26 19:22:31, T.J. Mercier wrote: > > Currently some kernfs files (e.g. cgroup.events, memory.events) support > > inotify watches for IN_MODIFY, but unlike with regular filesystems, they > > do not receive IN_DELETE_SELF or IN_IGNORED events when they are > > removed. > > Please see my email: > https://lore.kernel.org/all/lc2jgt3yrvuvtdj2kk7q3rloie2c5mzyhfdy4zvxylx732voet@ol3kl4ackrpb > > I think this is actually a bug in kernfs... > > Honza
Thanks, I'm looking at this now. I've tried calling clear_nlink in kernfs_iop_rmdir, but I've found that when we get back to vfs_rmdir and shrink_dcache_parent is called, d_walk doesn't find any entries, so shrink_kill->__dentry_kill is not called. I'm investigating why that is... > > > > This creates a problem for processes monitoring cgroups. For example, a > > service monitoring memory.events for memory.high breaches needs to know > > when a cgroup is removed to clean up its state. Where it's known that a > > cgroup is removed when all processes die, without IN_DELETE_SELF the > > service must resort to inefficient workarounds such as: > > 1. Periodically scanning procfs to detect process death (wastes CPU and > > is susceptible to PID reuse). > > 2. Placing an additional IN_DELETE watch on the parent directory > > (wastes resources managing double the watches). > > 3. Holding a pidfd for every monitored cgroup (can exhaust file > > descriptors). > > > > This patch enables kernfs to send IN_DELETE_SELF and IN_IGNORED events. > > This allows applications to rely on a single existing watch on the file > > of interest (e.g. memory.events) to receive notifications for both > > modifications and the eventual removal of the file, as well as automatic > > watch descriptor cleanup, simplifying userspace logic and improving > > resource efficiency. > > > > Implementation details: > > The kernfs notification worker is updated to handle file deletion. > > The optimized single call for MODIFY events to both the parent and the > > file is retained, however because CREATE (parent) events remain > > unsupported for kernfs files, support for DELETE (parent) events is not > > added here to retain symmetry. Only support for DELETE_SELF events is > > added. > > > > Signed-off-by: T.J. 
Mercier <[email protected]> > > Acked-by: Tejun Heo <[email protected]> > > --- > > fs/kernfs/dir.c | 21 +++++++++++++++++ > > fs/kernfs/file.c | 45 ++++++++++++++++++++----------------- > > fs/kernfs/kernfs-internal.h | 3 +++ > > 3 files changed, 48 insertions(+), 21 deletions(-) > > > > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c > > index 29baeeb97871..e5bda829fcb8 100644 > > --- a/fs/kernfs/dir.c > > +++ b/fs/kernfs/dir.c > > @@ -9,6 +9,7 @@ > > > > #include <linux/sched.h> > > #include <linux/fs.h> > > +#include <linux/fsnotify_backend.h> > > #include <linux/namei.h> > > #include <linux/idr.h> > > #include <linux/slab.h> > > @@ -1471,6 +1472,23 @@ void kernfs_show(struct kernfs_node *kn, bool show) > > up_write(&root->kernfs_rwsem); > > } > > > > +static void kernfs_notify_file_deleted(struct kernfs_node *kn) > > +{ > > + static DECLARE_WORK(kernfs_notify_deleted_work, > > + kernfs_notify_workfn); > > + > > + guard(spinlock_irqsave)(&kernfs_notify_lock); > > + /* may overwrite already pending FS_MODIFY events */ > > + kn->attr.notify_event = FS_DELETE; > > + > > + if (!kn->attr.notify_next) { > > + kernfs_get(kn); > > + kn->attr.notify_next = kernfs_notify_list; > > + kernfs_notify_list = kn; > > + schedule_work(&kernfs_notify_deleted_work); > > + } > > +} > > + > > static void __kernfs_remove(struct kernfs_node *kn) > > { > > struct kernfs_node *pos, *parent; > > @@ -1520,6 +1538,9 @@ static void __kernfs_remove(struct kernfs_node *kn) > > struct kernfs_iattrs *ps_iattr = > > parent ?
parent->iattr : NULL; > > > > + if (kernfs_type(pos) == KERNFS_FILE) > > + kernfs_notify_file_deleted(pos); > > + > > /* update timestamps on the parent */ > > down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); > > > > diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c > > index e978284ff983..4be9bbe29378 100644 > > --- a/fs/kernfs/file.c > > +++ b/fs/kernfs/file.c > > @@ -37,8 +37,8 @@ struct kernfs_open_node { > > */ > > #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) > > > > -static DEFINE_SPINLOCK(kernfs_notify_lock); > > -static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; > > +DEFINE_SPINLOCK(kernfs_notify_lock); > > +struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; > > > > static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node > > *kn) > > { > > @@ -909,7 +909,7 @@ static loff_t kernfs_fop_llseek(struct file *file, > > loff_t offset, int whence) > > return ret; > > } > > > > -static void kernfs_notify_workfn(struct work_struct *work) > > +void kernfs_notify_workfn(struct work_struct *work) > > { > > struct kernfs_node *kn; > > struct kernfs_super_info *info; > > @@ -935,11 +935,7 @@ static void kernfs_notify_workfn(struct work_struct > > *work) > > down_read(&root->kernfs_supers_rwsem); > > down_read(&root->kernfs_rwsem); > > list_for_each_entry(info, &kernfs_root(kn)->supers, node) { > > - struct kernfs_node *parent; > > - struct inode *p_inode = NULL; > > - const char *kn_name; > > struct inode *inode; > > - struct qstr name; > > > > /* > > * We want fsnotify_modify() on @kn but as the > > @@ -951,24 +947,31 @@ static void kernfs_notify_workfn(struct work_struct > > *work) > > if (!inode) > > continue; > > > > - kn_name = kernfs_rcu_name(kn); > > - name = QSTR(kn_name); > > - parent = kernfs_get_parent(kn); > > - if (parent) { > > - p_inode = ilookup(info->sb, kernfs_ino(parent)); > > - if (p_inode) { > > - fsnotify(notify_event | FS_EVENT_ON_CHILD, > > - inode, FSNOTIFY_EVENT_INODE, > > - p_inode, &name, 
inode, 0); > > - iput(p_inode); > > + if (notify_event == FS_DELETE) { > > + fsnotify_inoderemove(inode); > > + } else { > > + struct kernfs_node *parent = kernfs_get_parent(kn); > > + struct inode *p_inode = NULL; > > + > > + if (parent) { > > + p_inode = ilookup(info->sb, > > kernfs_ino(parent)); > > + if (p_inode) { > > + const char *kn_name = > > kernfs_rcu_name(kn); > > + struct qstr name = QSTR(kn_name); > > + > > + fsnotify(notify_event | > > FS_EVENT_ON_CHILD, > > + inode, FSNOTIFY_EVENT_INODE, > > + p_inode, &name, inode, 0); > > + iput(p_inode); > > + } > > + > > + kernfs_put(parent); > > } > > > > - kernfs_put(parent); > > + if (!p_inode) > > + fsnotify_inode(inode, notify_event); > > } > > > > - if (!p_inode) > > - fsnotify_inode(inode, notify_event); > > - > > iput(inode); > > } > > > > diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h > > index 6061b6f70d2a..cf4b21f4f3b6 100644 > > --- a/fs/kernfs/kernfs-internal.h > > +++ b/fs/kernfs/kernfs-internal.h > > @@ -199,6 +199,8 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node > > *parent, > > * file.c > > */ > > extern const struct file_operations kernfs_file_fops; > > +extern struct kernfs_node *kernfs_notify_list; > > +extern void kernfs_notify_workfn(struct work_struct *work); > > > > bool kernfs_should_drain_open_files(struct kernfs_node *kn); > > void kernfs_drain_open_files(struct kernfs_node *kn); > > @@ -212,4 +214,5 @@ extern const struct inode_operations > > kernfs_symlink_iops; > > * kernfs locks > > */ > > extern struct kernfs_global_locks *kernfs_locks; > > +extern spinlock_t kernfs_notify_lock; > > #endif /* __KERNFS_INTERNAL_H */ > > -- > > 2.53.0.310.g728cabbaf7-goog > > > -- > Jan Kara <[email protected]> > SUSE Labs, CR

