Re: [patch] inotify for 2.6.11-mm1

Christoph Hellwig Sun, 06 Mar 2005 17:24:48 -0800

> -     if ((ret + (type == READ)) > 0)
> -             dnotify_parent(file->f_dentry,
> -                             (type == READ) ? DN_ACCESS : DN_MODIFY);
> +     if ((ret + (type == READ)) > 0) {
> +             struct dentry *dentry = file->f_dentry;
> +             if (type == READ)
> +                     fsnotify_access(dentry, dentry->d_inode,
> +                                     dentry->d_name.name);
> +             else
> +                     fsnotify_modify(dentry, dentry->d_inode,
> +                                     dentry->d_name.name);
> +     }


Both fsnotify_access and fsnotify_modify always get the second argument
as ->d_inode and third argument as ->d_name.name of the first, please fix
this blatantly stupid API.

> +     fsnotify_close(dentry, inode, file->f_mode, dentry->d_name.name);

dito for thw second and last argument here..  In fact fsnotify_close
should just get a struct file *

>       might_sleep();

this one seems totally unrelated.

> +/*
> + * struct inotify_device - represents an open instance of an inotify device
> + *
> + * This structure is protected by 'lock'.
> + */
> +struct inotify_device {
> +     wait_queue_head_t       wq;             /* wait queue for i/o */
> +     struct idr              idr;            /* idr mapping wd -> watch */
> +     struct list_head        events;         /* list of queued events */
> +     struct list_head        watches;        /* list of watches */
> +     spinlock_t              lock;           /* protects this bad boy */
> +     atomic_t                count;          /* reference count */
> +     struct user_struct      *user;          /* user who opened this dev */
> +     unsigned int            queue_size;     /* size of the queue (bytes) */
> +     unsigned int            event_count;    /* number of pending events */
> +     unsigned int            max_events;     /* maximum number of events */
> +};

> +/*
> + * kernel_event - create a new kernel event with the given parameters
> + */
> +static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 
> cookie,
> +                                               const char *name)
> +{
> +     struct inotify_kernel_event *kevent;
> +
> +     /* XXX: optimally, we should use GFP_KERNEL */
> +     kevent = kmem_cache_alloc(event_cachep, GFP_ATOMIC);

indeed.  having a new atomic memory allocation in every filesystem operation
sounds like a really bad idea.

> +             /* XXX: optimally, we should use GFP_KERNEL */
> +             kevent->name = kmalloc(len, GFP_ATOMIC);

and two is even worse.  A notification that fails under slight load
isn't that helpfull.

> +static inline int inotify_dev_has_events(struct inotify_device *dev)
> +{
> +     return !list_empty(&dev->events);
> +}

just remove this bit of obsfucation.

> +     if (kevent->name)
> +             kfree(kevent->name);

kfree(NULL) is just fine.

> +
> +repeat:
> +     if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
> +             return -ENOSPC;
> +     spin_lock(&dev->lock);
> +     ret = idr_get_new(&dev->idr, watch, &watch->wd);
> +     spin_unlock(&dev->lock);
> +     if (ret == -EAGAIN) /* more memory is required, try again */
> +             goto repeat;

what's the problem with a proper do { } while loop here?

> +     watch->inode = inode;   
> +     spin_lock(&inode_lock);
> +     __iget(inode);
> +     spin_unlock(&inode_lock);

do you ever calls this when igrab() would fail?  Don't think so,
and in that case I'd prefer you to use igrab instead of using these
lowlevel functions.

> +void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
> +                                    u32 cookie, const char *name)
> +{
> +     struct dentry *parent;
> +     struct inode *inode;
> +
> +     spin_lock(&dentry->d_lock);
> +     parent = dentry->d_parent;
> +     inode = parent->d_inode;
> +     if (!list_empty(&inode->inotify_watches)) {

So what's protecting ->inotify_watches?  There can be multiple dentries
so d_lock is not sufficient.

> +
> +/**
> + * inotify_super_block_umount - process watches on an unmounted fs
> + * @sb: the super_block of the filesystem in question
> + */
> +void inotify_super_block_umount(struct super_block *sb)
> +{
> +     struct inode *inode;
> +
> +     /* Walk the list of inodes and find those on this superblock */
> +     spin_lock(&inode_lock);
> +     list_for_each_entry(inode, &inode_in_use, i_list) {
> +             struct inotify_watch *watch, *next;
> +             struct list_head *watches;
> +
> +             if (inode->i_sb != sb)
> +                     continue;
> +
> +             /* for each watch, send IN_UNMOUNT and then remove it */
> +             spin_lock(&inode->inotify_lock);
> +             watches = &inode->inotify_watches;
> +             list_for_each_entry_safe(watch, next, watches, i_list) {
> +                     struct inotify_device *dev = watch->dev;
> +                     spin_lock(&dev->lock);
> +                     inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
> +                     remove_watch(watch, dev);
> +                     spin_unlock(&dev->lock);
> +             }
> +             spin_unlock(&inode->inotify_lock);
> +     }
> +     spin_unlock(&inode_lock);
> +}
> +EXPORT_SYMBOL_GPL(inotify_super_block_umount);

-mm has a list of inodes per superblock, which Andrew said he'd send
along to lines, you should probably use that one.  In fact I wonder
whether you'd be better of just calling a per-inode removal hook from
invalidate_inodes.

> +static unsigned int inotify_poll(struct file *file, poll_table *wait)
> +{
> +        struct inotify_device *dev;
> +     int ret = 0;
> +
> +        dev = file->private_data;
> +     get_inotify_dev(dev);

tabs vs spaces messups.

> +static int inotify_add_watch(struct inotify_device *dev,
> +                          struct inotify_watch_request *request)
> +{
> +     struct inode *inode;
> +     struct inotify_watch *watch, *old;
> +     struct nameidata nd;
> +     int ret;
> +
> +     ret = __user_walk(request->name, LOOKUP_FOLLOW, &nd);
> +     if (unlikely(ret))
> +             return ret;
> +
> +     /* you can only watch an inode if you have read permissions on it */
> +     ret = permission(nd.dentry->d_inode, MAY_READ, NULL);

permission takes a nameidata as last parameter, so please do so aswell.

> +
> +     switch (cmd) {
> +     case INOTIFY_WATCH:
> +             if (unlikely(copy_from_user(&request, p, sizeof (request)))) {
> +                     ret = -EFAULT;
> +                     break;
> +             }
> +             ret = inotify_add_watch(dev, &request);
> +             break;
> +     case INOTIFY_IGNORE:
> +             if (unlikely(copy_from_user(&wd, p, sizeof (wd)))) {
> +                     ret = -EFAULT;
> +                     break;
> +             }
> +             ret = inotify_ignore(dev, wd);
> +             break;
> +     case FIONREAD:
> +             ret = put_user(dev->queue_size, (int __user *) p);
> +             break;
> +     }
> +
> +     put_inotify_dev(dev);
> +
> +     return ret;

Both INOTIFY_WATCH and INOTIFY_IGNORE could easily be implemented as
writes to /dev/inotify, echo "wath /path/to/foo 42", echo "ignore 23".

> +static struct miscdevice inotify_device = {
> +     .minor  = MISC_DYNAMIC_MINOR,
> +     .name   = "inotify",
> +     .fops   = &inotify_fops,
> +};

Should probably use the /dev/mem major.

> +     default y

please don't default a new and experimental facility to y.  In fact
default is totally overused.

> +     if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))
> +             fsnotify_unlink(dentry->d_inode, dir, dentry);

first argument can be deduced from the last.

> +                     fsnotify_open(dentry, dentry->d_inode,
> +                                   dentry->d_name.name);

second and third argument can be taken from the first

> +#ifdef CONFIG_INOTIFY
> +     struct list_head        inotify_watches;  /* watches on this inode */
> +     spinlock_t              inotify_lock;  /* protects the watches list */
> +#endif

do you really need a spinlock of your own in every inode?  Inode memory
usage is a quite big problem.

> +static inline void fsnotify_rmdir(struct dentry *dentry, struct inode *inode,
> +                               struct inode *dir)
> +{
> +     inode_dir_notify(dir, DN_DELETE);
> +     inotify_inode_queue_event(dir, IN_DELETE_SUBDIR,0,dentry->d_name.name);
> +     inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL);
> +
> +     inotify_inode_is_dead(inode);
> +     d_delete(dentry);

why is the d_delete in here?

> +/*
> + * fsnotify_change - notify_change event.  file was modified and/or metadata
> + * was changed.
> + */
> +static inline void fsnotify_change(struct dentry *dentry, unsigned int 
> ia_valid)

this one is far too large to be inlined.

> +/*
> + * struct inotify_event - structure read from the inotify device for each 
> event
> + *
> + * When you are watching a directory, you will receive the filename for 
> events
> + * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ..., relative to the wd.
> + */
> +struct inotify_event {
> +     __s32           wd;             /* watch descriptor */
> +     __u32           mask;           /* watch mask */
> +     __u32           cookie;         /* cookie to synchronize two events */
> +     size_t          len;            /* length (including nulls) of name */

please don't use size_t in a user ABI.  Just use __u32

> diff -urN linux-2.6.11-mm1/include/linux/sched.h linux/include/linux/sched.h
> --- linux-2.6.11-mm1/include/linux/sched.h    2005-03-04 14:06:21.766292400 
> -0500
> +++ linux/include/linux/sched.h       2005-03-04 13:27:05.586486176 -0500
> @@ -411,6 +411,8 @@
>       atomic_t processes;     /* How many processes does this user have? */
>       atomic_t files;         /* How many open files does this user have? */
>       atomic_t sigpending;    /* How many pending signals does this user 
> have? */
> +     atomic_t inotify_watches;       /* How many inotify watches does this 
> user have? */
> +     atomic_t inotify_devs;  /* How many inotify devs does this user have 
> opened? */

Why are these unconditional, unlike the inode ones?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch] inotify for 2.6.11-mm1

Reply via email to