Re: [RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR
On Tue, Jul 11, 2017 at 06:50:52PM +0530, Sandhya Bankar wrote: > Instead of storing all the file pointers in a single array, use an > IDR. It is RCU-safe, and does not need to be reallocated when the > fd array grows. It also handles allocation of new file descriptors. > > --- > [snip] > @@ -604,22 +576,9 @@ void put_unused_fd(unsigned int fd) > void __fd_install(struct files_struct *files, unsigned int fd, > struct file *file) > { > - struct fdtable *fdt; > - > - might_sleep(); > - rcu_read_lock_sched(); > - > - while (unlikely(files->resize_in_progress)) { > - rcu_read_unlock_sched(); > - wait_event(files->resize_wait, !files->resize_in_progress); > - rcu_read_lock_sched(); > - } > - /* coupled with smp_wmb() in expand_fdtable() */ > - smp_rmb(); > - fdt = rcu_dereference_sched(files->fdt); > - BUG_ON(fdt->fd[fd] != NULL); > - rcu_assign_pointer(fdt->fd[fd], file); > - rcu_read_unlock_sched(); > + rcu_read_lock(); > + BUG_ON(idr_replace(>fd_idr, file, fd)); > + rcu_read_unlock(); > } > > void fd_install(unsigned int fd, struct file *file) > @@ -641,10 +600,9 @@ int __close_fd(struct files_struct *files, unsigned fd) > fdt = files_fdtable(files); > if (fd >= fdt->max_fds) > goto out_unlock; > - file = fdt->fd[fd]; > + file = idr_remove(>fd_idr, fd); > if (!file) > goto out_unlock; > - rcu_assign_pointer(fdt->fd[fd], NULL); > __clear_close_on_exec(fd, fdt); > __put_unused_fd(files, fd); > spin_unlock(>file_lock); I have no opinions about the switch, however these 2 places make me worried. I did not check all the changes so perhaps I missed something. In the current code we are safe when it comes to concurrent install and close, in particular here: CPU0CPU1 alloc_fd __close_fd fd_install __close_fd will either see a NULL pointer and return -EBADF or will see an installed pointer and proceed with the close. Your proposed patch seems to be buggy in this regard. You call idr_remove, which from what I understand will free up the slot no matter what. You only detect an error based on whether there was a non-NULL pointer there or not. If so, fd_install can proceed to play with a deallocated entry. -- Mateusz Guzik
Re: [RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR
On Tue, Jul 11, 2017 at 06:50:52PM +0530, Sandhya Bankar wrote: > Instead of storing all the file pointers in a single array, use an > IDR. It is RCU-safe, and does not need to be reallocated when the > fd array grows. It also handles allocation of new file descriptors. > > --- > [snip] > @@ -604,22 +576,9 @@ void put_unused_fd(unsigned int fd) > void __fd_install(struct files_struct *files, unsigned int fd, > struct file *file) > { > - struct fdtable *fdt; > - > - might_sleep(); > - rcu_read_lock_sched(); > - > - while (unlikely(files->resize_in_progress)) { > - rcu_read_unlock_sched(); > - wait_event(files->resize_wait, !files->resize_in_progress); > - rcu_read_lock_sched(); > - } > - /* coupled with smp_wmb() in expand_fdtable() */ > - smp_rmb(); > - fdt = rcu_dereference_sched(files->fdt); > - BUG_ON(fdt->fd[fd] != NULL); > - rcu_assign_pointer(fdt->fd[fd], file); > - rcu_read_unlock_sched(); > + rcu_read_lock(); > + BUG_ON(idr_replace(>fd_idr, file, fd)); > + rcu_read_unlock(); > } > > void fd_install(unsigned int fd, struct file *file) > @@ -641,10 +600,9 @@ int __close_fd(struct files_struct *files, unsigned fd) > fdt = files_fdtable(files); > if (fd >= fdt->max_fds) > goto out_unlock; > - file = fdt->fd[fd]; > + file = idr_remove(>fd_idr, fd); > if (!file) > goto out_unlock; > - rcu_assign_pointer(fdt->fd[fd], NULL); > __clear_close_on_exec(fd, fdt); > __put_unused_fd(files, fd); > spin_unlock(>file_lock); I have no opinions about the switch, however these 2 places make me worried. I did not check all the changes so perhaps I missed something. In the current code we are safe when it comes to concurrent install and close, in particular here: CPU0CPU1 alloc_fd __close_fd fd_install __close_fd will either see a NULL pointer and return -EBADF or will see an installed pointer and proceed with the close. Your proposed patch seems to be buggy in this regard. You call idr_remove, which from what I understand will free up the slot no matter what. You only detect an error based on whether there was a non-NULL pointer there or not. If so, fd_install can proceed to play with a deallocated entry. -- Mateusz Guzik
[RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR
Instead of storing all the file pointers in a single array, use an IDR. It is RCU-safe, and does not need to be reallocated when the fd array grows. It also handles allocation of new file descriptors. Signed-off-by: Sandhya Bankar[mawil...@microsoft.com: fixes] Signed-off-by: Matthew Wilcox --- fs/file.c | 180 include/linux/fdtable.h | 10 +-- 2 files changed, 79 insertions(+), 111 deletions(-) diff --git a/fs/file.c b/fs/file.c index ad6f094..1c000d8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -47,7 +47,6 @@ static void *alloc_fdmem(size_t size) static void __free_fdtable(struct fdtable *fdt) { - kvfree(fdt->fd); kvfree(fdt->open_fds); kfree(fdt); } @@ -89,15 +88,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, */ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) { - unsigned int cpy, set; - BUG_ON(nfdt->max_fds < ofdt->max_fds); - - cpy = ofdt->max_fds * sizeof(struct file *); - set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); - memcpy(nfdt->fd, ofdt->fd, cpy); - memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); } @@ -131,15 +122,11 @@ static struct fdtable * alloc_fdtable(unsigned int nr) if (!fdt) goto out; fdt->max_fds = nr; - data = alloc_fdmem(nr * sizeof(struct file *)); - if (!data) - goto out_fdt; - fdt->fd = data; data = alloc_fdmem(max_t(size_t, 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES)); if (!data) - goto out_arr; + goto out_fdt; fdt->open_fds = data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = data; @@ -148,8 +135,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr) return fdt; -out_arr: - kvfree(fdt->fd); out_fdt: kfree(fdt); out: @@ -170,6 +155,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) struct fdtable *new_fdt, *cur_fdt; spin_unlock(>file_lock); + idr_preload_end(); new_fdt = alloc_fdtable(nr); /* make sure all __fd_install() have seen resize_in_progress @@ -178,6 +164,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) if (atomic_read(>count) > 1) synchronize_sched(); + idr_preload(GFP_KERNEL); spin_lock(>file_lock); if (!new_fdt) return -ENOMEM; @@ -228,8 +215,10 @@ static int expand_files(struct files_struct *files, unsigned int nr) if (unlikely(files->resize_in_progress)) { spin_unlock(>file_lock); + idr_preload_end(); expanded = 1; wait_event(files->resize_wait, !files->resize_in_progress); + idr_preload(GFP_KERNEL); spin_lock(>file_lock); goto repeat; } @@ -290,8 +279,8 @@ static unsigned int count_open_files(struct fdtable *fdt) struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { struct files_struct *newf; - struct file **old_fds, **new_fds; unsigned int open_files, i; + struct file *f; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; @@ -302,6 +291,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(>count, 1); spin_lock_init(>file_lock); + idr_init(>fd_idr); newf->resize_in_progress = false; init_waitqueue_head(>resize_wait); newf->next_fd = 0; @@ -310,8 +300,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; - new_fdt->fd = >fd_array[0]; +restart: + idr_copy_preload(>fd_idr, GFP_KERNEL); spin_lock(>file_lock); old_fdt = files_fdtable(oldf); open_files = count_open_files(old_fdt); @@ -321,6 +312,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) */ while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(>file_lock); + idr_preload_end(); if (new_fdt != >fdtab) __free_fdtable(new_fdt); @@ -343,41 +335,50 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * who knows it may have a new bigger fd table. We need * the latest pointer. */ + idr_copy_preload(>fd_idr, GFP_KERNEL); spin_lock(>file_lock); old_fdt = files_fdtable(oldf); open_files =
[RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR
Instead of storing all the file pointers in a single array, use an IDR. It is RCU-safe, and does not need to be reallocated when the fd array grows. It also handles allocation of new file descriptors. Signed-off-by: Sandhya Bankar [mawil...@microsoft.com: fixes] Signed-off-by: Matthew Wilcox --- fs/file.c | 180 include/linux/fdtable.h | 10 +-- 2 files changed, 79 insertions(+), 111 deletions(-) diff --git a/fs/file.c b/fs/file.c index ad6f094..1c000d8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -47,7 +47,6 @@ static void *alloc_fdmem(size_t size) static void __free_fdtable(struct fdtable *fdt) { - kvfree(fdt->fd); kvfree(fdt->open_fds); kfree(fdt); } @@ -89,15 +88,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, */ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) { - unsigned int cpy, set; - BUG_ON(nfdt->max_fds < ofdt->max_fds); - - cpy = ofdt->max_fds * sizeof(struct file *); - set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); - memcpy(nfdt->fd, ofdt->fd, cpy); - memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); } @@ -131,15 +122,11 @@ static struct fdtable * alloc_fdtable(unsigned int nr) if (!fdt) goto out; fdt->max_fds = nr; - data = alloc_fdmem(nr * sizeof(struct file *)); - if (!data) - goto out_fdt; - fdt->fd = data; data = alloc_fdmem(max_t(size_t, 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES)); if (!data) - goto out_arr; + goto out_fdt; fdt->open_fds = data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = data; @@ -148,8 +135,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr) return fdt; -out_arr: - kvfree(fdt->fd); out_fdt: kfree(fdt); out: @@ -170,6 +155,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) struct fdtable *new_fdt, *cur_fdt; spin_unlock(>file_lock); + idr_preload_end(); new_fdt = alloc_fdtable(nr); /* make sure all __fd_install() have seen resize_in_progress @@ -178,6 +164,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) if (atomic_read(>count) > 1) synchronize_sched(); + idr_preload(GFP_KERNEL); spin_lock(>file_lock); if (!new_fdt) return -ENOMEM; @@ -228,8 +215,10 @@ static int expand_files(struct files_struct *files, unsigned int nr) if (unlikely(files->resize_in_progress)) { spin_unlock(>file_lock); + idr_preload_end(); expanded = 1; wait_event(files->resize_wait, !files->resize_in_progress); + idr_preload(GFP_KERNEL); spin_lock(>file_lock); goto repeat; } @@ -290,8 +279,8 @@ static unsigned int count_open_files(struct fdtable *fdt) struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { struct files_struct *newf; - struct file **old_fds, **new_fds; unsigned int open_files, i; + struct file *f; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; @@ -302,6 +291,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(>count, 1); spin_lock_init(>file_lock); + idr_init(>fd_idr); newf->resize_in_progress = false; init_waitqueue_head(>resize_wait); newf->next_fd = 0; @@ -310,8 +300,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; new_fdt->full_fds_bits = newf->full_fds_bits_init; - new_fdt->fd = >fd_array[0]; +restart: + idr_copy_preload(>fd_idr, GFP_KERNEL); spin_lock(>file_lock); old_fdt = files_fdtable(oldf); open_files = count_open_files(old_fdt); @@ -321,6 +312,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) */ while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(>file_lock); + idr_preload_end(); if (new_fdt != >fdtab) __free_fdtable(new_fdt); @@ -343,41 +335,50 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * who knows it may have a new bigger fd table. We need * the latest pointer. */ + idr_copy_preload(>fd_idr, GFP_KERNEL); spin_lock(>file_lock); old_fdt = files_fdtable(oldf); open_files = count_open_files(old_fdt); } + if