Re: [RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR

2017-10-04 Thread Mateusz Guzik
On Tue, Jul 11, 2017 at 06:50:52PM +0530, Sandhya Bankar wrote:
> Instead of storing all the file pointers in a single array, use an
> IDR.  It is RCU-safe, and does not need to be reallocated when the
> fd array grows.  It also handles allocation of new file descriptors.
> 
> ---
>  
[snip]
> @@ -604,22 +576,9 @@ void put_unused_fd(unsigned int fd)
>  void __fd_install(struct files_struct *files, unsigned int fd,
>   struct file *file)
>  {
> - struct fdtable *fdt;
> -
> - might_sleep();
> - rcu_read_lock_sched();
> -
> - while (unlikely(files->resize_in_progress)) {
> - rcu_read_unlock_sched();
> - wait_event(files->resize_wait, !files->resize_in_progress);
> - rcu_read_lock_sched();
> - }
> - /* coupled with smp_wmb() in expand_fdtable() */
> - smp_rmb();
> - fdt = rcu_dereference_sched(files->fdt);
> - BUG_ON(fdt->fd[fd] != NULL);
> - rcu_assign_pointer(fdt->fd[fd], file);
> - rcu_read_unlock_sched();
> + rcu_read_lock();
> + BUG_ON(idr_replace(>fd_idr, file, fd));
> + rcu_read_unlock();
>  }
>  
>  void fd_install(unsigned int fd, struct file *file)
> @@ -641,10 +600,9 @@ int __close_fd(struct files_struct *files, unsigned fd)
>   fdt = files_fdtable(files);
>   if (fd >= fdt->max_fds)
>   goto out_unlock;
> - file = fdt->fd[fd];
> + file = idr_remove(>fd_idr, fd);
>   if (!file)
>   goto out_unlock;
> - rcu_assign_pointer(fdt->fd[fd], NULL);
>   __clear_close_on_exec(fd, fdt);
>   __put_unused_fd(files, fd);
>   spin_unlock(>file_lock);

I have no opinions about the switch, however these 2 places make me
worried. I did not check all the changes so perhaps I missed something.

In the current code we are safe when it comes to concurrent install and
close, in particular here:

CPU0CPU1
alloc_fd
__close_fd
fd_install

__close_fd will either see a NULL pointer and return -EBADF or will see
an installed pointer and proceed with the close.

Your proposed patch seems to be buggy in this regard.

You call idr_remove, which from what I understand will free up the slot
no matter what. You only detect an error based on whether there was a
non-NULL pointer there or not. If so, fd_install can proceed to play
with a deallocated entry.

-- 
Mateusz Guzik


Re: [RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR

2017-10-04 Thread Mateusz Guzik
On Tue, Jul 11, 2017 at 06:50:52PM +0530, Sandhya Bankar wrote:
> Instead of storing all the file pointers in a single array, use an
> IDR.  It is RCU-safe, and does not need to be reallocated when the
> fd array grows.  It also handles allocation of new file descriptors.
> 
> ---
>  
[snip]
> @@ -604,22 +576,9 @@ void put_unused_fd(unsigned int fd)
>  void __fd_install(struct files_struct *files, unsigned int fd,
>   struct file *file)
>  {
> - struct fdtable *fdt;
> -
> - might_sleep();
> - rcu_read_lock_sched();
> -
> - while (unlikely(files->resize_in_progress)) {
> - rcu_read_unlock_sched();
> - wait_event(files->resize_wait, !files->resize_in_progress);
> - rcu_read_lock_sched();
> - }
> - /* coupled with smp_wmb() in expand_fdtable() */
> - smp_rmb();
> - fdt = rcu_dereference_sched(files->fdt);
> - BUG_ON(fdt->fd[fd] != NULL);
> - rcu_assign_pointer(fdt->fd[fd], file);
> - rcu_read_unlock_sched();
> + rcu_read_lock();
> + BUG_ON(idr_replace(>fd_idr, file, fd));
> + rcu_read_unlock();
>  }
>  
>  void fd_install(unsigned int fd, struct file *file)
> @@ -641,10 +600,9 @@ int __close_fd(struct files_struct *files, unsigned fd)
>   fdt = files_fdtable(files);
>   if (fd >= fdt->max_fds)
>   goto out_unlock;
> - file = fdt->fd[fd];
> + file = idr_remove(>fd_idr, fd);
>   if (!file)
>   goto out_unlock;
> - rcu_assign_pointer(fdt->fd[fd], NULL);
>   __clear_close_on_exec(fd, fdt);
>   __put_unused_fd(files, fd);
>   spin_unlock(>file_lock);

I have no opinions about the switch, however these 2 places make me
worried. I did not check all the changes so perhaps I missed something.

In the current code we are safe when it comes to concurrent install and
close, in particular here:

CPU0CPU1
alloc_fd
__close_fd
fd_install

__close_fd will either see a NULL pointer and return -EBADF or will see
an installed pointer and proceed with the close.

Your proposed patch seems to be buggy in this regard.

You call idr_remove, which from what I understand will free up the slot
no matter what. You only detect an error based on whether there was a
non-NULL pointer there or not. If so, fd_install can proceed to play
with a deallocated entry.

-- 
Mateusz Guzik


[RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR

2017-07-11 Thread Sandhya Bankar
Instead of storing all the file pointers in a single array, use an
IDR.  It is RCU-safe, and does not need to be reallocated when the
fd array grows.  It also handles allocation of new file descriptors.

Signed-off-by: Sandhya Bankar 
[mawil...@microsoft.com: fixes]
Signed-off-by: Matthew Wilcox 
---
 fs/file.c   | 180 
 include/linux/fdtable.h |  10 +--
 2 files changed, 79 insertions(+), 111 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index ad6f094..1c000d8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -47,7 +47,6 @@ static void *alloc_fdmem(size_t size)
 
 static void __free_fdtable(struct fdtable *fdt)
 {
-   kvfree(fdt->fd);
kvfree(fdt->open_fds);
kfree(fdt);
 }
@@ -89,15 +88,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct 
fdtable *ofdt,
  */
 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 {
-   unsigned int cpy, set;
-
BUG_ON(nfdt->max_fds < ofdt->max_fds);
-
-   cpy = ofdt->max_fds * sizeof(struct file *);
-   set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
-   memcpy(nfdt->fd, ofdt->fd, cpy);
-   memset((char *)nfdt->fd + cpy, 0, set);
-
copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
 }
 
@@ -131,15 +122,11 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
if (!fdt)
goto out;
fdt->max_fds = nr;
-   data = alloc_fdmem(nr * sizeof(struct file *));
-   if (!data)
-   goto out_fdt;
-   fdt->fd = data;
 
data = alloc_fdmem(max_t(size_t,
 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), 
L1_CACHE_BYTES));
if (!data)
-   goto out_arr;
+   goto out_fdt;
fdt->open_fds = data;
data += nr / BITS_PER_BYTE;
fdt->close_on_exec = data;
@@ -148,8 +135,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 
return fdt;
 
-out_arr:
-   kvfree(fdt->fd);
 out_fdt:
kfree(fdt);
 out:
@@ -170,6 +155,7 @@ static int expand_fdtable(struct files_struct *files, 
unsigned int nr)
struct fdtable *new_fdt, *cur_fdt;
 
spin_unlock(>file_lock);
+   idr_preload_end();
new_fdt = alloc_fdtable(nr);
 
/* make sure all __fd_install() have seen resize_in_progress
@@ -178,6 +164,7 @@ static int expand_fdtable(struct files_struct *files, 
unsigned int nr)
if (atomic_read(>count) > 1)
synchronize_sched();
 
+   idr_preload(GFP_KERNEL);
spin_lock(>file_lock);
if (!new_fdt)
return -ENOMEM;
@@ -228,8 +215,10 @@ static int expand_files(struct files_struct *files, 
unsigned int nr)
 
if (unlikely(files->resize_in_progress)) {
spin_unlock(>file_lock);
+   idr_preload_end();
expanded = 1;
wait_event(files->resize_wait, !files->resize_in_progress);
+   idr_preload(GFP_KERNEL);
spin_lock(>file_lock);
goto repeat;
}
@@ -290,8 +279,8 @@ static unsigned int count_open_files(struct fdtable *fdt)
 struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
struct files_struct *newf;
-   struct file **old_fds, **new_fds;
unsigned int open_files, i;
+   struct file *f;
struct fdtable *old_fdt, *new_fdt;
 
*errorp = -ENOMEM;
@@ -302,6 +291,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
atomic_set(>count, 1);
 
spin_lock_init(>file_lock);
+   idr_init(>fd_idr);
newf->resize_in_progress = false;
init_waitqueue_head(>resize_wait);
newf->next_fd = 0;
@@ -310,8 +300,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
new_fdt->close_on_exec = newf->close_on_exec_init;
new_fdt->open_fds = newf->open_fds_init;
new_fdt->full_fds_bits = newf->full_fds_bits_init;
-   new_fdt->fd = >fd_array[0];
 
+restart:
+   idr_copy_preload(>fd_idr, GFP_KERNEL);
spin_lock(>file_lock);
old_fdt = files_fdtable(oldf);
open_files = count_open_files(old_fdt);
@@ -321,6 +312,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
 */
while (unlikely(open_files > new_fdt->max_fds)) {
spin_unlock(>file_lock);
+   idr_preload_end();
 
if (new_fdt != >fdtab)
__free_fdtable(new_fdt);
@@ -343,41 +335,50 @@ struct files_struct *dup_fd(struct files_struct *oldf, 
int *errorp)
 * who knows it may have a new bigger fd table. We need
 * the latest pointer.
 */
+   idr_copy_preload(>fd_idr, GFP_KERNEL);
spin_lock(>file_lock);
old_fdt = files_fdtable(oldf);
open_files = 

[RESEND PATCH 05/13] vfs: Replace array of file pointers with an IDR

2017-07-11 Thread Sandhya Bankar
Instead of storing all the file pointers in a single array, use an
IDR.  It is RCU-safe, and does not need to be reallocated when the
fd array grows.  It also handles allocation of new file descriptors.

Signed-off-by: Sandhya Bankar 
[mawil...@microsoft.com: fixes]
Signed-off-by: Matthew Wilcox 
---
 fs/file.c   | 180 
 include/linux/fdtable.h |  10 +--
 2 files changed, 79 insertions(+), 111 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index ad6f094..1c000d8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -47,7 +47,6 @@ static void *alloc_fdmem(size_t size)
 
 static void __free_fdtable(struct fdtable *fdt)
 {
-   kvfree(fdt->fd);
kvfree(fdt->open_fds);
kfree(fdt);
 }
@@ -89,15 +88,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct 
fdtable *ofdt,
  */
 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 {
-   unsigned int cpy, set;
-
BUG_ON(nfdt->max_fds < ofdt->max_fds);
-
-   cpy = ofdt->max_fds * sizeof(struct file *);
-   set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
-   memcpy(nfdt->fd, ofdt->fd, cpy);
-   memset((char *)nfdt->fd + cpy, 0, set);
-
copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
 }
 
@@ -131,15 +122,11 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
if (!fdt)
goto out;
fdt->max_fds = nr;
-   data = alloc_fdmem(nr * sizeof(struct file *));
-   if (!data)
-   goto out_fdt;
-   fdt->fd = data;
 
data = alloc_fdmem(max_t(size_t,
 2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), 
L1_CACHE_BYTES));
if (!data)
-   goto out_arr;
+   goto out_fdt;
fdt->open_fds = data;
data += nr / BITS_PER_BYTE;
fdt->close_on_exec = data;
@@ -148,8 +135,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 
return fdt;
 
-out_arr:
-   kvfree(fdt->fd);
 out_fdt:
kfree(fdt);
 out:
@@ -170,6 +155,7 @@ static int expand_fdtable(struct files_struct *files, 
unsigned int nr)
struct fdtable *new_fdt, *cur_fdt;
 
spin_unlock(>file_lock);
+   idr_preload_end();
new_fdt = alloc_fdtable(nr);
 
/* make sure all __fd_install() have seen resize_in_progress
@@ -178,6 +164,7 @@ static int expand_fdtable(struct files_struct *files, 
unsigned int nr)
if (atomic_read(>count) > 1)
synchronize_sched();
 
+   idr_preload(GFP_KERNEL);
spin_lock(>file_lock);
if (!new_fdt)
return -ENOMEM;
@@ -228,8 +215,10 @@ static int expand_files(struct files_struct *files, 
unsigned int nr)
 
if (unlikely(files->resize_in_progress)) {
spin_unlock(>file_lock);
+   idr_preload_end();
expanded = 1;
wait_event(files->resize_wait, !files->resize_in_progress);
+   idr_preload(GFP_KERNEL);
spin_lock(>file_lock);
goto repeat;
}
@@ -290,8 +279,8 @@ static unsigned int count_open_files(struct fdtable *fdt)
 struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
struct files_struct *newf;
-   struct file **old_fds, **new_fds;
unsigned int open_files, i;
+   struct file *f;
struct fdtable *old_fdt, *new_fdt;
 
*errorp = -ENOMEM;
@@ -302,6 +291,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
atomic_set(>count, 1);
 
spin_lock_init(>file_lock);
+   idr_init(>fd_idr);
newf->resize_in_progress = false;
init_waitqueue_head(>resize_wait);
newf->next_fd = 0;
@@ -310,8 +300,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
new_fdt->close_on_exec = newf->close_on_exec_init;
new_fdt->open_fds = newf->open_fds_init;
new_fdt->full_fds_bits = newf->full_fds_bits_init;
-   new_fdt->fd = >fd_array[0];
 
+restart:
+   idr_copy_preload(>fd_idr, GFP_KERNEL);
spin_lock(>file_lock);
old_fdt = files_fdtable(oldf);
open_files = count_open_files(old_fdt);
@@ -321,6 +312,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
 */
while (unlikely(open_files > new_fdt->max_fds)) {
spin_unlock(>file_lock);
+   idr_preload_end();
 
if (new_fdt != >fdtab)
__free_fdtable(new_fdt);
@@ -343,41 +335,50 @@ struct files_struct *dup_fd(struct files_struct *oldf, 
int *errorp)
 * who knows it may have a new bigger fd table. We need
 * the latest pointer.
 */
+   idr_copy_preload(>fd_idr, GFP_KERNEL);
spin_lock(>file_lock);
old_fdt = files_fdtable(oldf);
open_files = count_open_files(old_fdt);
}
 
+   if