Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=9cfe015aa424b3c003baba3841a60dd9b5ad319b
Commit:     9cfe015aa424b3c003baba3841a60dd9b5ad319b
Parent:     774ed22c21ab95d582dfff38560f11cf290baeb4
Author:     Eric Dumazet <[EMAIL PROTECTED]>
AuthorDate: Wed Feb 6 01:37:16 2008 -0800
Committer:  Linus Torvalds <[EMAIL PROTECTED]>
CommitDate: Wed Feb 6 10:41:06 2008 -0800

    get rid of NR_OPEN and introduce a sysctl_nr_open
    
    NR_OPEN (historically set to 1024*1024) actually forbids processes to open
    more than 1024*1024 handles.
    
    Unfortunatly some production servers hit the not so 'ridiculously high
    value' of 1024*1024 file descriptors per process.
    
    Changing NR_OPEN is not considered safe because of vmalloc space potential
    exhaust.
    
    This patch introduces a new sysctl (/proc/sys/fs/nr_open) wich defaults to
    1024*1024, so that admins can decide to change this limit if their workload
    needs it.
    
    [EMAIL PROTECTED]: export it for sparc64]
    Signed-off-by: Eric Dumazet <[EMAIL PROTECTED]>
    Cc: Alan Cox <[EMAIL PROTECTED]>
    Cc: Richard Henderson <[EMAIL PROTECTED]>
    Cc: Ivan Kokshaysky <[EMAIL PROTECTED]>
    Cc: "David S. Miller" <[EMAIL PROTECTED]>
    Cc: Ralf Baechle <[EMAIL PROTECTED]>
    Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
    Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
---
 Documentation/filesystems/proc.txt  |    8 ++++++++
 Documentation/sysctl/fs.txt         |   10 ++++++++++
 arch/alpha/kernel/osf_sys.c         |    2 +-
 arch/mips/kernel/sysirix.c          |    2 +-
 arch/sparc64/kernel/sparc64_ksyms.c |    1 +
 arch/sparc64/solaris/fs.c           |    2 +-
 arch/sparc64/solaris/timod.c        |    6 ++++--
 fs/file.c                           |    8 +++++---
 include/linux/fs.h                  |    2 +-
 kernel/sys.c                        |    2 +-
 kernel/sysctl.c                     |    8 ++++++++
 11 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/Documentation/filesystems/proc.txt 
b/Documentation/filesystems/proc.txt
index e2799b5..5681e2f 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1029,6 +1029,14 @@ nr_inodes
 Denotes the  number  of  inodes the system has allocated. This number will
 grow and shrink dynamically.
 
+nr_open
+-------
+
+Denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
 nr_free_inodes
 --------------
 
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index aa986a3..f992543 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
 - inode-max
 - inode-nr
 - inode-state
+- nr_open
 - overflowuid
 - overflowgid
 - suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the 
maximum.
 
 ==============================================================
 
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
 inode-max, inode-nr & inode-state:
 
 As with file handles, the kernel allocates the inode structures
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 6413c5f..72f9a61 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -430,7 +430,7 @@ sys_getpagesize(void)
 asmlinkage unsigned long
 sys_getdtablesize(void)
 {
-       return NR_OPEN;
+       return sysctl_nr_open;
 }
 
 /*
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index 4c477c7..22fd41e 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -356,7 +356,7 @@ asmlinkage int irix_syssgi(struct pt_regs *regs)
                        retval = NGROUPS_MAX;
                        goto out;
                case 5:
-                       retval = NR_OPEN;
+                       retval = sysctl_nr_open;
                        goto out;
                case 6:
                        retval = 1;
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c 
b/arch/sparc64/kernel/sparc64_ksyms.c
index 60765e3..8649635 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -277,6 +277,7 @@ EXPORT_SYMBOL(sys_getpid);
 EXPORT_SYMBOL(sys_geteuid);
 EXPORT_SYMBOL(sys_getuid);
 EXPORT_SYMBOL(sys_getegid);
+EXPORT_SYMBOL(sysctl_nr_open);
 EXPORT_SYMBOL(sys_getgid);
 EXPORT_SYMBOL(svr4_getcontext);
 EXPORT_SYMBOL(svr4_setcontext);
diff --git a/arch/sparc64/solaris/fs.c b/arch/sparc64/solaris/fs.c
index 61be597..9311bfe 100644
--- a/arch/sparc64/solaris/fs.c
+++ b/arch/sparc64/solaris/fs.c
@@ -624,7 +624,7 @@ asmlinkage int solaris_ulimit(int cmd, int val)
        case 3: /* UL_GMEMLIM */
                return current->signal->rlim[RLIMIT_DATA].rlim_cur;
        case 4: /* UL_GDESLIM */
-               return NR_OPEN;
+               return sysctl_nr_open;
        }
        return -EINVAL;
 }
diff --git a/arch/sparc64/solaris/timod.c b/arch/sparc64/solaris/timod.c
index a9d32ce..f53123c 100644
--- a/arch/sparc64/solaris/timod.c
+++ b/arch/sparc64/solaris/timod.c
@@ -859,7 +859,8 @@ asmlinkage int solaris_getmsg(unsigned int fd, u32 arg1, 
u32 arg2, u32 arg3)
 
        SOLD("entry");
        lock_kernel();
-       if(fd >= NR_OPEN) goto out;
+       if (fd >= sysctl_nr_open)
+               goto out;
 
        fdt = files_fdtable(current->files);
        filp = fdt->fd[fd];
@@ -927,7 +928,8 @@ asmlinkage int solaris_putmsg(unsigned int fd, u32 arg1, 
u32 arg2, u32 arg3)
 
        SOLD("entry");
        lock_kernel();
-       if(fd >= NR_OPEN) goto out;
+       if (fd >= sysctl_nr_open)
+               goto out;
 
        fdt = files_fdtable(current->files);
        filp = fdt->fd[fd];
diff --git a/fs/file.c b/fs/file.c
index c5575de..5110acb 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -24,6 +24,8 @@ struct fdtable_defer {
        struct fdtable *next;
 };
 
+int sysctl_nr_open __read_mostly = 1024*1024;
+
 /*
  * We use this list to defer free fdtables that have vmalloced
  * sets/arrays. By keeping a per-cpu list, we avoid having to embed
@@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
        nr /= (1024 / sizeof(struct file *));
        nr = roundup_pow_of_two(nr + 1);
        nr *= (1024 / sizeof(struct file *));
-       if (nr > NR_OPEN)
-               nr = NR_OPEN;
+       if (nr > sysctl_nr_open)
+               nr = sysctl_nr_open;
 
        fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
        if (!fdt)
@@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr)
        if (nr < fdt->max_fds)
                return 0;
        /* Can we expand? */
-       if (nr >= NR_OPEN)
+       if (nr >= sysctl_nr_open)
                return -EMFILE;
 
        /* All good, so we try */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 19aab50..109734b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,7 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-#define NR_OPEN (1024*1024)    /* Absolute upper limit on fd num */
+extern int sysctl_nr_open;
 #define INR_OPEN 1024          /* Initial setting for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
diff --git a/kernel/sys.c b/kernel/sys.c
index 53de35f..2b8e2da 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1472,7 +1472,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, 
struct rlimit __user *rlim)
        if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
            !capable(CAP_SYS_RESOURCE))
                return -EPERM;
-       if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
+       if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
                return -EPERM;
 
        retval = security_task_setrlimit(resource, &new_rlim);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5e2ad5b..86daaa2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1203,6 +1203,14 @@ static struct ctl_table fs_table[] = {
                .proc_handler   = &proc_dointvec,
        },
        {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "nr_open",
+               .data           = &sysctl_nr_open,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
                .ctl_name       = FS_DENTRY,
                .procname       = "dentry-state",
                .data           = &dentry_stat,
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to