On 31.05.2021 19:55, Valeriy Vdovin wrote:
Current kernel version wards a priviledged task from running unsecure
binaries via exec or uselib syscalls. The condition of unsecurity of
the subject binary is decided via a function 've_trusted_exec', which
performs a series of sub-checks to see if the binary is located inside
of a container image. This is considered unsafe and is only allowed in
case if admin explicitly allows to run executable code from that image
or the system as a whole. The problem is that dynamic librarie also
have another way aside from uselib to link the library to a running
process. Simple userspace linker would simply read out elf sections
and load all the loadable sections from the file to the process address
space via mmap syscall. If the secion is of executable type the linker
will mmap this section with PROC_EXEC flag. This way it is possible
to load potetialy malicious library into a priviledged task and there
is no mechanism to stop it.
This can happen if the priviledged task has entered a mount namespace,
that belongs to some container and call some function that is not linked
into the process already. Another possible way to link to a container
library is to provide LD_LIBRARY_PATH=/vz/root/VEID/lib64 env var to
a starting process. In both cases any lazy-linked function will trigger
library loading at the moment the function from that library gets called
for the first time.
To stop the code from a container library to get executed in the host-
level process, let's add ve_trusted_exec to the mmap code under
condition that mmap has been called in PROT_EXEC non-anonymous mode.

Signed-off-by: Valeriy Vdovin <[email protected]>
---
  fs/exec.c          | 44 +++----------------------------
  include/linux/ve.h |  2 ++
  kernel/ve/ve.c     | 64 ++++++++++++++++++++++++++++++++++++++++++++++
  mm/mmap.c          |  5 ++++
  4 files changed, 74 insertions(+), 41 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 61195aee6cfb..1201cd64b366 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,7 @@
  #include <linux/compat.h>
  #include <linux/vmalloc.h>
  #include <linux/sysctl.h>
+#include <linux/ve.h>
#include <linux/uaccess.h>
  #include <asm/mmu_context.h>
@@ -112,45 +113,6 @@ bool path_noexec(const struct path *path)
               (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
  }
-/* Send signal only 3 times a day so that coredumps don't overflow the disk */
-#define SIGSEGV_RATELIMIT_INTERVAL     (24 * 60 * 60 * HZ)
-#define SIGSEGV_RATELIMIT_BURST                3
-
-/*
- * We don't want a VE0-privileged user intentionally or by mistake
- * to execute files of container, these files are untrusted.
- */
-bool ve_exec_trusted(struct file *file, struct filename *name)
-{
-       struct block_device *bdev = file->f_inode->i_sb->s_bdev;
-       bool exec_from_ct = !ve_is_super(get_exec_env());
-       bool file_on_ct_mount = !ve_is_super(
-               real_mount(file->f_path.mnt)->ve_owner);
-       bool file_on_trusted_disk = true;
-       static DEFINE_RATELIMIT_STATE(sigsegv_rs, SIGSEGV_RATELIMIT_INTERVAL,
-                                                 SIGSEGV_RATELIMIT_BURST);
-
-       if (trusted_exec)
-               return true;
-
-       if (bdev && bdev->bd_disk)
-               file_on_trusted_disk = bdev->bd_disk->vz_trusted_exec;
-
-       if (exec_from_ct)
-               return true;
-
-       if (file_on_trusted_disk && !file_on_ct_mount)
-               return true;
-
-       if (!__ratelimit(&sigsegv_rs))
-               return false;
-
-       WARN(1, "VE0's %s tried to execute untrusted file %s from VEX\n",
-               current->comm, name->name);
-       force_sigsegv(SIGSEGV, current);
-       return false;
-}
-
  #ifdef CONFIG_USELIB
  /*
   * Note that a shared library must be both readable and executable due to
@@ -187,7 +149,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
        if (path_noexec(&file->f_path))
                goto exit;
- if (!ve_exec_trusted(file, tmp))
+       if (!ve_check_trusted_exec(file, tmp))
                goto exit;
putname(tmp);
@@ -910,7 +872,7 @@ static struct file *do_open_execat(int fd, struct filename 
*name, int flags)
        if (path_noexec(&file->f_path))
                goto exit;
- if (!ve_exec_trusted(file, name))
+       if (!ve_check_trusted_exec(file, name))
                goto exit;
err = deny_write_access(file);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 4ca1f122a413..a24202fbb3a3 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -159,6 +159,8 @@ extern void put_ve(struct ve_struct *ve);
void ve_stop_ns(struct pid_namespace *ns);
  void ve_exit_ns(struct pid_namespace *ns);
+bool ve_check_trusted_exec(struct file *file, struct filename *name);
+bool ve_check_trusted_mmap(struct file *file, int prot);
static inline struct ve_struct *css_to_ve(struct cgroup_subsys_state *css)
  {
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 5bf187705f2a..39144e9059f0 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -1644,6 +1644,70 @@ int vz_security_protocol_check(struct net *net, int 
protocol)
  }
  EXPORT_SYMBOL_GPL(vz_security_protocol_check);
+static bool ve_check_trusted_file(struct file *file)
+{
+       struct block_device *bdev = file->f_inode->i_sb->s_bdev;
+       bool exec_from_ct = !ve_is_super(get_exec_env());
+       bool file_on_ct_mount = !ve_is_super(
+               real_mount(file->f_path.mnt)->ve_owner);
+       bool file_on_trusted_disk = true;
+
+       if (trusted_exec)
+               return true;
+
+       if (bdev && bdev->bd_disk)
+               file_on_trusted_disk = bdev->bd_disk->vz_trusted_exec;
+
+       if (exec_from_ct)
+               return true;

While on it, can we reorder ifs the way file_on_trusted_disk is set and check would be one after another, without exec_from_ct if in between ?

+
+       if (file_on_trusted_disk && !file_on_ct_mount)
+               return true;
+       return false;
+}
+
+
+/* Send signal only 3 times a day so that coredumps don't overflow the disk */
+#define SIGSEGV_RATELIMIT_INTERVAL     (24 * 60 * 60 * HZ)
+#define SIGSEGV_RATELIMIT_BURST                3
+
+bool ve_check_trusted_mmap(struct file *file, int prot)
+{
+       static DEFINE_RATELIMIT_STATE(sigsegv_rs,
+               SIGSEGV_RATELIMIT_INTERVAL, SIGSEGV_RATELIMIT_BURST);
+
+       if ((prot & PROT_EXEC) && !ve_check_exec_trusted(file)) {

ve_check_exec_trusted() looks missing. Probably missprint and ve_check_trusted_file() was meant?

+               if (!__ratelimit(&sigsegv_rs))
+                       return false;
+
+               WARN(1, "VE0 %s tried to map untrusted code from VEX\n",
+                       current->comm);

Can we at least print name of file file->f_path->dentry->d_name.name (if available) ?

+               force_sigsegv(SIGSEGV, current);
+               return false;
+       }
+       return true;
+}
+
+/*
+ * We don't want a VE0-privileged user intentionally or by mistake
+ * to execute files of container, these files are untrusted.
+ */
+bool ve_check_trusted_exec(struct file *file, struct filename *name) > +{
+       static DEFINE_RATELIMIT_STATE(sigsegv_rs, SIGSEGV_RATELIMIT_INTERVAL,
+                                                 SIGSEGV_RATELIMIT_BURST);
+       if (ve_check_trusted_file(file))
+               return true;
+
+       if (!__ratelimit(&sigsegv_rs))
+               return false;
+
+       WARN(1, "VE0's %s tried to execute untrusted file %s from VEX\n",
+               current->comm, name->name);
+       force_sigsegv(SIGSEGV, current);
+       return false;
+}
+
  #ifdef CONFIG_CGROUP_SCHED
  int cpu_cgroup_proc_stat(struct cgroup_subsys_state *cpu_css,
                         struct cgroup_subsys_state *cpuacct_css,
diff --git a/mm/mmap.c b/mm/mmap.c
index 2d736d66df95..ad6b1f646460 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
  #include <linux/pkeys.h>
  #include <linux/oom.h>
  #include <linux/sched/mm.h>
+#include <linux/ve.h>
#include <linux/uaccess.h>
  #include <asm/cacheflush.h>
@@ -1547,6 +1548,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, 
unsigned long len,
                file = fget(fd);
                if (!file)
                        return -EBADF;
+
+               if (ve_check_trusted_mmap(file, prot))
+                       return -EBADF;

Do we need to handle case of mm/nommu.c:ksys_mmap_pgoff() too ?
Probably it would be better if we move the check to vm_mmap_pgoff() ?

+
                if (is_file_hugepages(file))
                        len = ALIGN(len, huge_page_size(hstate_file(file)));
                retval = -EINVAL;


--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to