>From fe6acd0790685029cb2da8251598d13acce3fed4 Mon Sep 17 00:00:00 2001
From: Nils Goroll <[email protected]>
Date: Thu, 29 Sep 2011 12:42:31 +0200
Subject: [PATCH] solaris sandbox / least privileges overhaul:

- [e0ee2a2e69654a9df74aaf3dcadc9639659cf42b] adds the file_read
  privilege needed for onnv_140 and newer (see #912), but we also need
  the file_write privilege for stevedore access.

- If available, keep sys_resource in the permitted/limited set to
  allow cache_waiter_ports to raise the process.max-port-events
  resource control (feature to be added later).

- When starting varnish with euid 0 on Solaris, privilege separation
  prohibited preserving additional privileges (in excess of the basic
  set) in the child, because, for a non privilege aware process,
  setuid() resets the effective, inheritable and permitted sets to the
  basic set.

  To achieve interoperability between solaris privileges and
  setuid()/setgid(), we now make the varnish child privilege aware
  before calling setuid() by trying to add all privileges we will need
  plus proc_setid.

- On solaris, check for proc_setid rather than checking the euid as a
  prerequisite for changing the uid/gid and only change the uid/gid if
  we need to (for a privilege aware process, [ers]uid 0 lose their
  magic powers).

  Note that setuid() will always set SNOCD on Solaris, which will
  prevent core dumps from being written, unless setuid core dumps are
  explicitly enabled using coreadm(1M).

  To avoid setuid() (and the SNOCD flag, consequently), start varnish
  as the user you intend to run the child as, but with additional
  privileges, e.g. using

  ppriv -e -s A=basic,net_privaddr,sys_resource varnishd ...

- setppriv(PRIV_SET, ...) failed when the privileges to be applied
  were not available in the permitted set.

  We change the logic to only clear the privileges which are not
  needed by inverting the sets and removing all unneeded privileges
  using setppriv(PRIV_OFF, ...).

  So the child might end up with less privileges than given initially,
  but unneeded privileges will always be waived.
---
 bin/varnishd/mgt_sandbox.c |  241 +++++++++++++++++++++++++++++++++++++-------
 1 files changed, 206 insertions(+), 35 deletions(-)

diff --git a/bin/varnishd/mgt_sandbox.c b/bin/varnishd/mgt_sandbox.c
index a5eee2f..317aac8 100644
--- a/bin/varnishd/mgt_sandbox.c
+++ b/bin/varnishd/mgt_sandbox.c
@@ -3,6 +3,7 @@
  * All rights reserved.
  *
  * Author: Poul-Henning Kamp <[email protected]>
+ *        Nils Goroll <[email protected]>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -37,6 +38,8 @@
  * This sourcefile tries to encapsulate the resulting mess on place.
  *
  * TODO:
+ *     os-specific source files ?
+ *
  *     Unix:   chroot
  *     FreeBSD: jail
  *     FreeBSD: capsicum
@@ -47,6 +50,7 @@
 #include <stdio.h>
 #include <syslog.h>
 #include <unistd.h>
+#include <string.h>
 
 #ifdef HAVE_PRIV_H
 #include <priv.h>
@@ -61,58 +65,225 @@
 
 /*--------------------------------------------------------------------*/
 
-/* Waive all privileges in the child, it does not need any */
+#ifdef HAVE_SETPPRIV
+
+/*
+ * SOLARIS PRIVILEGES: Note on use of symbolic PRIV_* constants
+ * 
+ * For privileges which existed in Solaris 10 FCS, we may use the constants from
+ * sys/priv_names.h
+ *
+ * For privileges which have been added later, we need to use strings in order
+ * not to break builds of varnish on these platforms. To remain binary
+ * compatible, we need to silently ignore errors from priv_addset when using
+ * these strings.
+ *
+ * For optimal build and binary forward compatibility, we could use subtractive
+ * set specs like
+ *
+ *       basic,!file_link_any,!proc_exec,!proc_fork,!proc_info,!proc_session
+ *
+ * but I (Nils) have a preference for making an informed decision about which
+ * privileges the varnish child should have and which it shouldn't.
+ *
+ * Newly introduced privileges should be annotated with their PSARC / commit ID
+ * (as long as Oracle reveals these :/ )
+ *
+ * SOLARIS PRIVILEGES: Note on accidentally setting the SNOCD flag
+ *
+ * When setting privileges, we need to take care not to accidentally set the
+ * SNOCD flag which will disable core dumps unnecessarily. (see
+ * https://www.varnish-cache.org/trac/ticket/671 )
+ *
+ * When changing the logic herein, always check with mdb -k. Replace _PID_ with
+ * the pid of your varnish child, the result should be 0, otherwise a regression
+ * has been introduced.
+ *
+ * > 0t_PID_::pid2proc | ::print proc_t p_flag | >a
+ * > (<a & 0x10000000)=X
+ *                 0
+ *
+ * (a value of 0x10000000 indicates that SNOCD is set)
+ *
+ * NOTE that on Solaris changing the uid will _always_ set SNOCD, so make sure
+ * you run this test with appropriate privileges, but without proc_setid, so
+ * varnish won't setuid(), e.g.
+ *
+ * pfexec ppriv -e -s A=basic,net_privaddr,sys_resource varnishd ...
+ *
+ * SOLARIS COREDUMPS with setuid(): See coreadm(1M) - global-setid / proc-setid
+ *
+ */
+
+/* Add to priv_set the privileges that are effective while the child runs. */
+static inline void
+mgt_sandbox_solaris_add_effective(priv_set_t *priv_set)
+{
+       /* new in onnv_132 (PSARC/2009/685 - 8eca52188202), so named by string */
+       priv_addset(priv_set, "net_access");
+
+       /* new in onnv_140 (PSARC/2009/378 - 63678502e95e), so named by string */
+       priv_addset(priv_set, "file_read");
+       priv_addset(priv_set, "file_write");
+}
+
+/* Add privileges only kept in the permitted set, for privilege bracketing. */
+static inline void
+mgt_sandbox_solaris_add_permitted(priv_set_t *priv_set)
+{
+       /* lets cache_waiter_ports.c raise its resource limits later on */
+       priv_addset(priv_set, PRIV_SYS_RESOURCE);
+}
+
+/* Add the privileges needed only while mgt_sandbox itself is running. */
+static inline void
+mgt_sandbox_solaris_add_initial(priv_set_t *priv_set)
+{
+       /* required for the setgid()/setuid() calls */
+       priv_addset(priv_set, PRIV_PROC_SETID);
+}
+
+/*
+ * If we are not privilege-aware yet (i.e. we were started non-privilege-aware
+ * with euid 0), try to grab every privilege needed during mgt_sandbox before
+ * setuid() would reset us to the basic set. This is best effort (setppriv
+ * results are ignored); mgt_sandbox_waive reduces to least privileges later.
+ */
 
 void
-mgt_sandbox(void)
+mgt_sandbox_init(void)
+{
+       priv_set_t *priv_all;
+
+       if (! (priv_all = priv_allocset())) {
+               REPORT(LOG_ERR,
+                   "Child start warning: mgt_sandbox_init - priv_allocset failed: errno=%d (%s)",
+                   errno, strerror(errno));
+               return;
+       }
+
+       priv_emptyset(priv_all);
+
+       mgt_sandbox_solaris_add_effective(priv_all);
+       mgt_sandbox_solaris_add_permitted(priv_all);
+       mgt_sandbox_solaris_add_initial(priv_all);
+
+       setppriv(PRIV_ON, PRIV_PERMITTED, priv_all);
+       setppriv(PRIV_ON, PRIV_EFFECTIVE, priv_all);
+       setppriv(PRIV_ON, PRIV_INHERITABLE, priv_all);
+
+       priv_freeset(priv_all);
+}
+
+/*
+ * Waive most privileges in the child
+ *
+ * as of onnv_151a, we should end up with:
+ *
+ * > ppriv -v #pid of varnish child
+ * PID:  .../varnishd ...
+ * flags = PRIV_AWARE
+ *      E: file_read,file_write,net_access
+ *      I: none
+ *      P: file_read,file_write,net_access,sys_resource
+ *      L: file_read,file_write,net_access,sys_resource
+ *
+ * We should keep sys_resource in P in order to adjust our limits if we need to
+ */
+void
+mgt_sandbox_waive(void)
 {
+       priv_set_t *effective, *inheritable, *permitted;
+
+       if (!(effective = priv_allocset()) ||
+           !(inheritable = priv_allocset()) ||
+           !(permitted = priv_allocset())) {
+               REPORT(LOG_ERR,
+                   "Child start warning: mgt_sandbox_waive - priv_allocset failed: errno=%d (%s)",
+                   errno, strerror(errno));
+               return;
+       }
+
+       priv_emptyset(inheritable);
+       priv_emptyset(effective);
+       mgt_sandbox_solaris_add_effective(effective);
+
+       priv_copyset(effective, permitted);
+       mgt_sandbox_solaris_add_permitted(permitted);
+
+       /*
+        * the sets name what to keep: invert them and drop the rest with
+        * PRIV_OFF, so setppriv does not fail on privileges we never had
+        */
+       priv_inverse(inheritable);
+       priv_inverse(effective);
+       priv_inverse(permitted);
+
+#define SETPPRIV(which, set)                                           \
+       if (setppriv(PRIV_OFF, which, set))                             \
+               REPORT(LOG_ERR,                                         \
+                   "Child start warning: Waiving privileges failed on %s: errno=%d (%s)", \
+                   #which, errno, strerror(errno));
+
+       SETPPRIV(PRIV_INHERITABLE, inheritable);
+       SETPPRIV(PRIV_EFFECTIVE, effective);
+       SETPPRIV(PRIV_PERMITTED, permitted);
+       SETPPRIV(PRIV_LIMIT, permitted);
+#undef SETPPRIV
 
+       priv_freeset(inheritable);
+       priv_freeset(effective);
+       priv_freeset(permitted);
+}
+
+#else /* !HAVE_SETPPRIV: no setppriv(), init and waive are no-ops */
+
+void
+mgt_sandbox_init(void)
+{}
+
+void
+mgt_sandbox_waive(void)
+{}
+
+#endif /* HAVE_SETPPRIV */
+
+void
+mgt_sandbox_privsep(void)
+{
+#ifdef HAVE_SETPPRIV           /* Solaris: proc_setid, not euid 0, permits set[ug]id */
+       if (priv_ineffect(PRIV_PROC_SETID)) {
+               if (getgid() != params->gid)
+                       XXXAZ(setgid(params->gid));
+               if (getuid() != params->uid)    /* skip no-op calls: setuid() always sets SNOCD */
+                       XXXAZ(setuid(params->uid));
+       } else {
+               REPORT(LOG_INFO, "Privilege %s missing, will not change uid/gid", PRIV_PROC_SETID);
+       }
+#else
        if (geteuid() == 0) {
                XXXAZ(setgid(params->gid));
                XXXAZ(setuid(params->uid));
        } else {
                REPORT0(LOG_INFO, "Not running as root, no priv-sep");
        }
+#endif
+}
 
+void
+mgt_sandbox(void)
+{
+       mgt_sandbox_init();
+
+       mgt_sandbox_privsep();
+    
        /* On Linux >= 2.4, you need to set the dumpable flag
           to get core dumps after you have done a setuid. */
-
 #ifdef __linux__
        if (prctl(PR_SET_DUMPABLE, 1) != 0)
                REPORT0(LOG_INFO,
                    "Could not set dumpable bit.  Core dumps turned off\n");
 #endif
 
-#ifdef HAVE_SETPPRIV
-       priv_set_t *empty, *minimal;
-
-       if (!(empty = priv_allocset()) ||
-           !(minimal = priv_allocset())) {
-               REPORT0(LOG_ERR, "priv_allocset_failed");
-       } else {
-               priv_emptyset(empty);
-               priv_emptyset(minimal);
-
-               /*
-                * new privilege,
-                * silently ignore any errors if it doesn't exist
-                */
-               priv_addset(minimal, "net_access");
-               priv_addset(minimal, "file_read");
-
-#define SETPPRIV(which, set)                                           \
-               if (setppriv(PRIV_SET, which, set))                     \
-                       REPORT0(LOG_ERR,                                \
-                           "Waiving privileges failed on " #which)
-
-               /* need to set I after P to avoid SNOCD being set */
-               SETPPRIV(PRIV_LIMIT, minimal);
-               SETPPRIV(PRIV_PERMITTED, minimal); /* implies PRIV_EFFECTIVE */
-               SETPPRIV(PRIV_INHERITABLE, empty);
-
-               priv_freeset(empty);
-               priv_freeset(minimal);
-       }
-#endif
-
+       mgt_sandbox_waive();
 }
-- 
1.5.6.5

_______________________________________________
varnish-dev mailing list
[email protected]
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev

Reply via email to