Looks like it's been too long without Varnish and too long without C for me...
>From 389ebc6310356d31793b75f19264bd078d34a5e7 Mon Sep 17 00:00:00 2001 From: Nils Goroll <[email protected]> Date: Thu, 29 Sep 2011 12:42:31 +0200 Subject: [PATCH] solaris sandbox / least privileges overhaul:
- [e0ee2a2e69654a9df74aaf3dcadc9639659cf42b] adds the file_read privilege needed for onnv_140 and newer (see #912), but we also need the file_write privilege for stevedore access. - If available, keep sys_resource in the permitted/limited set to allow cache_waiter_ports to raise the process.max-port-events resource control (feature to be added later). - When starting varnish with euid 0 on Solaris, privilege separation prohibited preserving additional privileges (in excess of the basic set) in the child, because, for a non-privilege-aware process, setuid() resets the effective, inheritable and permitted sets to the basic set. To achieve interoperability between Solaris privileges and setuid()/setgid(), we now make the varnish child privilege-aware before calling setuid() by trying to add all privileges we will need plus proc_setid. - On Solaris, check for proc_setid rather than checking the euid as a prerequisite for changing the uid/gid and only change the uid/gid if we need to (for a privilege-aware process, [ers]uid 0 lose their magic powers). Note that setuid() will always set SNOCD on Solaris, which will prevent core dumps from being written, unless setuid core dumps are explicitly enabled using coreadm(1M). To avoid setuid() (and the SNOCD flag, consequently), start varnish as the user you intend to run the child as, but with additional privileges, e.g. using ppriv -e -s A=basic,net_privaddr,sys_resource varnishd ... - setppriv(PRIV_SET, ...) failed when the privileges to be applied were not available in the permitted set. We change the logic to only clear the privileges which are not needed by inverting the sets and removing all unneeded privileges using setppriv(PRIV_OFF, ...). So the child might end up with fewer privileges than given initially, but unneeded privileges will always be waived. 
--- bin/varnishd/mgt_sandbox.c | 243 +++++++++++++++++++++++++++++++++++++------- 1 files changed, 207 insertions(+), 36 deletions(-) diff --git a/bin/varnishd/mgt_sandbox.c b/bin/varnishd/mgt_sandbox.c index a5eee2f..8dc9e42 100644 --- a/bin/varnishd/mgt_sandbox.c +++ b/bin/varnishd/mgt_sandbox.c @@ -3,6 +3,7 @@ * All rights reserved. * * Author: Poul-Henning Kamp <[email protected]> + * Nils Goroll <[email protected]> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -37,6 +38,8 @@ * This sourcefile tries to encapsulate the resulting mess on place. * * TODO: + * os-specific source files ? + * * Unix: chroot * FreeBSD: jail * FreeBSD: capsicum @@ -47,6 +50,7 @@ #include <stdio.h> #include <syslog.h> #include <unistd.h> +#include <string.h> #ifdef HAVE_PRIV_H #include <priv.h> @@ -61,58 +65,225 @@ /*--------------------------------------------------------------------*/ -/* Waive all privileges in the child, it does not need any */ +#ifdef HAVE_SETPPRIV -void -mgt_sandbox(void) +/* + * SOLARIS PRIVILEGES: Note on use of symbolic PRIV_* constants + * + * For privileges which existed in Solaris 10 FCS, we may use the constants from + * sys/priv_names.h + * + * For privileges which have been added later, we need to use strings in order + * not to break builds of varnish on these platforms. To remain binary + * compatible, we need to silently ignore errors from priv_addset when using + * these strings. + * + * For optimal build and binary forward comatibility, we could use subtractive + * set specs like + * + * basic,!file_link_any,!proc_exec,!proc_fork,!proc_info,!proc_session + * + * but I (Nils) have a preference for making an informed decision about which + * privileges the varnish child should have and which it shouldn't. 
+ * + * Newly introduced privileges should be annotated with their PSARC / commit ID + * (as long as Oracle reveals these :/ ) + * + * SOLARIS PRIVILEGES: Note on accidentally setting the SNOCD flag + * + * When setting privileges, we need to take care not to accidentally set the + * SNOCD flag which will disable core dumps unnecessarily. (see + * https://www.varnish-cache.org/trac/ticket/671 ) + * + * When changing the logic herein, always check with mdb -k. Replace _PID_ with + * the pid of your varnish child, the result should be 0, otherwise a regression + * has been introduced. + * + * > 0t_PID_::pid2proc | ::print proc_t p_flag | >a + * > (<a & 0x10000000)=X + * 0 + * + * (a value of 0x10000000 indicates that SNOCD is set) + * + * NOTE that on Solaris changing the uid will _always_ set SNOCD, so make sure + * you run this test with appropriate privileges, but without proc_setid, so + * varnish won't setuid(), e.g. + * + * pfexec ppriv -e -s A=basic,net_privaddr,sys_resource varnish ... 
+ * + * SOLARIS COREDUMPS with setuid(): See coreadm(1M) - global-setid / proc-setid + * + */ + +/* effective during runtime of the child */ +static inline void +mgt_sandbox_solaris_add_effective(priv_set_t *pset) { + /* PSARC/2009/685 - 8eca52188202 - onnv_132 */ + priv_addset(pset, "net_access"); + /* PSARC/2009/378 - 63678502e95e - onnv_140 */ + priv_addset(pset, "file_read"); + priv_addset(pset, "file_write"); +} + +/* permitted during runtime of the child - for privilege bracketing */ +static inline void +mgt_sandbox_solaris_add_permitted(priv_set_t *pset) +{ + /* for raising limits in cache_waiter_ports.c */ + priv_addset(pset, PRIV_SYS_RESOURCE); +} + +/* effective during mgt_sandbox */ +static inline void +mgt_sandbox_solaris_add_initial(priv_set_t *pset) +{ + /* for setgid/setuid */ + priv_addset(pset, PRIV_PROC_SETID); +} + +/* + * if we are not yet privilege-aware already (ie we have been started + * not-privilege aware wird euid 0), we need to grab any additional privileges + * needed during mgt_standbox, until we reduce to least privileges in + * mgt_sandbox_waive, otherwise we would loose them with setuid() + */ + +static void +mgt_sandbox_init(void) +{ + priv_set_t *priv_all; + + if (! (priv_all = priv_allocset())) { + REPORT(LOG_ERR, + "Child start warning: mgt_sandbox_init - priv_allocset failed: errno=%d (%s)", + errno, strerror(errno)); + return; + } + + priv_emptyset(priv_all); + + mgt_sandbox_solaris_add_effective(priv_all); + mgt_sandbox_solaris_add_permitted(priv_all); + mgt_sandbox_solaris_add_initial(priv_all); + + setppriv(PRIV_ON, PRIV_PERMITTED, priv_all); + setppriv(PRIV_ON, PRIV_EFFECTIVE, priv_all); + setppriv(PRIV_ON, PRIV_INHERITABLE, priv_all); + + priv_freeset(priv_all); +} + +/* + * Waive most privileges in the child + * + * as of onnv_151a, we should end up with: + * + * > ppriv -v #pid of varnish child + * PID: .../varnishd ... 
+ * flags = PRIV_AWARE + * E: file_read,file_write,net_access + * I: none + * P: file_read,file_write,net_access,sys_resource + * L: file_read,file_write,net_access,sys_resource + * + * We should keep sys_resource in P in order to adjust our limits if we need to + */ +static void +mgt_sandbox_waive(void) +{ + priv_set_t *effective, *inheritable, *permitted; + + if (!(effective = priv_allocset()) || + !(inheritable = priv_allocset()) || + !(permitted = priv_allocset())) { + REPORT(LOG_ERR, + "Child start warning: mgt_sandbox_waive - priv_allocset failed: errno=%d (%s)", + errno, strerror(errno)); + return; + } + + priv_emptyset(inheritable); + + priv_emptyset(effective); + mgt_sandbox_solaris_add_effective(effective); + + priv_copyset(effective, permitted); + mgt_sandbox_solaris_add_permitted(permitted); + + /* + * invert the sets and clear privileges such that setppriv will always + * succeed + */ + priv_inverse(inheritable); + priv_inverse(effective); + priv_inverse(permitted); + +#define SETPPRIV(which, set) \ + if (setppriv(PRIV_OFF, which, set)) \ + REPORT(LOG_ERR, \ + "Child start warning: Waiving privileges failed on %s: errno=%d (%s)", \ + #which, errno, strerror(errno)); + + SETPPRIV(PRIV_INHERITABLE, inheritable); + SETPPRIV(PRIV_EFFECTIVE, effective); + SETPPRIV(PRIV_PERMITTED, permitted); + SETPPRIV(PRIV_LIMIT, permitted); +#undef SETPPRIV + + priv_freeset(inheritable); + priv_freeset(effective); +} + +#else /* HAVE_SETPPRIV */ + +static void +mgt_sandbox_init(void) +{} + +static void +mgt_sandbox_waive(void) +{} + +#endif /* HAVE_SETPPRIV */ + +static void +mgt_sandbox_privsep(void) +{ +#ifdef HAVE_SETPPRIV + if (priv_ineffect(PRIV_PROC_SETID)) { + if (getgid() != params->gid) + XXXAZ(setgid(params->gid)); + if (getuid() != params->uid) + XXXAZ(setuid(params->uid)); + } else { + REPORT(LOG_INFO, "Privilege %s missing, will not change uid/gid", PRIV_PROC_SETID); + } +#else if (geteuid() == 0) { XXXAZ(setgid(params->gid)); XXXAZ(setuid(params->uid)); } 
else { REPORT0(LOG_INFO, "Not running as root, no priv-sep"); } +#endif +} + +void +mgt_sandbox(void) +{ + mgt_sandbox_init(); + mgt_sandbox_privsep(); + /* On Linux >= 2.4, you need to set the dumpable flag to get core dumps after you have done a setuid. */ - #ifdef __linux__ if (prctl(PR_SET_DUMPABLE, 1) != 0) REPORT0(LOG_INFO, "Could not set dumpable bit. Core dumps turned off\n"); #endif -#ifdef HAVE_SETPPRIV - priv_set_t *empty, *minimal; - - if (!(empty = priv_allocset()) || - !(minimal = priv_allocset())) { - REPORT0(LOG_ERR, "priv_allocset_failed"); - } else { - priv_emptyset(empty); - priv_emptyset(minimal); - - /* - * new privilege, - * silently ignore any errors if it doesn't exist - */ - priv_addset(minimal, "net_access"); - priv_addset(minimal, "file_read"); - -#define SETPPRIV(which, set) \ - if (setppriv(PRIV_SET, which, set)) \ - REPORT0(LOG_ERR, \ - "Waiving privileges failed on " #which) - - /* need to set I after P to avoid SNOCD being set */ - SETPPRIV(PRIV_LIMIT, minimal); - SETPPRIV(PRIV_PERMITTED, minimal); /* implies PRIV_EFFECTIVE */ - SETPPRIV(PRIV_INHERITABLE, empty); - - priv_freeset(empty); - priv_freeset(minimal); - } -#endif - + mgt_sandbox_waive(); } -- 1.5.6.5
_______________________________________________ varnish-dev mailing list [email protected] https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev
