Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges
Flavio Leitner writes: > On Tue, Mar 07, 2023 at 02:32:04PM +0100, David Marchand wrote: >> On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole wrote: >> > >> > Open vSwitch generally tries to let the underlying operating system >> > managed the low level details of hardware, for example DMA mapping, >> > bus arbitration, etc. However, when using DPDK, the underlying >> > operating system yields control of many of these details to userspace >> > for management. >> > >> > In the case of some DPDK port drivers, configuring rte_flow or even >> > allocating resources may require access to iopl/ioperm calls, which >> > are guarded by the CAP_SYS_RAWIO privilege on linux systems. These >> > calls are dangerous, and can allow a process to completely compromise >> > a system. However, they are needed in the case of some userspace >> > driver code which manages the hardware (for example, the mlx >> > implementation of backend support for rte_flow). >> > >> > Here, we create an opt-in flag passed to the command line to allow >> > this access. We need to do this before ever accessing the database, >> > because we want to drop all privileges asap, and cannot wait for >> > a connection to the database to be established and functional before >> > dropping. There may be distribution specific ways to do capability >> > management as well (using for example, systemd), but they are not >> > as universal to the vswitchd as a flag. >> > >> > Reviewed-by: Simon Horman >> > Signed-off-by: Aaron Conole >> > --- >> > NEWS | 4 >> > lib/daemon-unix.c | 29 + >> > lib/daemon-windows.c | 3 ++- >> > lib/daemon.c | 2 +- >> > lib/daemon.h | 4 ++-- >> > ovsdb/ovsdb-client.c | 6 +++--- >> > ovsdb/ovsdb-server.c | 4 ++-- >> > tests/test-netflow.c | 2 +- >> > tests/test-sflow.c | 2 +- >> > tests/test-unixctl.c | 2 +- >> > utilities/ovs-ofctl.c | 4 ++-- >> > utilities/ovs-testcontroller.c | 4 ++-- >> > vswitchd/ovs-vswitchd.8.in | 8 >> > vswitchd/ovs-vswitchd.c| 11 ++- >> > 14 files changed, 60 insertions(+), 25 deletions(-) >> > >> > diff --git a/NEWS b/NEWS >> > index 85b3496214..65f35dcdd5 100644 >> > --- a/NEWS >> > +++ b/NEWS >> > @@ -10,6 +10,10 @@ Post-v3.1.0 >> > in order to create OVSDB sockets with access mode of 0770. >> > - QoS: >> > * Added new configuration option 'jitter' for a linux-netem QoS type. >> > + - DPDK: >> > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started >> > + with the --hw-rawio-access command line option. This allows the >> > + process extra privileges when mapping physical interconnect memory. >> > >> > >> > v3.1.0 - 16 Feb 2023 >> > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c >> > index 1a7ba427d7..a080facddc 100644 >> > --- a/lib/daemon-unix.c >> > +++ b/lib/daemon-unix.c >> > @@ -88,7 +88,8 @@ static bool switch_user = false; >> > static uid_t uid; >> > static gid_t gid; >> > static char *user = NULL; >> > -static void daemon_become_new_user__(bool access_datapath); >> > +static void daemon_become_new_user__(bool access_datapath, >> > + bool access_hardware_ports); >> > >> > static void check_already_running(void); >> > static int lock_pidfile(FILE *, int command); >> > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid) >> > * daemonize_complete()) or that it failed to start up (by exiting with a >> > * nonzero exit code). */ >> > void >> > -daemonize_start(bool access_datapath) >> > +daemonize_start(bool access_datapath, bool access_hardware_ports) >> > { >> > assert_single_threaded(); >> > daemonize_fd = -1; >> > >> > if (switch_user) { >> > -daemon_become_new_user__(access_datapath); >> > +daemon_become_new_user__(access_datapath, access_hardware_ports); >> > switch_user = false; >> > } >> > >> > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void) >> > /* Linux specific implementation of daemon_become_new_user() >> > * using libcap-ng. */ >> > static void >> > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) >> > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, >> > + bool access_hardware_ports OVS_UNUSED) >> > { >> > #if defined __linux__ && HAVE_LIBCAPNG >> > int ret; >> > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath >> > OVS_UNUSED) >> > ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) >> >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) >> >|| capng_update(CAPNG_ADD, cap_sets, >> > CAP_NET_BROADCAST); >> > +#ifdef DPDK_NETDEV >> > +if (access_hardware_ports && !ret) { >> > +ret = capng_update(CAPNG_ADD,
Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges
David Marchand writes: > On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole wrote: >> >> Open vSwitch generally tries to let the underlying operating system >> managed the low level details of hardware, for example DMA mapping, >> bus arbitration, etc. However, when using DPDK, the underlying >> operating system yields control of many of these details to userspace >> for management. >> >> In the case of some DPDK port drivers, configuring rte_flow or even >> allocating resources may require access to iopl/ioperm calls, which >> are guarded by the CAP_SYS_RAWIO privilege on linux systems. These >> calls are dangerous, and can allow a process to completely compromise >> a system. However, they are needed in the case of some userspace >> driver code which manages the hardware (for example, the mlx >> implementation of backend support for rte_flow). >> >> Here, we create an opt-in flag passed to the command line to allow >> this access. We need to do this before ever accessing the database, >> because we want to drop all privileges asap, and cannot wait for >> a connection to the database to be established and functional before >> dropping. There may be distribution specific ways to do capability >> management as well (using for example, systemd), but they are not >> as universal to the vswitchd as a flag. >> >> Reviewed-by: Simon Horman >> Signed-off-by: Aaron Conole >> --- >> NEWS | 4 >> lib/daemon-unix.c | 29 + >> lib/daemon-windows.c | 3 ++- >> lib/daemon.c | 2 +- >> lib/daemon.h | 4 ++-- >> ovsdb/ovsdb-client.c | 6 +++--- >> ovsdb/ovsdb-server.c | 4 ++-- >> tests/test-netflow.c | 2 +- >> tests/test-sflow.c | 2 +- >> tests/test-unixctl.c | 2 +- >> utilities/ovs-ofctl.c | 4 ++-- >> utilities/ovs-testcontroller.c | 4 ++-- >> vswitchd/ovs-vswitchd.8.in | 8 >> vswitchd/ovs-vswitchd.c| 11 ++- >> 14 files changed, 60 insertions(+), 25 deletions(-) >> >> diff --git a/NEWS b/NEWS >> index 85b3496214..65f35dcdd5 100644 >> --- a/NEWS >> +++ b/NEWS >> @@ -10,6 +10,10 @@ Post-v3.1.0 >> in order to create OVSDB sockets with access mode of 0770. >> - QoS: >> * Added new configuration option 'jitter' for a linux-netem QoS type. >> + - DPDK: >> + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started >> + with the --hw-rawio-access command line option. This allows the >> + process extra privileges when mapping physical interconnect memory. >> >> >> v3.1.0 - 16 Feb 2023 >> diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c >> index 1a7ba427d7..a080facddc 100644 >> --- a/lib/daemon-unix.c >> +++ b/lib/daemon-unix.c >> @@ -88,7 +88,8 @@ static bool switch_user = false; >> static uid_t uid; >> static gid_t gid; >> static char *user = NULL; >> -static void daemon_become_new_user__(bool access_datapath); >> +static void daemon_become_new_user__(bool access_datapath, >> + bool access_hardware_ports); >> >> static void check_already_running(void); >> static int lock_pidfile(FILE *, int command); >> @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid) >> * daemonize_complete()) or that it failed to start up (by exiting with a >> * nonzero exit code). */ >> void >> -daemonize_start(bool access_datapath) >> +daemonize_start(bool access_datapath, bool access_hardware_ports) >> { >> assert_single_threaded(); >> daemonize_fd = -1; >> >> if (switch_user) { >> -daemon_become_new_user__(access_datapath); >> +daemon_become_new_user__(access_datapath, access_hardware_ports); >> switch_user = false; >> } >> >> @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void) >> /* Linux specific implementation of daemon_become_new_user() >> * using libcap-ng. */ >> static void >> -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) >> +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, >> + bool access_hardware_ports OVS_UNUSED) >> { >> #if defined __linux__ && HAVE_LIBCAPNG >> int ret; >> @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath >> OVS_UNUSED) >> ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) >>|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) >>|| capng_update(CAPNG_ADD, cap_sets, >> CAP_NET_BROADCAST); >> +#ifdef DPDK_NETDEV >> +if (access_hardware_ports && !ret) { >> +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO); >> +VLOG_INFO("CAP_SYS_RAWIO enabled."); >> +} >> +#else >> +if (access_hardware_ports) { >> +VLOG_WARN("Dropped CAP_SYS_RAWIO request (no >> drivers)."); > > Does it mean we drop
Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges
On Tue, Mar 07, 2023 at 12:24:37PM -0300, Flavio Leitner wrote: > On Tue, Mar 07, 2023 at 02:32:04PM +0100, David Marchand wrote: > > On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole wrote: > > > > > > Open vSwitch generally tries to let the underlying operating system > > > managed the low level details of hardware, for example DMA mapping, > > > bus arbitration, etc. However, when using DPDK, the underlying > > > operating system yields control of many of these details to userspace > > > for management. > > > > > > In the case of some DPDK port drivers, configuring rte_flow or even > > > allocating resources may require access to iopl/ioperm calls, which > > > are guarded by the CAP_SYS_RAWIO privilege on linux systems. These > > > calls are dangerous, and can allow a process to completely compromise > > > a system. However, they are needed in the case of some userspace > > > driver code which manages the hardware (for example, the mlx > > > implementation of backend support for rte_flow). > > > > > > Here, we create an opt-in flag passed to the command line to allow > > > this access. We need to do this before ever accessing the database, > > > because we want to drop all privileges asap, and cannot wait for > > > a connection to the database to be established and functional before > > > dropping. There may be distribution specific ways to do capability > > > management as well (using for example, systemd), but they are not > > > as universal to the vswitchd as a flag. > > > > > > Reviewed-by: Simon Horman > > > Signed-off-by: Aaron Conole > > > --- > > > NEWS | 4 > > > lib/daemon-unix.c | 29 + > > > lib/daemon-windows.c | 3 ++- > > > lib/daemon.c | 2 +- > > > lib/daemon.h | 4 ++-- > > > ovsdb/ovsdb-client.c | 6 +++--- > > > ovsdb/ovsdb-server.c | 4 ++-- > > > tests/test-netflow.c | 2 +- > > > tests/test-sflow.c | 2 +- > > > tests/test-unixctl.c | 2 +- > > > utilities/ovs-ofctl.c | 4 ++-- > > > utilities/ovs-testcontroller.c | 4 ++-- > > > vswitchd/ovs-vswitchd.8.in | 8 > > > vswitchd/ovs-vswitchd.c| 11 ++- > > > 14 files changed, 60 insertions(+), 25 deletions(-) > > > > > > diff --git a/NEWS b/NEWS > > > index 85b3496214..65f35dcdd5 100644 > > > --- a/NEWS > > > +++ b/NEWS > > > @@ -10,6 +10,10 @@ Post-v3.1.0 > > > in order to create OVSDB sockets with access mode of 0770. > > > - QoS: > > > * Added new configuration option 'jitter' for a linux-netem QoS > > > type. > > > + - DPDK: > > > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started > > > + with the --hw-rawio-access command line option. This allows the > > > + process extra privileges when mapping physical interconnect > > > memory. > > > > > > > > > v3.1.0 - 16 Feb 2023 > > > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c > > > index 1a7ba427d7..a080facddc 100644 > > > --- a/lib/daemon-unix.c > > > +++ b/lib/daemon-unix.c > > > @@ -88,7 +88,8 @@ static bool switch_user = false; > > > static uid_t uid; > > > static gid_t gid; > > > static char *user = NULL; > > > -static void daemon_become_new_user__(bool access_datapath); > > > +static void daemon_become_new_user__(bool access_datapath, > > > + bool access_hardware_ports); > > > > > > static void check_already_running(void); > > > static int lock_pidfile(FILE *, int command); > > > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid) > > > * daemonize_complete()) or that it failed to start up (by exiting with a > > > * nonzero exit code). */ > > > void > > > -daemonize_start(bool access_datapath) > > > +daemonize_start(bool access_datapath, bool access_hardware_ports) > > > { > > > assert_single_threaded(); > > > daemonize_fd = -1; > > > > > > if (switch_user) { > > > -daemon_become_new_user__(access_datapath); > > > +daemon_become_new_user__(access_datapath, access_hardware_ports); > > > switch_user = false; > > > } > > > > > > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void) > > > /* Linux specific implementation of daemon_become_new_user() > > > * using libcap-ng. */ > > > static void > > > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) > > > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, > > > + bool access_hardware_ports OVS_UNUSED) > > > { > > > #if defined __linux__ && HAVE_LIBCAPNG > > > int ret; > > > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath > > > OVS_UNUSED) > > > ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) > > >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) > > >|| capng_update(CAPNG_ADD, cap_sets, > > >
Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges
On Tue, Mar 07, 2023 at 02:32:04PM +0100, David Marchand wrote: > On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole wrote: > > > > Open vSwitch generally tries to let the underlying operating system > > managed the low level details of hardware, for example DMA mapping, > > bus arbitration, etc. However, when using DPDK, the underlying > > operating system yields control of many of these details to userspace > > for management. > > > > In the case of some DPDK port drivers, configuring rte_flow or even > > allocating resources may require access to iopl/ioperm calls, which > > are guarded by the CAP_SYS_RAWIO privilege on linux systems. These > > calls are dangerous, and can allow a process to completely compromise > > a system. However, they are needed in the case of some userspace > > driver code which manages the hardware (for example, the mlx > > implementation of backend support for rte_flow). > > > > Here, we create an opt-in flag passed to the command line to allow > > this access. We need to do this before ever accessing the database, > > because we want to drop all privileges asap, and cannot wait for > > a connection to the database to be established and functional before > > dropping. There may be distribution specific ways to do capability > > management as well (using for example, systemd), but they are not > > as universal to the vswitchd as a flag. > > > > Reviewed-by: Simon Horman > > Signed-off-by: Aaron Conole > > --- > > NEWS | 4 > > lib/daemon-unix.c | 29 + > > lib/daemon-windows.c | 3 ++- > > lib/daemon.c | 2 +- > > lib/daemon.h | 4 ++-- > > ovsdb/ovsdb-client.c | 6 +++--- > > ovsdb/ovsdb-server.c | 4 ++-- > > tests/test-netflow.c | 2 +- > > tests/test-sflow.c | 2 +- > > tests/test-unixctl.c | 2 +- > > utilities/ovs-ofctl.c | 4 ++-- > > utilities/ovs-testcontroller.c | 4 ++-- > > vswitchd/ovs-vswitchd.8.in | 8 > > vswitchd/ovs-vswitchd.c| 11 ++- > > 14 files changed, 60 insertions(+), 25 deletions(-) > > > > diff --git a/NEWS b/NEWS > > index 85b3496214..65f35dcdd5 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -10,6 +10,10 @@ Post-v3.1.0 > > in order to create OVSDB sockets with access mode of 0770. > > - QoS: > > * Added new configuration option 'jitter' for a linux-netem QoS type. > > + - DPDK: > > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started > > + with the --hw-rawio-access command line option. This allows the > > + process extra privileges when mapping physical interconnect memory. > > > > > > v3.1.0 - 16 Feb 2023 > > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c > > index 1a7ba427d7..a080facddc 100644 > > --- a/lib/daemon-unix.c > > +++ b/lib/daemon-unix.c > > @@ -88,7 +88,8 @@ static bool switch_user = false; > > static uid_t uid; > > static gid_t gid; > > static char *user = NULL; > > -static void daemon_become_new_user__(bool access_datapath); > > +static void daemon_become_new_user__(bool access_datapath, > > + bool access_hardware_ports); > > > > static void check_already_running(void); > > static int lock_pidfile(FILE *, int command); > > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid) > > * daemonize_complete()) or that it failed to start up (by exiting with a > > * nonzero exit code). */ > > void > > -daemonize_start(bool access_datapath) > > +daemonize_start(bool access_datapath, bool access_hardware_ports) > > { > > assert_single_threaded(); > > daemonize_fd = -1; > > > > if (switch_user) { > > -daemon_become_new_user__(access_datapath); > > +daemon_become_new_user__(access_datapath, access_hardware_ports); > > switch_user = false; > > } > > > > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void) > > /* Linux specific implementation of daemon_become_new_user() > > * using libcap-ng. */ > > static void > > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) > > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, > > + bool access_hardware_ports OVS_UNUSED) > > { > > #if defined __linux__ && HAVE_LIBCAPNG > > int ret; > > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath > > OVS_UNUSED) > > ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) > >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) > >|| capng_update(CAPNG_ADD, cap_sets, > > CAP_NET_BROADCAST); > > +#ifdef DPDK_NETDEV > > +if (access_hardware_ports && !ret) { > > +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO); > > +VLOG_INFO("CAP_SYS_RAWIO enabled."); Perhaps "The Linux capability CAP_SYS_RAWIO is
Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges
On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole wrote: > > Open vSwitch generally tries to let the underlying operating system > managed the low level details of hardware, for example DMA mapping, > bus arbitration, etc. However, when using DPDK, the underlying > operating system yields control of many of these details to userspace > for management. > > In the case of some DPDK port drivers, configuring rte_flow or even > allocating resources may require access to iopl/ioperm calls, which > are guarded by the CAP_SYS_RAWIO privilege on linux systems. These > calls are dangerous, and can allow a process to completely compromise > a system. However, they are needed in the case of some userspace > driver code which manages the hardware (for example, the mlx > implementation of backend support for rte_flow). > > Here, we create an opt-in flag passed to the command line to allow > this access. We need to do this before ever accessing the database, > because we want to drop all privileges asap, and cannot wait for > a connection to the database to be established and functional before > dropping. There may be distribution specific ways to do capability > management as well (using for example, systemd), but they are not > as universal to the vswitchd as a flag. > > Reviewed-by: Simon Horman > Signed-off-by: Aaron Conole > --- > NEWS | 4 > lib/daemon-unix.c | 29 + > lib/daemon-windows.c | 3 ++- > lib/daemon.c | 2 +- > lib/daemon.h | 4 ++-- > ovsdb/ovsdb-client.c | 6 +++--- > ovsdb/ovsdb-server.c | 4 ++-- > tests/test-netflow.c | 2 +- > tests/test-sflow.c | 2 +- > tests/test-unixctl.c | 2 +- > utilities/ovs-ofctl.c | 4 ++-- > utilities/ovs-testcontroller.c | 4 ++-- > vswitchd/ovs-vswitchd.8.in | 8 > vswitchd/ovs-vswitchd.c| 11 ++- > 14 files changed, 60 insertions(+), 25 deletions(-) > > diff --git a/NEWS b/NEWS > index 85b3496214..65f35dcdd5 100644 > --- a/NEWS > +++ b/NEWS > @@ -10,6 +10,10 @@ Post-v3.1.0 > in order to create OVSDB sockets with access mode of 0770. > - QoS: > * Added new configuration option 'jitter' for a linux-netem QoS type. > + - DPDK: > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started > + with the --hw-rawio-access command line option. This allows the > + process extra privileges when mapping physical interconnect memory. > > > v3.1.0 - 16 Feb 2023 > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c > index 1a7ba427d7..a080facddc 100644 > --- a/lib/daemon-unix.c > +++ b/lib/daemon-unix.c > @@ -88,7 +88,8 @@ static bool switch_user = false; > static uid_t uid; > static gid_t gid; > static char *user = NULL; > -static void daemon_become_new_user__(bool access_datapath); > +static void daemon_become_new_user__(bool access_datapath, > + bool access_hardware_ports); > > static void check_already_running(void); > static int lock_pidfile(FILE *, int command); > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid) > * daemonize_complete()) or that it failed to start up (by exiting with a > * nonzero exit code). */ > void > -daemonize_start(bool access_datapath) > +daemonize_start(bool access_datapath, bool access_hardware_ports) > { > assert_single_threaded(); > daemonize_fd = -1; > > if (switch_user) { > -daemon_become_new_user__(access_datapath); > +daemon_become_new_user__(access_datapath, access_hardware_ports); > switch_user = false; > } > > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void) > /* Linux specific implementation of daemon_become_new_user() > * using libcap-ng. */ > static void > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, > + bool access_hardware_ports OVS_UNUSED) > { > #if defined __linux__ && HAVE_LIBCAPNG > int ret; > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath > OVS_UNUSED) > ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) >|| capng_update(CAPNG_ADD, cap_sets, > CAP_NET_BROADCAST); > +#ifdef DPDK_NETDEV > +if (access_hardware_ports && !ret) { > +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO); > +VLOG_INFO("CAP_SYS_RAWIO enabled."); > +} > +#else > +if (access_hardware_ports) { > +VLOG_WARN("Dropped CAP_SYS_RAWIO request (no drivers)."); Does it mean we drop the capability? or do we drop the drop? I don't really have a better wording but the current log had me reading it a few times. > +
[ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges
Open vSwitch generally tries to let the underlying operating system managed the low level details of hardware, for example DMA mapping, bus arbitration, etc. However, when using DPDK, the underlying operating system yields control of many of these details to userspace for management. In the case of some DPDK port drivers, configuring rte_flow or even allocating resources may require access to iopl/ioperm calls, which are guarded by the CAP_SYS_RAWIO privilege on linux systems. These calls are dangerous, and can allow a process to completely compromise a system. However, they are needed in the case of some userspace driver code which manages the hardware (for example, the mlx implementation of backend support for rte_flow). Here, we create an opt-in flag passed to the command line to allow this access. We need to do this before ever accessing the database, because we want to drop all privileges asap, and cannot wait for a connection to the database to be established and functional before dropping. There may be distribution specific ways to do capability management as well (using for example, systemd), but they are not as universal to the vswitchd as a flag. Reviewed-by: Simon Horman Signed-off-by: Aaron Conole --- NEWS | 4 lib/daemon-unix.c | 29 + lib/daemon-windows.c | 3 ++- lib/daemon.c | 2 +- lib/daemon.h | 4 ++-- ovsdb/ovsdb-client.c | 6 +++--- ovsdb/ovsdb-server.c | 4 ++-- tests/test-netflow.c | 2 +- tests/test-sflow.c | 2 +- tests/test-unixctl.c | 2 +- utilities/ovs-ofctl.c | 4 ++-- utilities/ovs-testcontroller.c | 4 ++-- vswitchd/ovs-vswitchd.8.in | 8 vswitchd/ovs-vswitchd.c| 11 ++- 14 files changed, 60 insertions(+), 25 deletions(-) diff --git a/NEWS b/NEWS index 85b3496214..65f35dcdd5 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ Post-v3.1.0 in order to create OVSDB sockets with access mode of 0770. - QoS: * Added new configuration option 'jitter' for a linux-netem QoS type. + - DPDK: + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started + with the --hw-rawio-access command line option. This allows the + process extra privileges when mapping physical interconnect memory. v3.1.0 - 16 Feb 2023 diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c index 1a7ba427d7..a080facddc 100644 --- a/lib/daemon-unix.c +++ b/lib/daemon-unix.c @@ -88,7 +88,8 @@ static bool switch_user = false; static uid_t uid; static gid_t gid; static char *user = NULL; -static void daemon_become_new_user__(bool access_datapath); +static void daemon_become_new_user__(bool access_datapath, + bool access_hardware_ports); static void check_already_running(void); static int lock_pidfile(FILE *, int command); @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid) * daemonize_complete()) or that it failed to start up (by exiting with a * nonzero exit code). */ void -daemonize_start(bool access_datapath) +daemonize_start(bool access_datapath, bool access_hardware_ports) { assert_single_threaded(); daemonize_fd = -1; if (switch_user) { -daemon_become_new_user__(access_datapath); +daemon_become_new_user__(access_datapath, access_hardware_ports); switch_user = false; } @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void) /* Linux specific implementation of daemon_become_new_user() * using libcap-ng. */ static void -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, + bool access_hardware_ports OVS_UNUSED) { #if defined __linux__ && HAVE_LIBCAPNG int ret; @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) || capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BROADCAST); +#ifdef DPDK_NETDEV +if (access_hardware_ports && !ret) { +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO); +VLOG_INFO("CAP_SYS_RAWIO enabled."); +} +#else +if (access_hardware_ports) { +VLOG_WARN("Dropped CAP_SYS_RAWIO request (no drivers)."); +} +#endif } } else { ret = -1; @@ -854,7 +866,7 @@ daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) } static void -daemon_become_new_user__(bool access_datapath) +daemon_become_new_user__(bool access_datapath, bool access_hardware_ports) { /* If vlog file has been created, change its owner to the non-root user * as specifed by the --user option.