Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges

2023-03-07 Thread Aaron Conole
Flavio Leitner  writes:

> On Tue, Mar 07, 2023 at 02:32:04PM +0100, David Marchand wrote:
>> On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole  wrote:
>> >
>> > Open vSwitch generally tries to let the underlying operating system
>> > managed the low level details of hardware, for example DMA mapping,
>> > bus arbitration, etc.  However, when using DPDK, the underlying
>> > operating system yields control of many of these details to userspace
>> > for management.
>> >
>> > In the case of some DPDK port drivers, configuring rte_flow or even
>> > allocating resources may require access to iopl/ioperm calls, which
>> > are guarded by the CAP_SYS_RAWIO privilege on linux systems.  These
>> > calls are dangerous, and can allow a process to completely compromise
>> > a system.  However, they are needed in the case of some userspace
>> > driver code which manages the hardware (for example, the mlx
>> > implementation of backend support for rte_flow).
>> >
>> > Here, we create an opt-in flag passed to the command line to allow
>> > this access.  We need to do this before ever accessing the database,
>> > because we want to drop all privileges asap, and cannot wait for
>> > a connection to the database to be established and functional before
>> > dropping.  There may be distribution specific ways to do capability
>> > management as well (using for example, systemd), but they are not
>> > as universal to the vswitchd as a flag.
>> >
>> > Reviewed-by: Simon Horman 
>> > Signed-off-by: Aaron Conole 
>> > ---
>> >  NEWS   |  4 
>> >  lib/daemon-unix.c  | 29 +
>> >  lib/daemon-windows.c   |  3 ++-
>> >  lib/daemon.c   |  2 +-
>> >  lib/daemon.h   |  4 ++--
>> >  ovsdb/ovsdb-client.c   |  6 +++---
>> >  ovsdb/ovsdb-server.c   |  4 ++--
>> >  tests/test-netflow.c   |  2 +-
>> >  tests/test-sflow.c |  2 +-
>> >  tests/test-unixctl.c   |  2 +-
>> >  utilities/ovs-ofctl.c  |  4 ++--
>> >  utilities/ovs-testcontroller.c |  4 ++--
>> >  vswitchd/ovs-vswitchd.8.in |  8 
>> >  vswitchd/ovs-vswitchd.c| 11 ++-
>> >  14 files changed, 60 insertions(+), 25 deletions(-)
>> >
>> > diff --git a/NEWS b/NEWS
>> > index 85b3496214..65f35dcdd5 100644
>> > --- a/NEWS
>> > +++ b/NEWS
>> > @@ -10,6 +10,10 @@ Post-v3.1.0
>> > in order to create OVSDB sockets with access mode of 0770.
>> > - QoS:
>> >   * Added new configuration option 'jitter' for a linux-netem QoS type.
>> > +   - DPDK:
>> > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
>> > +   with the --hw-rawio-access command line option.  This allows the
>> > +   process extra privileges when mapping physical interconnect memory.
>> >
>> >
>> >  v3.1.0 - 16 Feb 2023
>> > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
>> > index 1a7ba427d7..a080facddc 100644
>> > --- a/lib/daemon-unix.c
>> > +++ b/lib/daemon-unix.c
>> > @@ -88,7 +88,8 @@ static bool switch_user = false;
>> >  static uid_t uid;
>> >  static gid_t gid;
>> >  static char *user = NULL;
>> > -static void daemon_become_new_user__(bool access_datapath);
>> > +static void daemon_become_new_user__(bool access_datapath,
>> > + bool access_hardware_ports);
>> >
>> >  static void check_already_running(void);
>> >  static int lock_pidfile(FILE *, int command);
>> > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
>> >   * daemonize_complete()) or that it failed to start up (by exiting with a
>> >   * nonzero exit code). */
>> >  void
>> > -daemonize_start(bool access_datapath)
>> > +daemonize_start(bool access_datapath, bool access_hardware_ports)
>> >  {
>> >  assert_single_threaded();
>> >  daemonize_fd = -1;
>> >
>> >  if (switch_user) {
>> > -daemon_become_new_user__(access_datapath);
>> > +daemon_become_new_user__(access_datapath, access_hardware_ports);
>> >  switch_user = false;
>> >  }
>> >
>> > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
>> >  /* Linux specific implementation of daemon_become_new_user()
>> >   * using libcap-ng.   */
>> >  static void
>> > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
>> > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
>> > + bool access_hardware_ports OVS_UNUSED)
>> >  {
>> >  #if defined __linux__ &&  HAVE_LIBCAPNG
>> >  int ret;
>> > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath 
>> > OVS_UNUSED)
>> >  ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
>> >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
>> >|| capng_update(CAPNG_ADD, cap_sets, 
>> > CAP_NET_BROADCAST);
>> > +#ifdef DPDK_NETDEV
>> > +if (access_hardware_ports && !ret) {
>> > +ret = capng_update(CAPNG_ADD, 

Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges

2023-03-07 Thread Aaron Conole
David Marchand  writes:

> On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole  wrote:
>>
>> Open vSwitch generally tries to let the underlying operating system
>> managed the low level details of hardware, for example DMA mapping,
>> bus arbitration, etc.  However, when using DPDK, the underlying
>> operating system yields control of many of these details to userspace
>> for management.
>>
>> In the case of some DPDK port drivers, configuring rte_flow or even
>> allocating resources may require access to iopl/ioperm calls, which
>> are guarded by the CAP_SYS_RAWIO privilege on linux systems.  These
>> calls are dangerous, and can allow a process to completely compromise
>> a system.  However, they are needed in the case of some userspace
>> driver code which manages the hardware (for example, the mlx
>> implementation of backend support for rte_flow).
>>
>> Here, we create an opt-in flag passed to the command line to allow
>> this access.  We need to do this before ever accessing the database,
>> because we want to drop all privileges asap, and cannot wait for
>> a connection to the database to be established and functional before
>> dropping.  There may be distribution specific ways to do capability
>> management as well (using for example, systemd), but they are not
>> as universal to the vswitchd as a flag.
>>
>> Reviewed-by: Simon Horman 
>> Signed-off-by: Aaron Conole 
>> ---
>>  NEWS   |  4 
>>  lib/daemon-unix.c  | 29 +
>>  lib/daemon-windows.c   |  3 ++-
>>  lib/daemon.c   |  2 +-
>>  lib/daemon.h   |  4 ++--
>>  ovsdb/ovsdb-client.c   |  6 +++---
>>  ovsdb/ovsdb-server.c   |  4 ++--
>>  tests/test-netflow.c   |  2 +-
>>  tests/test-sflow.c |  2 +-
>>  tests/test-unixctl.c   |  2 +-
>>  utilities/ovs-ofctl.c  |  4 ++--
>>  utilities/ovs-testcontroller.c |  4 ++--
>>  vswitchd/ovs-vswitchd.8.in |  8 
>>  vswitchd/ovs-vswitchd.c| 11 ++-
>>  14 files changed, 60 insertions(+), 25 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 85b3496214..65f35dcdd5 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -10,6 +10,10 @@ Post-v3.1.0
>> in order to create OVSDB sockets with access mode of 0770.
>> - QoS:
>>   * Added new configuration option 'jitter' for a linux-netem QoS type.
>> +   - DPDK:
>> + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
>> +   with the --hw-rawio-access command line option.  This allows the
>> +   process extra privileges when mapping physical interconnect memory.
>>
>>
>>  v3.1.0 - 16 Feb 2023
>> diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
>> index 1a7ba427d7..a080facddc 100644
>> --- a/lib/daemon-unix.c
>> +++ b/lib/daemon-unix.c
>> @@ -88,7 +88,8 @@ static bool switch_user = false;
>>  static uid_t uid;
>>  static gid_t gid;
>>  static char *user = NULL;
>> -static void daemon_become_new_user__(bool access_datapath);
>> +static void daemon_become_new_user__(bool access_datapath,
>> + bool access_hardware_ports);
>>
>>  static void check_already_running(void);
>>  static int lock_pidfile(FILE *, int command);
>> @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
>>   * daemonize_complete()) or that it failed to start up (by exiting with a
>>   * nonzero exit code). */
>>  void
>> -daemonize_start(bool access_datapath)
>> +daemonize_start(bool access_datapath, bool access_hardware_ports)
>>  {
>>  assert_single_threaded();
>>  daemonize_fd = -1;
>>
>>  if (switch_user) {
>> -daemon_become_new_user__(access_datapath);
>> +daemon_become_new_user__(access_datapath, access_hardware_ports);
>>  switch_user = false;
>>  }
>>
>> @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
>>  /* Linux specific implementation of daemon_become_new_user()
>>   * using libcap-ng.   */
>>  static void
>> -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
>> +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
>> + bool access_hardware_ports OVS_UNUSED)
>>  {
>>  #if defined __linux__ &&  HAVE_LIBCAPNG
>>  int ret;
>> @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath 
>> OVS_UNUSED)
>>  ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
>>|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
>>|| capng_update(CAPNG_ADD, cap_sets, 
>> CAP_NET_BROADCAST);
>> +#ifdef DPDK_NETDEV
>> +if (access_hardware_ports && !ret) {
>> +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO);
>> +VLOG_INFO("CAP_SYS_RAWIO enabled.");
>> +}
>> +#else
>> +if (access_hardware_ports) {
>> +VLOG_WARN("Dropped CAP_SYS_RAWIO request (no 
>> drivers).");
>
> Does it mean we drop 

Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges

2023-03-07 Thread Flavio Leitner
On Tue, Mar 07, 2023 at 12:24:37PM -0300, Flavio Leitner wrote:
> On Tue, Mar 07, 2023 at 02:32:04PM +0100, David Marchand wrote:
> > On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole  wrote:
> > >
> > > Open vSwitch generally tries to let the underlying operating system
> > > managed the low level details of hardware, for example DMA mapping,
> > > bus arbitration, etc.  However, when using DPDK, the underlying
> > > operating system yields control of many of these details to userspace
> > > for management.
> > >
> > > In the case of some DPDK port drivers, configuring rte_flow or even
> > > allocating resources may require access to iopl/ioperm calls, which
> > > are guarded by the CAP_SYS_RAWIO privilege on linux systems.  These
> > > calls are dangerous, and can allow a process to completely compromise
> > > a system.  However, they are needed in the case of some userspace
> > > driver code which manages the hardware (for example, the mlx
> > > implementation of backend support for rte_flow).
> > >
> > > Here, we create an opt-in flag passed to the command line to allow
> > > this access.  We need to do this before ever accessing the database,
> > > because we want to drop all privileges asap, and cannot wait for
> > > a connection to the database to be established and functional before
> > > dropping.  There may be distribution specific ways to do capability
> > > management as well (using for example, systemd), but they are not
> > > as universal to the vswitchd as a flag.
> > >
> > > Reviewed-by: Simon Horman 
> > > Signed-off-by: Aaron Conole 
> > > ---
> > >  NEWS   |  4 
> > >  lib/daemon-unix.c  | 29 +
> > >  lib/daemon-windows.c   |  3 ++-
> > >  lib/daemon.c   |  2 +-
> > >  lib/daemon.h   |  4 ++--
> > >  ovsdb/ovsdb-client.c   |  6 +++---
> > >  ovsdb/ovsdb-server.c   |  4 ++--
> > >  tests/test-netflow.c   |  2 +-
> > >  tests/test-sflow.c |  2 +-
> > >  tests/test-unixctl.c   |  2 +-
> > >  utilities/ovs-ofctl.c  |  4 ++--
> > >  utilities/ovs-testcontroller.c |  4 ++--
> > >  vswitchd/ovs-vswitchd.8.in |  8 
> > >  vswitchd/ovs-vswitchd.c| 11 ++-
> > >  14 files changed, 60 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/NEWS b/NEWS
> > > index 85b3496214..65f35dcdd5 100644
> > > --- a/NEWS
> > > +++ b/NEWS
> > > @@ -10,6 +10,10 @@ Post-v3.1.0
> > > in order to create OVSDB sockets with access mode of 0770.
> > > - QoS:
> > >   * Added new configuration option 'jitter' for a linux-netem QoS 
> > > type.
> > > +   - DPDK:
> > > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
> > > +   with the --hw-rawio-access command line option.  This allows the
> > > +   process extra privileges when mapping physical interconnect 
> > > memory.
> > >
> > >
> > >  v3.1.0 - 16 Feb 2023
> > > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
> > > index 1a7ba427d7..a080facddc 100644
> > > --- a/lib/daemon-unix.c
> > > +++ b/lib/daemon-unix.c
> > > @@ -88,7 +88,8 @@ static bool switch_user = false;
> > >  static uid_t uid;
> > >  static gid_t gid;
> > >  static char *user = NULL;
> > > -static void daemon_become_new_user__(bool access_datapath);
> > > +static void daemon_become_new_user__(bool access_datapath,
> > > + bool access_hardware_ports);
> > >
> > >  static void check_already_running(void);
> > >  static int lock_pidfile(FILE *, int command);
> > > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
> > >   * daemonize_complete()) or that it failed to start up (by exiting with a
> > >   * nonzero exit code). */
> > >  void
> > > -daemonize_start(bool access_datapath)
> > > +daemonize_start(bool access_datapath, bool access_hardware_ports)
> > >  {
> > >  assert_single_threaded();
> > >  daemonize_fd = -1;
> > >
> > >  if (switch_user) {
> > > -daemon_become_new_user__(access_datapath);
> > > +daemon_become_new_user__(access_datapath, access_hardware_ports);
> > >  switch_user = false;
> > >  }
> > >
> > > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
> > >  /* Linux specific implementation of daemon_become_new_user()
> > >   * using libcap-ng.   */
> > >  static void
> > > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
> > > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
> > > + bool access_hardware_ports OVS_UNUSED)
> > >  {
> > >  #if defined __linux__ &&  HAVE_LIBCAPNG
> > >  int ret;
> > > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath 
> > > OVS_UNUSED)
> > >  ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
> > >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
> > >|| capng_update(CAPNG_ADD, cap_sets, 
> > > 

Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges

2023-03-07 Thread Flavio Leitner
On Tue, Mar 07, 2023 at 02:32:04PM +0100, David Marchand wrote:
> On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole  wrote:
> >
> > Open vSwitch generally tries to let the underlying operating system
> > managed the low level details of hardware, for example DMA mapping,
> > bus arbitration, etc.  However, when using DPDK, the underlying
> > operating system yields control of many of these details to userspace
> > for management.
> >
> > In the case of some DPDK port drivers, configuring rte_flow or even
> > allocating resources may require access to iopl/ioperm calls, which
> > are guarded by the CAP_SYS_RAWIO privilege on linux systems.  These
> > calls are dangerous, and can allow a process to completely compromise
> > a system.  However, they are needed in the case of some userspace
> > driver code which manages the hardware (for example, the mlx
> > implementation of backend support for rte_flow).
> >
> > Here, we create an opt-in flag passed to the command line to allow
> > this access.  We need to do this before ever accessing the database,
> > because we want to drop all privileges asap, and cannot wait for
> > a connection to the database to be established and functional before
> > dropping.  There may be distribution specific ways to do capability
> > management as well (using for example, systemd), but they are not
> > as universal to the vswitchd as a flag.
> >
> > Reviewed-by: Simon Horman 
> > Signed-off-by: Aaron Conole 
> > ---
> >  NEWS   |  4 
> >  lib/daemon-unix.c  | 29 +
> >  lib/daemon-windows.c   |  3 ++-
> >  lib/daemon.c   |  2 +-
> >  lib/daemon.h   |  4 ++--
> >  ovsdb/ovsdb-client.c   |  6 +++---
> >  ovsdb/ovsdb-server.c   |  4 ++--
> >  tests/test-netflow.c   |  2 +-
> >  tests/test-sflow.c |  2 +-
> >  tests/test-unixctl.c   |  2 +-
> >  utilities/ovs-ofctl.c  |  4 ++--
> >  utilities/ovs-testcontroller.c |  4 ++--
> >  vswitchd/ovs-vswitchd.8.in |  8 
> >  vswitchd/ovs-vswitchd.c| 11 ++-
> >  14 files changed, 60 insertions(+), 25 deletions(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 85b3496214..65f35dcdd5 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -10,6 +10,10 @@ Post-v3.1.0
> > in order to create OVSDB sockets with access mode of 0770.
> > - QoS:
> >   * Added new configuration option 'jitter' for a linux-netem QoS type.
> > +   - DPDK:
> > + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
> > +   with the --hw-rawio-access command line option.  This allows the
> > +   process extra privileges when mapping physical interconnect memory.
> >
> >
> >  v3.1.0 - 16 Feb 2023
> > diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
> > index 1a7ba427d7..a080facddc 100644
> > --- a/lib/daemon-unix.c
> > +++ b/lib/daemon-unix.c
> > @@ -88,7 +88,8 @@ static bool switch_user = false;
> >  static uid_t uid;
> >  static gid_t gid;
> >  static char *user = NULL;
> > -static void daemon_become_new_user__(bool access_datapath);
> > +static void daemon_become_new_user__(bool access_datapath,
> > + bool access_hardware_ports);
> >
> >  static void check_already_running(void);
> >  static int lock_pidfile(FILE *, int command);
> > @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
> >   * daemonize_complete()) or that it failed to start up (by exiting with a
> >   * nonzero exit code). */
> >  void
> > -daemonize_start(bool access_datapath)
> > +daemonize_start(bool access_datapath, bool access_hardware_ports)
> >  {
> >  assert_single_threaded();
> >  daemonize_fd = -1;
> >
> >  if (switch_user) {
> > -daemon_become_new_user__(access_datapath);
> > +daemon_become_new_user__(access_datapath, access_hardware_ports);
> >  switch_user = false;
> >  }
> >
> > @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
> >  /* Linux specific implementation of daemon_become_new_user()
> >   * using libcap-ng.   */
> >  static void
> > -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
> > +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
> > + bool access_hardware_ports OVS_UNUSED)
> >  {
> >  #if defined __linux__ &&  HAVE_LIBCAPNG
> >  int ret;
> > @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath 
> > OVS_UNUSED)
> >  ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
> >|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
> >|| capng_update(CAPNG_ADD, cap_sets, 
> > CAP_NET_BROADCAST);
> > +#ifdef DPDK_NETDEV
> > +if (access_hardware_ports && !ret) {
> > +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO);
> > +VLOG_INFO("CAP_SYS_RAWIO enabled.");

Perhaps "The Linux capability CAP_SYS_RAWIO is 

Re: [ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges

2023-03-07 Thread David Marchand
On Tue, Mar 7, 2023 at 2:06 PM Aaron Conole  wrote:
>
> Open vSwitch generally tries to let the underlying operating system
> managed the low level details of hardware, for example DMA mapping,
> bus arbitration, etc.  However, when using DPDK, the underlying
> operating system yields control of many of these details to userspace
> for management.
>
> In the case of some DPDK port drivers, configuring rte_flow or even
> allocating resources may require access to iopl/ioperm calls, which
> are guarded by the CAP_SYS_RAWIO privilege on linux systems.  These
> calls are dangerous, and can allow a process to completely compromise
> a system.  However, they are needed in the case of some userspace
> driver code which manages the hardware (for example, the mlx
> implementation of backend support for rte_flow).
>
> Here, we create an opt-in flag passed to the command line to allow
> this access.  We need to do this before ever accessing the database,
> because we want to drop all privileges asap, and cannot wait for
> a connection to the database to be established and functional before
> dropping.  There may be distribution specific ways to do capability
> management as well (using for example, systemd), but they are not
> as universal to the vswitchd as a flag.
>
> Reviewed-by: Simon Horman 
> Signed-off-by: Aaron Conole 
> ---
>  NEWS   |  4 
>  lib/daemon-unix.c  | 29 +
>  lib/daemon-windows.c   |  3 ++-
>  lib/daemon.c   |  2 +-
>  lib/daemon.h   |  4 ++--
>  ovsdb/ovsdb-client.c   |  6 +++---
>  ovsdb/ovsdb-server.c   |  4 ++--
>  tests/test-netflow.c   |  2 +-
>  tests/test-sflow.c |  2 +-
>  tests/test-unixctl.c   |  2 +-
>  utilities/ovs-ofctl.c  |  4 ++--
>  utilities/ovs-testcontroller.c |  4 ++--
>  vswitchd/ovs-vswitchd.8.in |  8 
>  vswitchd/ovs-vswitchd.c| 11 ++-
>  14 files changed, 60 insertions(+), 25 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 85b3496214..65f35dcdd5 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -10,6 +10,10 @@ Post-v3.1.0
> in order to create OVSDB sockets with access mode of 0770.
> - QoS:
>   * Added new configuration option 'jitter' for a linux-netem QoS type.
> +   - DPDK:
> + * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
> +   with the --hw-rawio-access command line option.  This allows the
> +   process extra privileges when mapping physical interconnect memory.
>
>
>  v3.1.0 - 16 Feb 2023
> diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
> index 1a7ba427d7..a080facddc 100644
> --- a/lib/daemon-unix.c
> +++ b/lib/daemon-unix.c
> @@ -88,7 +88,8 @@ static bool switch_user = false;
>  static uid_t uid;
>  static gid_t gid;
>  static char *user = NULL;
> -static void daemon_become_new_user__(bool access_datapath);
> +static void daemon_become_new_user__(bool access_datapath,
> + bool access_hardware_ports);
>
>  static void check_already_running(void);
>  static int lock_pidfile(FILE *, int command);
> @@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
>   * daemonize_complete()) or that it failed to start up (by exiting with a
>   * nonzero exit code). */
>  void
> -daemonize_start(bool access_datapath)
> +daemonize_start(bool access_datapath, bool access_hardware_ports)
>  {
>  assert_single_threaded();
>  daemonize_fd = -1;
>
>  if (switch_user) {
> -daemon_become_new_user__(access_datapath);
> +daemon_become_new_user__(access_datapath, access_hardware_ports);
>  switch_user = false;
>  }
>
> @@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
>  /* Linux specific implementation of daemon_become_new_user()
>   * using libcap-ng.   */
>  static void
> -daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
> +daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
> + bool access_hardware_ports OVS_UNUSED)
>  {
>  #if defined __linux__ &&  HAVE_LIBCAPNG
>  int ret;
> @@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath 
> OVS_UNUSED)
>  ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
>|| capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
>|| capng_update(CAPNG_ADD, cap_sets, 
> CAP_NET_BROADCAST);
> +#ifdef DPDK_NETDEV
> +if (access_hardware_ports && !ret) {
> +ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO);
> +VLOG_INFO("CAP_SYS_RAWIO enabled.");
> +}
> +#else
> +if (access_hardware_ports) {
> +VLOG_WARN("Dropped CAP_SYS_RAWIO request (no drivers).");

Does it mean we drop the capability? or do we drop the drop?
I don't really have a better wording but the current log had me
reading it a few times.


> +   

[ovs-dev] [PATCH v2] dpdk: Allow retaining CAP_SYS_RAWIO privileges

2023-03-07 Thread Aaron Conole
Open vSwitch generally tries to let the underlying operating system
managed the low level details of hardware, for example DMA mapping,
bus arbitration, etc.  However, when using DPDK, the underlying
operating system yields control of many of these details to userspace
for management.

In the case of some DPDK port drivers, configuring rte_flow or even
allocating resources may require access to iopl/ioperm calls, which
are guarded by the CAP_SYS_RAWIO privilege on linux systems.  These
calls are dangerous, and can allow a process to completely compromise
a system.  However, they are needed in the case of some userspace
driver code which manages the hardware (for example, the mlx
implementation of backend support for rte_flow).

Here, we create an opt-in flag passed to the command line to allow
this access.  We need to do this before ever accessing the database,
because we want to drop all privileges asap, and cannot wait for
a connection to the database to be established and functional before
dropping.  There may be distribution specific ways to do capability
management as well (using for example, systemd), but they are not
as universal to the vswitchd as a flag.

Reviewed-by: Simon Horman 
Signed-off-by: Aaron Conole 
---
 NEWS   |  4 
 lib/daemon-unix.c  | 29 +
 lib/daemon-windows.c   |  3 ++-
 lib/daemon.c   |  2 +-
 lib/daemon.h   |  4 ++--
 ovsdb/ovsdb-client.c   |  6 +++---
 ovsdb/ovsdb-server.c   |  4 ++--
 tests/test-netflow.c   |  2 +-
 tests/test-sflow.c |  2 +-
 tests/test-unixctl.c   |  2 +-
 utilities/ovs-ofctl.c  |  4 ++--
 utilities/ovs-testcontroller.c |  4 ++--
 vswitchd/ovs-vswitchd.8.in |  8 
 vswitchd/ovs-vswitchd.c| 11 ++-
 14 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/NEWS b/NEWS
index 85b3496214..65f35dcdd5 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,10 @@ Post-v3.1.0
in order to create OVSDB sockets with access mode of 0770.
- QoS:
  * Added new configuration option 'jitter' for a linux-netem QoS type.
+   - DPDK:
+ * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
+   with the --hw-rawio-access command line option.  This allows the
+   process extra privileges when mapping physical interconnect memory.
 
 
 v3.1.0 - 16 Feb 2023
diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
index 1a7ba427d7..a080facddc 100644
--- a/lib/daemon-unix.c
+++ b/lib/daemon-unix.c
@@ -88,7 +88,8 @@ static bool switch_user = false;
 static uid_t uid;
 static gid_t gid;
 static char *user = NULL;
-static void daemon_become_new_user__(bool access_datapath);
+static void daemon_become_new_user__(bool access_datapath,
+ bool access_hardware_ports);
 
 static void check_already_running(void);
 static int lock_pidfile(FILE *, int command);
@@ -443,13 +444,13 @@ monitor_daemon(pid_t daemon_pid)
  * daemonize_complete()) or that it failed to start up (by exiting with a
  * nonzero exit code). */
 void
-daemonize_start(bool access_datapath)
+daemonize_start(bool access_datapath, bool access_hardware_ports)
 {
 assert_single_threaded();
 daemonize_fd = -1;
 
 if (switch_user) {
-daemon_become_new_user__(access_datapath);
+daemon_become_new_user__(access_datapath, access_hardware_ports);
 switch_user = false;
 }
 
@@ -807,7 +808,8 @@ daemon_become_new_user_unix(void)
 /* Linux specific implementation of daemon_become_new_user()
  * using libcap-ng.   */
 static void
-daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
+daemon_become_new_user_linux(bool access_datapath OVS_UNUSED,
+ bool access_hardware_ports OVS_UNUSED)
 {
 #if defined __linux__ &&  HAVE_LIBCAPNG
 int ret;
@@ -827,6 +829,16 @@ daemon_become_new_user_linux(bool access_datapath 
OVS_UNUSED)
 ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
   || capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
   || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BROADCAST);
+#ifdef DPDK_NETDEV
+if (access_hardware_ports && !ret) {
+ret = capng_update(CAPNG_ADD, cap_sets, CAP_SYS_RAWIO);
+VLOG_INFO("CAP_SYS_RAWIO enabled.");
+}
+#else
+if (access_hardware_ports) {
+VLOG_WARN("Dropped CAP_SYS_RAWIO request (no drivers).");
+}
+#endif
 }
 } else {
 ret = -1;
@@ -854,7 +866,7 @@ daemon_become_new_user_linux(bool access_datapath 
OVS_UNUSED)
 }
 
 static void
-daemon_become_new_user__(bool access_datapath)
+daemon_become_new_user__(bool access_datapath, bool access_hardware_ports)
 {
 /* If vlog file has been created, change its owner to the non-root user
  * as specifed by the --user option.