On 2 Jun 2023, at 16:13, Adrian Moreno wrote:

> Use TCA_POLICE_RATE64 if the rate cannot be expressed using 32bits.
>
> This breaks the 32Gbps barrier. The new barrier is ~4Tbps caused by
> netdev's API expressing kbps rates using 32-bit integers.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137643
> Signed-off-by: Adrian Moreno <[email protected]>

This patch looks good to me, however, I have one additional general issue with 
this series.

For example, if the running kernel does not support TCA_POLICE_RATE64, 
TCA_HTB_RATE64, or TCA_HTB_CEIL64 but the kernel it was compiled on does. This 
will break the implementation. Maybe we need some probe in 
netdev_tc_init_flow_api()?

However looking at the current code we already use TCA_POLICE_PKTRATE64 which 
is later than TCA_POLICE_RATE64, so we should be good?! If this is true we also 
do not need all the ‘#ifdef HAVE_TCA_POLICE_PKTRATE64’ and related code in the 
this patch.

The HTB ones were added in 2013, so maybe we are good? If we are we can remove 
all the ‘#ifdef HAVE_TCA_HTB_RATE64’ and related code in patch 4.

Simon/Ilya any opinions on this?

Cheers,

Eelco


> ---
>  acinclude.m4            | 10 ++++++++++
>  lib/netdev-linux.c      | 19 ++++++++++++-------
>  lib/netdev-linux.h      |  2 +-
>  lib/tc.c                |  2 ++
>  tests/atlocal.in        |  1 +
>  tests/system-traffic.at | 21 +++++++++++++++++++++
>  6 files changed, 47 insertions(+), 8 deletions(-)
>
> diff --git a/acinclude.m4 b/acinclude.m4
> index 1e5a9872d..3ac7072b5 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -221,6 +221,16 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [
>                 [Define to 1 if TCA_HTB_RATE64 is available.])],
>      [AC_SUBST(HAVE_TCA_HTB_RATE64,no)]
>      )
> +
> +  AC_COMPILE_IFELSE([
> +    AC_LANG_PROGRAM([#include <linux/pkt_cls.h>], [
> +        int x = TCA_POLICE_PKTRATE64;
> +    ])],
> +    [AC_SUBST(HAVE_TCA_POLICE_PKTRATE64,yes)
> +     AC_DEFINE([HAVE_TCA_POLICE_PKTRATE64], [1],
> +               [Define to 1 if TCA_POLICE_PKTRATE64 is available.])],
> +    [AC_SUBST(HAVE_TCA_POLICE_PKTRATE64,no)]
> +    )
>  ])
>
>  dnl OVS_CHECK_LINUX_SCTP_CT
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 3526fbfc3..6a9f36f1e 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -494,7 +494,7 @@ static struct tcmsg *netdev_linux_tc_make_request(const 
> struct netdev *,
>                                                    unsigned int flags,
>                                                    struct ofpbuf *);
>
> -static int tc_add_policer(struct netdev *, uint32_t kbits_rate,
> +static int tc_add_policer(struct netdev *, uint64_t kbits_rate,
>                            uint32_t kbits_burst, uint32_t kpkts_rate,
>                            uint32_t kpkts_burst);
>
> @@ -2660,6 +2660,7 @@ nl_msg_put_act_police(struct ofpbuf *request, uint32_t 
> index,
>                        uint64_t pkts_rate, uint64_t pkts_burst,
>                        uint32_t notexceed_act, bool single_action)
>  {
> +    uint64_t bytes_rate = kbits_rate / 8 * 1000;
>      size_t offset, act_offset;
>      struct tc_police police;
>      uint32_t prio = 0;
> @@ -2674,8 +2675,13 @@ nl_msg_put_act_police(struct ofpbuf *request, uint32_t 
> index,
>      nl_msg_act_police_start_nest(request, ++prio, &offset, &act_offset,
>                                   single_action);
>      if (police.rate.rate) {
> -        tc_put_rtab(request, TCA_POLICE_RATE, &police.rate, 0);
> +        tc_put_rtab(request, TCA_POLICE_RATE, &police.rate, bytes_rate);
>      }
> +#ifdef HAVE_TCA_POLICE_PKTRATE64
> +    if (bytes_rate > UINT32_MAX) {
> +        nl_msg_put_u64(request, TCA_POLICE_RATE64, bytes_rate);
> +    }
> +#endif
>      if (pkts_rate) {
>          uint64_t pkt_burst_ticks;
>          /* Here tc_bytes_to_ticks is used to convert packets rather than 
> bytes
> @@ -2689,7 +2695,7 @@ nl_msg_put_act_police(struct ofpbuf *request, uint32_t 
> index,
>  }
>
>  static int
> -tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
> +tc_add_matchall_policer(struct netdev *netdev, uint64_t kbits_rate,
>                          uint32_t kbits_burst, uint32_t kpkts_rate,
>                          uint32_t kpkts_burst)
>  {
> @@ -5703,9 +5709,8 @@ tc_policer_init(struct tc_police *tc_police, uint64_t 
> kbits_rate,
>   * Returns 0 if successful, otherwise a positive errno value.
>   */
>  static int
> -tc_add_policer(struct netdev *netdev, uint32_t kbits_rate,
> -               uint32_t kbits_burst, uint32_t kpkts_rate,
> -               uint32_t kpkts_burst)
> +tc_add_policer(struct netdev *netdev, uint64_t kbits_rate,
> +               uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t 
> kpkts_burst)
>  {
>      size_t basic_offset, police_offset;
>      struct ofpbuf request;
> @@ -5739,7 +5744,7 @@ tc_add_policer(struct netdev *netdev, uint32_t 
> kbits_rate,
>  }
>
>  int
> -tc_add_policer_action(uint32_t index, uint32_t kbits_rate,
> +tc_add_policer_action(uint32_t index, uint64_t kbits_rate,
>                        uint32_t kbits_burst, uint32_t pkts_rate,
>                        uint32_t pkts_burst, bool update)
>  {
> diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h
> index 9a416ce50..ec19b0ded 100644
> --- a/lib/netdev-linux.h
> +++ b/lib/netdev-linux.h
> @@ -29,7 +29,7 @@ struct netdev;
>  int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
>                                    const char *flag_name, bool enable);
>  int linux_get_ifindex(const char *netdev_name);
> -int tc_add_policer_action(uint32_t index, uint32_t kbits_rate,
> +int tc_add_policer_action(uint32_t index, uint64_t kbits_rate,
>                            uint32_t kbits_burst, uint32_t pkts_rate,
>                            uint32_t pkts_burst, bool update);
>  int tc_del_policer_action(uint32_t index, struct ofputil_meter_stats *stats);
> diff --git a/lib/tc.c b/lib/tc.c
> index 5c32c6f97..b2a697774 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -1427,6 +1427,8 @@ static const struct nl_policy police_policy[] = {
>      [TCA_POLICE_RATE] = { .type = NL_A_UNSPEC,
>                            .min_len = 1024,
>                            .optional = true, },
> +    [TCA_POLICE_RATE64] = { .type = NL_A_U32,
> +                            .optional = true, },
>      [TCA_POLICE_PEAKRATE] = { .type = NL_A_UNSPEC,
>                                .min_len = 1024,
>                                .optional = true, },
> diff --git a/tests/atlocal.in b/tests/atlocal.in
> index e3ee2d48c..2fc4faf80 100644
> --- a/tests/atlocal.in
> +++ b/tests/atlocal.in
> @@ -6,6 +6,7 @@ EGREP='@EGREP@'
>  PYTHON3='@PYTHON3@'
>  CFLAGS='@CFLAGS@'
>  HAVE_TCA_HTB_RATE64='@HAVE_TCA_HTB_RATE64@'
> +HAVE_TCA_POLICE_PKTRATE64='@HAVE_TCA_POLICE_PKTRATE64@'
>
>  # PYTHONCOERCECLOCALE=0 disables the Unicode compatibility warning on
>  # stderr that breaks almost any Python3 test (PEP 0538)
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 6200ec52e..b3683e856 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -2382,6 +2382,27 @@ AT_CHECK([tc class show dev ovs-p1 | grep -q 'class 
> htb .* HTB_CONF'])
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>
> +AT_SETUP([Ingress Policy - 64-bit])
> +AT_SKIP_IF([test $HAVE_TC = no])
> +AT_SKIP_IF([test $HAVE_TCA_POLICE_PKTRATE64 = no])
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_NAMESPACES(ns0)
> +ADD_VETH(p0, ns0, br0, "10.1.1.1/24")
> +
> +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_rate=50000000])
> +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_burst=40000000])
> +
> +AT_CHECK([tc -o -s -d filter show dev ovs-p0 ingress |
> +  sed -n 's/.*\(rate [[0-9]]*[[a-zA-Z]]* burst [[0-9]]*[[a-zA-Z]]*\).*/\1/; 
> T; p; q'],
> +  [0],[dnl
> +rate 50Gbit burst 1200575000b
> +])
> +
> +AT_CHECK([tc -s -d filter show dev ovs-p0 ingress |
> +  grep -E "basic|matchall" > /dev/null], [0])
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_BANNER([conntrack])
>
>  AT_SETUP([conntrack - controller])
> -- 
> 2.40.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to