On 7 Jul 2023, at 17:04, Ilya Maximets wrote:

> On 7/6/23 18:22, Eelco Chaudron wrote:
>>
>>
>> On 2 Jun 2023, at 16:13, Adrian Moreno wrote:
>>
>>> Use TCA_POLICE_RATE64 if the rate cannot be expressed using 32bits.
>>>
>>> This breaks the 32Gbps barrier. The new barrier is ~4Tbps caused by
>>> netdev's API expressing kbps rates using 32-bit integers.
>>>
>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137643
>>> Signed-off-by: Adrian Moreno <[email protected]>
>>
>> This patch looks good to me, however, I have one additional general issue 
>> with this series.
>>
>> For example, if the running kernel does not support TCA_POLICE_RATE64, 
>> TCA_HTB_RATE64, or TCA_HTB_CEIL64 but the kernel it was compiled on does. 
>> This will break the implementation. Maybe we need some probe in 
>> netdev_tc_init_flow_api()?
>
> netdev_tc_init_flow_api() will not be called if hw-offload is not enabled.
> But anyway, we only add 64bit arguments if we have to.  i.e. if the value
> fits into 32bit range, 64bit argument will not be used.  That should provide
> required backward compatibility.  If we need 64bit, but kernel doesn't
> support, then we need to fail anyway.  Current code will configure incorrect
> values, there is no need preserving that.
>
>>
>> However looking at the current code we already use TCA_POLICE_PKTRATE64 
>> which is later than TCA_POLICE_RATE64, so we should be good?! If this is 
>> true we also do not need all the ‘#ifdef HAVE_TCA_POLICE_PKTRATE64’ and 
>> related code in the this patch.
>
> kpkts policing in a relatively new feature in OVS, so we do not expect it
> to be used on old kernels.  So, use of TCA_POLICE_PKTRATE64 should be fine.

Ok, I’m fine with both explanations on top of my own research, so:

Acked-by: Eelco Chaudron <[email protected]>

>>
>> The HTB ones were added in 2013, so maybe we are good? If we are we can 
>> remove all the ‘#ifdef HAVE_TCA_HTB_RATE64’ and related code in patch 4.
>>
>> Simon/Ilya any opinions on this?
>>
>> Cheers,
>>
>> Eelco
>>
>>
>>> ---
>>>  acinclude.m4            | 10 ++++++++++
>>>  lib/netdev-linux.c      | 19 ++++++++++++-------
>>>  lib/netdev-linux.h      |  2 +-
>>>  lib/tc.c                |  2 ++
>>>  tests/atlocal.in        |  1 +
>>>  tests/system-traffic.at | 21 +++++++++++++++++++++
>>>  6 files changed, 47 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/acinclude.m4 b/acinclude.m4
>>> index 1e5a9872d..3ac7072b5 100644
>>> --- a/acinclude.m4
>>> +++ b/acinclude.m4
>>> @@ -221,6 +221,16 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [
>>>                 [Define to 1 if TCA_HTB_RATE64 is available.])],
>>>      [AC_SUBST(HAVE_TCA_HTB_RATE64,no)]
>>>      )
>>> +
>>> +  AC_COMPILE_IFELSE([
>>> +    AC_LANG_PROGRAM([#include <linux/pkt_cls.h>], [
>>> +        int x = TCA_POLICE_PKTRATE64;
>>> +    ])],
>>> +    [AC_SUBST(HAVE_TCA_POLICE_PKTRATE64,yes)
>>> +     AC_DEFINE([HAVE_TCA_POLICE_PKTRATE64], [1],
>>> +               [Define to 1 if TCA_POLICE_PKTRATE64 is available.])],
>>> +    [AC_SUBST(HAVE_TCA_POLICE_PKTRATE64,no)]
>>> +    )
>>>  ])
>>>
>>>  dnl OVS_CHECK_LINUX_SCTP_CT
>>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>>> index 3526fbfc3..6a9f36f1e 100644
>>> --- a/lib/netdev-linux.c
>>> +++ b/lib/netdev-linux.c
>>> @@ -494,7 +494,7 @@ static struct tcmsg *netdev_linux_tc_make_request(const 
>>> struct netdev *,
>>>                                                    unsigned int flags,
>>>                                                    struct ofpbuf *);
>>>
>>> -static int tc_add_policer(struct netdev *, uint32_t kbits_rate,
>>> +static int tc_add_policer(struct netdev *, uint64_t kbits_rate,
>>>                            uint32_t kbits_burst, uint32_t kpkts_rate,
>>>                            uint32_t kpkts_burst);
>>>
>>> @@ -2660,6 +2660,7 @@ nl_msg_put_act_police(struct ofpbuf *request, 
>>> uint32_t index,
>>>                        uint64_t pkts_rate, uint64_t pkts_burst,
>>>                        uint32_t notexceed_act, bool single_action)
>>>  {
>>> +    uint64_t bytes_rate = kbits_rate / 8 * 1000;
>>>      size_t offset, act_offset;
>>>      struct tc_police police;
>>>      uint32_t prio = 0;
>>> @@ -2674,8 +2675,13 @@ nl_msg_put_act_police(struct ofpbuf *request, 
>>> uint32_t index,
>>>      nl_msg_act_police_start_nest(request, ++prio, &offset, &act_offset,
>>>                                   single_action);
>>>      if (police.rate.rate) {
>>> -        tc_put_rtab(request, TCA_POLICE_RATE, &police.rate, 0);
>>> +        tc_put_rtab(request, TCA_POLICE_RATE, &police.rate, bytes_rate);
>>>      }
>>> +#ifdef HAVE_TCA_POLICE_PKTRATE64
>>> +    if (bytes_rate > UINT32_MAX) {
>>> +        nl_msg_put_u64(request, TCA_POLICE_RATE64, bytes_rate);
>>> +    }
>>> +#endif
>>>      if (pkts_rate) {
>>>          uint64_t pkt_burst_ticks;
>>>          /* Here tc_bytes_to_ticks is used to convert packets rather than 
>>> bytes
>>> @@ -2689,7 +2695,7 @@ nl_msg_put_act_police(struct ofpbuf *request, 
>>> uint32_t index,
>>>  }
>>>
>>>  static int
>>> -tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
>>> +tc_add_matchall_policer(struct netdev *netdev, uint64_t kbits_rate,
>>>                          uint32_t kbits_burst, uint32_t kpkts_rate,
>>>                          uint32_t kpkts_burst)
>>>  {
>>> @@ -5703,9 +5709,8 @@ tc_policer_init(struct tc_police *tc_police, uint64_t 
>>> kbits_rate,
>>>   * Returns 0 if successful, otherwise a positive errno value.
>>>   */
>>>  static int
>>> -tc_add_policer(struct netdev *netdev, uint32_t kbits_rate,
>>> -               uint32_t kbits_burst, uint32_t kpkts_rate,
>>> -               uint32_t kpkts_burst)
>>> +tc_add_policer(struct netdev *netdev, uint64_t kbits_rate,
>>> +               uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t 
>>> kpkts_burst)
>>>  {
>>>      size_t basic_offset, police_offset;
>>>      struct ofpbuf request;
>>> @@ -5739,7 +5744,7 @@ tc_add_policer(struct netdev *netdev, uint32_t 
>>> kbits_rate,
>>>  }
>>>
>>>  int
>>> -tc_add_policer_action(uint32_t index, uint32_t kbits_rate,
>>> +tc_add_policer_action(uint32_t index, uint64_t kbits_rate,
>>>                        uint32_t kbits_burst, uint32_t pkts_rate,
>>>                        uint32_t pkts_burst, bool update)
>>>  {
>>> diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h
>>> index 9a416ce50..ec19b0ded 100644
>>> --- a/lib/netdev-linux.h
>>> +++ b/lib/netdev-linux.h
>>> @@ -29,7 +29,7 @@ struct netdev;
>>>  int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
>>>                                    const char *flag_name, bool enable);
>>>  int linux_get_ifindex(const char *netdev_name);
>>> -int tc_add_policer_action(uint32_t index, uint32_t kbits_rate,
>>> +int tc_add_policer_action(uint32_t index, uint64_t kbits_rate,
>>>                            uint32_t kbits_burst, uint32_t pkts_rate,
>>>                            uint32_t pkts_burst, bool update);
>>>  int tc_del_policer_action(uint32_t index, struct ofputil_meter_stats 
>>> *stats);
>>> diff --git a/lib/tc.c b/lib/tc.c
>>> index 5c32c6f97..b2a697774 100644
>>> --- a/lib/tc.c
>>> +++ b/lib/tc.c
>>> @@ -1427,6 +1427,8 @@ static const struct nl_policy police_policy[] = {
>>>      [TCA_POLICE_RATE] = { .type = NL_A_UNSPEC,
>>>                            .min_len = 1024,
>>>                            .optional = true, },
>>> +    [TCA_POLICE_RATE64] = { .type = NL_A_U32,
>>> +                            .optional = true, },
>>>      [TCA_POLICE_PEAKRATE] = { .type = NL_A_UNSPEC,
>>>                                .min_len = 1024,
>>>                                .optional = true, },
>>> diff --git a/tests/atlocal.in b/tests/atlocal.in
>>> index e3ee2d48c..2fc4faf80 100644
>>> --- a/tests/atlocal.in
>>> +++ b/tests/atlocal.in
>>> @@ -6,6 +6,7 @@ EGREP='@EGREP@'
>>>  PYTHON3='@PYTHON3@'
>>>  CFLAGS='@CFLAGS@'
>>>  HAVE_TCA_HTB_RATE64='@HAVE_TCA_HTB_RATE64@'
>>> +HAVE_TCA_POLICE_PKTRATE64='@HAVE_TCA_POLICE_PKTRATE64@'
>>>
>>>  # PYTHONCOERCECLOCALE=0 disables the Unicode compatibility warning on
>>>  # stderr that breaks almost any Python3 test (PEP 0538)
>>> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>>> index 6200ec52e..b3683e856 100644
>>> --- a/tests/system-traffic.at
>>> +++ b/tests/system-traffic.at
>>> @@ -2382,6 +2382,27 @@ AT_CHECK([tc class show dev ovs-p1 | grep -q 'class 
>>> htb .* HTB_CONF'])
>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>  AT_CLEANUP
>>>
>>> +AT_SETUP([Ingress Policy - 64-bit])
>>> +AT_SKIP_IF([test $HAVE_TC = no])
>>> +AT_SKIP_IF([test $HAVE_TCA_POLICE_PKTRATE64 = no])
>>> +OVS_TRAFFIC_VSWITCHD_START()
>>> +ADD_NAMESPACES(ns0)
>>> +ADD_VETH(p0, ns0, br0, "10.1.1.1/24")
>>> +
>>> +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_rate=50000000])
>>> +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_burst=40000000])
>
> We have 64bit rate, but not burst.  Doesn't 40000000 overflow the value?
>
>>> +
>>> +AT_CHECK([tc -o -s -d filter show dev ovs-p0 ingress |
>>> +  sed -n 's/.*\(rate [[0-9]]*[[a-zA-Z]]* burst 
>>> [[0-9]]*[[a-zA-Z]]*\).*/\1/; T; p; q'],
>>> +  [0],[dnl
>>> +rate 50Gbit burst 1200575000b
>
> This burst value seem to be overflowed.
> 40000000000 & (2**32 - 1) = 1345294336
> Math doesn't match exactly, but anyway.
>
>>> +])
>>> +
>>> +AT_CHECK([tc -s -d filter show dev ovs-p0 ingress |
>>> +  grep -E "basic|matchall" > /dev/null], [0])
>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>> +AT_CLEANUP
>>> +
>>>  AT_BANNER([conntrack])
>>>
>>>  AT_SETUP([conntrack - controller])
>>> -- 
>>> 2.40.1
>>>
>>> _______________________________________________
>>> dev mailing list
>>> [email protected]
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to