Hi Ilya Maximets and Aaron Conole.
The following is newest patch. Please help me review again.
---
lib/dpif.c | 20 +++----------
tests/ofproto-dpif.at | 29 ++++++++++++++++++
tests/ofproto-macros.at | 66 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 99 insertions(+), 16 deletions(-)
diff --git a/lib/dpif.c b/lib/dpif.c
index d07241f1e..02429a30c 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -71,6 +71,7 @@ COVERAGE_DEFINE(dpif_meter_set);
COVERAGE_DEFINE(dpif_port_add);
COVERAGE_DEFINE(dpif_port_del);
COVERAGE_DEFINE(dpif_purge);
+COVERAGE_DEFINE(dpif_execute_meters_discard);
static const struct dpif_class *base_dpif_classes[] = {
#if defined(__linux__) || defined(_WIN32)
@@ -1182,6 +1183,8 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch
*packets_,
/* Maintain a pointer to the first meter action seen. */
if (!aux->meter_action) {
aux->meter_action = action;
+ } else {
+ COVERAGE_INC(dpif_execute_meters_discard);
}
break;
@@ -1202,22 +1205,7 @@ dpif_execute_helper_cb(void *aux_, struct
dp_packet_batch *packets_,
if (aux->meter_action) {
const struct nlattr *a = aux->meter_action;
-
- /* XXX: This code collects meter actions since the last action
- * execution via the datapath to be executed right before the
- * current action that needs to be executed by the datapath.
- * This is only an approximation, but better than nothing.
- * Fundamentally, we should have a mechanism by which the
- * datapath could return the result of the meter action so that
- * we could execute them at the right order. */
- do {
- ofpbuf_put(&execute_actions, a, NLA_ALIGN(a->nla_len));
- /* Find next meter action before 'action', if any. */
- do {
- a = nl_attr_next(a);
- } while (a != action &&
- nl_attr_type(a) != OVS_ACTION_ATTR_METER);
- } while (a != action);
+ ofpbuf_put(&execute_actions, a, NLA_ALIGN(a->nla_len));
}
/* The Linux kernel datapath throws away the tunnel information
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index a1393f7f8..f5f665e45 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -12063,3 +12063,32 @@ AT_CHECK([test 1 = `ovs-ofctl parse-pcap p2-tx.pcap |
wc -l`])
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+
+AT_SETUP([ofproto-dpif - collecting meters])
+OVS_NETDEV_VSWITCHD_START
+
+AT_CHECK([ovs-vsctl add-br br-vm -- set Bridge br-vm datapath_type=netdev])
+AT_CHECK([ovs-vsctl add-br br-phy -- set Bridge br-phy datapath_type=netdev])
+AT_CHECK([ovs-vsctl add-port br-vm vx1 -- set Interface vx1 type=vxlan
options:remote_ip=1.1.1.1 options:local_ip=1.1.1.2])
+
+dnl Active vxlan egress network
+ADD_NAMESPACES(ns1)
+ADD_VETH(p1, ns1, br-phy, "1.1.1.1/24")
+AT_CHECK([ip link set up dev br-phy])
+AT_CHECK([ip addr add 1.1.1.2/24 dev br-phy])
+
+dnl Force packet-out into collecting meters
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br-vm 'meter=1 pktps
bands=type=drop rate=1'])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br-vm 'table=2,arp
actions=meter:1,set_field:2->arp_op,vx1,br-vm'])
+
+dnl Wait egress network ready
+AT_CHECK([ping -c 3 1.1.1.1], [0], [stdout])
+OVS_WAIT_UNTIL([test -n "`ovs-appctl tnl/arp/show | grep 1.1.1.1`"])
+OVS_WAIT_UNTIL([test -n "`ovs-appctl ovs/route/show | grep 1.1.1.0`"])
+
+dnl Send a arp packet-out message
+AT_CHECK([ovs-ofctl packet-out br-vm 'in_port=controller
packet=ffffffffffff000000010102080600010800060400010000000101020101010300000000000001010102
actions=resubmit(,2)'])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
\ No newline at end of file
diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
index c22fb3c79..50512f08b 100644
--- a/tests/ofproto-macros.at
+++ b/tests/ofproto-macros.at
@@ -384,3 +384,69 @@ m4_define([WAIT_FOR_DUMMY_PORTS], \
| grep 'unknown\|disconnected'])])])
+# DEL_NAMESPACES(ns [, ns ... ])
+#
+# Delete namespaces from the running OS
+m4_define([DEL_NAMESPACES],
+ [m4_foreach([ns], [$@],
+ [ip netns del ns
+])
+ ]
+)
+
+
+# ADD_NAMESPACES(ns [, ns ... ])
+#
+# Add new namespaces, if ns exists, the old one
+# will be remove before new ones are installed.
+m4_define([ADD_NAMESPACES],
+ [m4_foreach([ns], [$@],
+ [DEL_NAMESPACES(ns)
+ AT_CHECK([ip netns add ns || return 77])
+ on_exit 'DEL_NAMESPACES(ns)'
+ ip netns exec ns sysctl -w net.netfilter.nf_conntrack_helper=0
+ ])
+ ]
+)
+# NS_EXEC([namespace], [command])
+#
+# Execute 'command' in 'namespace'
+m4_define([NS_EXEC],
+ [ip netns exec $1 sh << NS_EXEC_HEREDOC
+$2
+NS_EXEC_HEREDOC])
+
+# NS_CHECK_EXEC([namespace], [command], other_params...)
+#
+# Wrapper for AT_CHECK that executes 'command' inside 'namespace'.
+# 'other_params' as passed as they are to AT_CHECK.
+m4_define([NS_CHECK_EXEC],
+ [ AT_CHECK([NS_EXEC([$1], [$2])], m4_shift(m4_shift($@))) ]
+)
+
+m4_define([ADD_VETH],
+ [ AT_CHECK([ip link add $1 type veth peer name ovs-$1 || return 77])
+ AT_CHECK([ip link set $1 netns $2])
+ AT_CHECK([ip link set dev ovs-$1 up])
+ AT_CHECK([ovs-vsctl add-port $3 ovs-$1])
+ NS_CHECK_EXEC([$2], [ip addr add $4 dev $1 $7])
+ NS_CHECK_EXEC([$2], [ip link set dev $1 up])
+ if test -n "$5"; then
+ NS_CHECK_EXEC([$2], [ip link set dev $1 address $5])
+ fi
+ if test -n "$6"; then
+ NS_CHECK_EXEC([$2], [ip route add default via $6])
+ fi
+ on_exit 'ip link del ovs-$1'
+ ]
+)
+
+# OVS_NETDEV_VSWITCHD_START([dbinit-aux-args])
+#
+# Creates a database and starts ovsdb-server, starts ovs-vswitchd
+# connected to that database.
+# 'dbinit-aux-args' are passed as additional commands to 'ovs-vsctl init'
+# before starting ovs-vswitchd.
+m4_define([OVS_NETDEV_VSWITCHD_START],
+ [_OVS_VSWITCHD_START([--disable-system], [$1])
+])
Regards,
Daniel Ding
> On Apr 2, 2025, at 12:16 AM, Ilya Maximets <i.maxim...@ovn.org> wrote:
>
> On 4/1/25 5:21 PM, Aaron Conole via dev wrote:
>> Daniel Ding <danieldin...@gmail.com> writes:
>>
>>> In order to collect meters since the last meter action
>>> to the current action in the dpif_execute_helper_cb,
>>> the next entry is simply obtained using nl_attr_next, and
>>> not considering the wrong nested nl without end_nested.
>>> This will cause memory access to be overflowed.
>>>
>>> Gdb debug looks like this:
>>>
>>> (gdb) print a
>>> $6 = (const struct nlattr *) 0xaaaad175a69c
>>> (gdb) n
>>> 1206 a = nl_attr_next(a);
>>> (gdb) n
>>> 1207 } while (a != action &&
>>> (gdb) n
>>> 1206 a = nl_attr_next(a);
>>> (gdb) print a
>>> $7 = (const struct nlattr *) 0xaaaad175a69c
>>> (gdb) print *a
>>> $8 = {nla_len = 0, nla_type = 0}
>>> (gdb)
>>>
>>> Fixes: 076caa2fb077 ("ofproto: Meter translation.")
>>> Signed-off-by: Daniel Ding <danieldin...@gmail.com>
>>> ---
>>> lib/dpif.c | 43 ++++++++++++++++--------------------------
>>> tests/system-dpdk.at | 45 ++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 61 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/lib/dpif.c b/lib/dpif.c
>>> index d07241f1e..05dfd2919 100644
>>> --- a/lib/dpif.c
>>> +++ b/lib/dpif.c
>>> @@ -1158,11 +1158,14 @@ dpif_flow_dump_next(struct dpif_flow_dump_thread
>>> *thread,
>>> return n;
>>> }
>>>
>>> +enum { EXECUTE_HELPER_MAX_METER = 32 };
>>> +
>>> struct dpif_execute_helper_aux {
>>> struct dpif *dpif;
>>> const struct flow *flow;
>>> int error;
>>> - const struct nlattr *meter_action; /* Non-NULL, if have a meter
>>> action. */
>>> + size_t meter_count;
>>> + const struct nlattr *meter_actions[EXECUTE_HELPER_MAX_METER + 1];
>>> };
>>>
>>> /* This is called for actions that need the context of the datapath to be
>>> @@ -1179,9 +1182,9 @@ dpif_execute_helper_cb(void *aux_, struct
>>> dp_packet_batch *packets_,
>>>
>>> switch ((enum ovs_action_attr)type) {
>>> case OVS_ACTION_ATTR_METER:
>>> - /* Maintain a pointer to the first meter action seen. */
>>> - if (!aux->meter_action) {
>>> - aux->meter_action = action;
>>> + /* Records the meter actions. */
>>> + if (aux->meter_count < EXECUTE_HELPER_MAX_METER) {
>>> + aux->meter_actions[aux->meter_count++] = action;
>>> }
>>
>> Can we have a coverage counter that trips for the else case? Maybe a
>> rate limited warning log as well. At least if we're not going to do the
>> search as described by Ilya in:
>>
>> https://patchwork.ozlabs.org/comment/3487109/
>
> If we have more than one meter, it's very likely we'll collect wrong ones
> here. Executing just the first one may be better in this case, if we're
> not going for the proper solution. i.e. just drop that accumulation
> logic and the traversal below and just put one meter.
> It appears to me that it works somewhat like that today anyways, since we're
> missing all the nested meters.
>
> And we can work on making the proper logic later.
>
>>
>>> break;
>>>
>>> @@ -1197,27 +1200,15 @@ dpif_execute_helper_cb(void *aux_, struct
>>> dp_packet_batch *packets_,
>>> uint64_t stub[256 / 8];
>>> struct pkt_metadata *md = &packet->md;
>>>
>>> - if (flow_tnl_dst_is_set(&md->tunnel) || aux->meter_action) {
>>> + if (flow_tnl_dst_is_set(&md->tunnel) || aux->meter_count) {
>>> ofpbuf_use_stub(&execute_actions, stub, sizeof stub);
>>>
>>> - if (aux->meter_action) {
>>> - const struct nlattr *a = aux->meter_action;
>>> -
>>> - /* XXX: This code collects meter actions since the last
>>> action
>>> - * execution via the datapath to be executed right before
>>> the
>>> - * current action that needs to be executed by the
>>> datapath.
>>> - * This is only an approximation, but better than nothing.
>>> - * Fundamentally, we should have a mechanism by which the
>>> - * datapath could return the result of the meter action so
>>> that
>>> - * we could execute them at the right order. */
>>> - do {
>>> + if (aux->meter_count) {
>>> + size_t i;
>>> + for (i = 0; i < aux->meter_count; i++) {
>>> + const struct nlattr *a = aux->meter_actions[i];
>>> ofpbuf_put(&execute_actions, a, NLA_ALIGN(a->nla_len));
>>> - /* Find next meter action before 'action', if any. */
>>> - do {
>>> - a = nl_attr_next(a);
>>> - } while (a != action &&
>>> - nl_attr_type(a) != OVS_ACTION_ATTR_METER);
>>> - } while (a != action);
>>> + }
>>> }
>>>
>>> /* The Linux kernel datapath throws away the tunnel information
>>> @@ -1261,11 +1252,9 @@ dpif_execute_helper_cb(void *aux_, struct
>>> dp_packet_batch *packets_,
>>>
>>> dp_packet_delete(clone);
>>>
>>> - if (flow_tnl_dst_is_set(&md->tunnel) || aux->meter_action) {
>>> + if (flow_tnl_dst_is_set(&md->tunnel) || aux->meter_count) {
>>> ofpbuf_uninit(&execute_actions);
>>> -
>>> - /* Do not re-use the same meters for later output actions. */
>>> - aux->meter_action = NULL;
>>> + aux->meter_count = 0;
>>> }
>>> break;
>>> }
>>> @@ -1303,7 +1292,7 @@ dpif_execute_helper_cb(void *aux_, struct
>>> dp_packet_batch *packets_,
>>> static int
>>> dpif_execute_with_help(struct dpif *dpif, struct dpif_execute *execute)
>>> {
>>> - struct dpif_execute_helper_aux aux = {dpif, execute->flow, 0, NULL};
>>> + struct dpif_execute_helper_aux aux = {dpif, execute->flow, 0, 0,
>>> {NULL}};
>>> struct dp_packet_batch pb;
>>>
>>> COVERAGE_INC(dpif_execute_with_help);
>>> diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
>>> index 1c97bf777..43b41a9f2 100644
>>> --- a/tests/system-dpdk.at
>>> +++ b/tests/system-dpdk.at
>>
>> The test suite you've submitted is broken on my system. Additionally,
>> it will only run for DPDK testing, but it really can be run on all
>> users of the userspace datapath.
>>
>> I think it should be written for system-traffic.at rather than as DPDK
>> specific suite.
>
> It shouldn't actually be even a system test. It should be a normal test
> in the tests/ofproto-dpif.at <http://ofproto-dpif.at/>. The issue doesn't
> require any real interfaces
> to be triggered.
>
>> I didn't actually test with the supplied fix, but the
>> test suite below will invoke a hang with ``make check-system-userspace``
>> like:
>>
>> Thread 1 (LWP 4033950 "ovs-vswitchd"):
>> #0 0x00000000004bf397 in nl_attr_type (nla=nla@entry=0x2cf879d8) at
>> lib/netlink.c:642
>> #1 0x0000000000490778 in dpif_execute_helper_cb
>> (aux_=aux_@entry=0x7ffc1e9b0d60, packets_=packets_@entry=0x7ffc1e9b0a80,
>> action=action@entry=0x2cf7d2d4, should_steal=should_steal@entry=false) at
>> lib/dpif.c:1222
>> ...
>>
>>
>> Ex suite:
>>
>> --- a/tests/system-traffic.at
>> +++ b/tests/system-traffic.at
>> @@ -2813,6 +2813,40 @@ OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c
>> "${packet}") -eq 5])
>> OVS_TRAFFIC_VSWITCHD_STOP
>> AT_CLEANUP
>>
>> +dnl Setup dpif collecting meters
>> +AT_SETUP([datapath - meter execution helpers])
>> +AT_KEYWORDS([meter meters])
>> +OVS_TRAFFIC_VSWITCHD_START()
>> +
>> +ADD_BR([br-vm])
>> +
>> +dnl Active vxlan egress network
>> +AT_CHECK([ip link set up dev br0])
>> +AT_CHECK([ip addr add 1.1.1.2/24 dev br0])
>> +AT_CHECK([ovs-vsctl add-port br-vm p1 -- set Interface p1 type=vxlan
>> options:remote_ip=1.1.1.1 options:local_ip=1.1.1.2])
>> +
>> +ADD_NAMESPACES(at_ns1)
>> +NS_CHECK_EXEC([at_ns1], [sysctl -w net.ipv6.conf.all.disable_ipv6=1], [0],
>> [ignore])
>> +
>> +ADD_VETH(p0, at_ns1, br0, "1.1.1.1/24", "e4:11:22:33:44:55")
>> +
>> +AT_CHECK([ovs-ofctl add-flow br0 "table=0,priority=0 actions=NORMAL"])
>> +AT_CHECK([ovs-ofctl add-flow br-vm "table=0,priority=0 actions=NORMAL"])
>> +
>> +AT_CHECK([ping -c 3 1.1.1.1], [0], [stdout])
>> +OVS_WAIT_UNTIL([test -n "`ovs-appctl tnl/arp/show | grep 1.1.1.1`"])
>> +
>> +dnl Force arps into collecting meters
>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br-vm 'meter=1 pktps
>> bands=type=drop rate=1'])
>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br-vm 'table=2,arp
>> actions=meter:1,set_field:2->arp_op,p1,br-vm'])
>> +
>> +dnl Send an arp packet-out message
>> +AT_CHECK([ovs-ofctl packet-out br-vm 'in_port=controller
>> packet=ffffffffffff000000010102080600010800060400010000000101020101010300000000000001010102
>> actions=resubmit(,2)'])
>> +
>> +dnl Clean up
>> +OVS_TRAFFIC_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>> AT_BANNER([MPLS])
>>
>> AT_SETUP([mpls - encap header dp-support])
>>
>> _______________________________________________
>> dev mailing list
>> d...@openvswitch.org <mailto:d...@openvswitch.org>
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev