Hi Mike, This patch will need some rebasing.
On Tue, Nov 1, 2022 at 5:27 PM Mike Pattrick <m...@redhat.com> wrote: > > Currently a bridge mirror will collect all packets and tools like > ovs-tcpdump can apply additional filters after they have already been > duplicated by vswitchd. This can result in inefficient collection. > > This patch adds support to apply pre-selection to bridge mirrors, which > can limit which packets are mirrored based on flow metadata. This > significantly improves overall vswitchd performance during mirroring if > only a subset of traffic is required. > > I bencharked this with two setups. A netlink based test with two veth benchmarked* > pairs connected to a single bridge, and a netdev based test involving a > mix of DPDK nics, and netdev-linux interfaces. Both tests involved > saturating the link with iperf3 and then sending an icmp ping every > second. I then measured the throughput on the link with no mirroring, > icmp pre-selected mirroring, and full mirroring. The results, below, > indicate a significant reduction to impact of mirroring when only a > subset of the traffic on a port is selected for collection. > > Test No Filter | Filter | Ratio | No Mirror | > +-----------+-----------+-------+-----------+ > netlink | 36.1 Gbps | 38.2 Gbps | 350 % | 39.0 Gbps | > netdev | 4.95 Gbps | 6.24 Gbps | 126 % | 7.39 Gbps | > > Signed-off-by: Mike Pattrick <m...@redhat.com> This looks really nice. I just have some trouble with "Ratio" in the table above. I tried to do different combinations of those numbers.. but I can't find a match. Could you detail what it is? Some small comments for this first pass, I did not run this patch yet. > --- > Documentation/ref/ovs-tcpdump.8.rst | 4 +++ > NEWS | 2 ++ > lib/flow.h | 11 ++++++ > ofproto/ofproto-dpif-mirror.c | 53 +++++++++++++++++++++++++++-- > ofproto/ofproto-dpif-mirror.h | 9 +++-- > ofproto/ofproto-dpif-xlate.c | 4 ++- > ofproto/ofproto-dpif.c | 2 +- > ofproto/ofproto.h | 3 ++ > tests/ofproto-dpif.at | 43 +++++++++++++++++++++++ > utilities/ovs-tcpdump.in | 13 +++++-- > vswitchd/bridge.c | 2 ++ > vswitchd/vswitch.ovsschema | 4 ++- > vswitchd/vswitch.xml | 6 ++++ > 13 files changed, 147 insertions(+), 9 deletions(-) > > diff --git a/Documentation/ref/ovs-tcpdump.8.rst > b/Documentation/ref/ovs-tcpdump.8.rst > index b9f8cdf6f..9163c8a5e 100644 > --- a/Documentation/ref/ovs-tcpdump.8.rst > +++ b/Documentation/ref/ovs-tcpdump.8.rst > @@ -61,6 +61,10 @@ Options > > If specified, mirror all ports (optional). > > +* ``--filter <flow>`` > + > + If specified, only mirror flows that match the provided filter. > + > See Also > ======== > > diff --git a/NEWS b/NEWS > index ff77ee404..d4eee89dd 100644 > --- a/NEWS > +++ b/NEWS > @@ -23,6 +23,8 @@ Post-v3.0.0 > bug and CVE fixes addressed since its release. > If a user wishes to benefit from these fixes it is recommended to use > DPDK 21.11.2. > + - ovs-tcpdump: > + * Added new --filter parameter to apply pre-selection on mirrored > flows. This feature affects the kernel datapath too (iow, move this entry out of the userspace datapath entries, by removing one indent level). > diff --git a/lib/flow.h b/lib/flow.h > index c647ad83c..403288ee5 100644 > --- a/lib/flow.h > +++ b/lib/flow.h > @@ -938,6 +938,17 @@ flow_union_with_miniflow(struct flow *dst, const struct > miniflow *src) > flow_union_with_miniflow_subset(dst, src, src->map); > } > > +/* Perform a bitwise OR of minimsdk 'src' mask data with the equivalent minimask* > + * fields in 'dst', storing the result in 'dst'. */ > +static inline void > +flow_wildcards_union_with_minimask(struct flow_wildcards *dst, > + const struct minimask *src) > +{ > + flow_union_with_miniflow_subset(&dst->masks, > + &src->masks, > + src->masks.map); > +} > + > static inline bool is_ct_valid(const struct flow *flow, > const struct flow_wildcards *mask, > struct flow_wildcards *wc) > diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c > index 343b75f0e..37439de48 100644 > --- a/ofproto/ofproto-dpif-mirror.c > +++ b/ofproto/ofproto-dpif-mirror.c > @@ -21,6 +21,7 @@ > #include "cmap.h" > #include "hmapx.h" > #include "ofproto.h" > +#include "ofproto-dpif-trace.h" > #include "vlan-bitmap.h" > #include "openvswitch/vlog.h" > > @@ -57,6 +58,10 @@ struct mirror { > struct hmapx srcs; /* Contains "struct mbundle*"s. */ > struct hmapx dsts; /* Contains "struct mbundle*"s. */ > > + /* Filter criteria. */ > + struct miniflow *filter; > + struct minimask *mask; > + > /* This is accessed by handler threads assuming RCU protection (see > * mirror_get()), but can be manipulated by mirror_set() without any > * explicit synchronization. */ > @@ -212,7 +217,9 @@ mirror_set(struct mbridge *mbridge, void *aux, const char > *name, > struct ofbundle **dsts, size_t n_dsts, > unsigned long *src_vlans, struct ofbundle *out_bundle, > uint16_t snaplen, > - uint16_t out_vlan) > + uint16_t out_vlan, > + char *filter, const char *filter > + const struct ofproto *ofprotop) Nit: const struct ofproto *ofproto > { > struct mbundle *mbundle, *out; > mirror_mask_t mirror_bit; > @@ -285,6 +292,31 @@ mirror_set(struct mbridge *mbridge, void *aux, const > char *name, > mirror->out_vlan = out_vlan; > mirror->snaplen = snaplen; > > + if (mirror->filter) { > + ovsrcu_postpone(free, mirror->filter); > + ovsrcu_postpone(free, mirror->mask); > + mirror->filter = NULL; > + } > + if (filter) { > + struct ofputil_port_map map = OFPUTIL_PORT_MAP_INITIALIZER(&map); > + struct flow_wildcards wc = {}; > + struct flow flow = {}; Those are initialised by parse_ofp_exact_flow() and don't need to be zero'd. > + char *err; > + > + ofproto_append_ports_to_map(&map, ofprotop->ports); > + err = parse_ofp_exact_flow(&flow, &wc, > + ofproto_get_tun_tab(ofprotop), > + filter, &map); > + ofputil_port_map_destroy(&map); > + if (err) { > + VLOG_WARN("filter is invalid: %s", err); free(err); > + return EINVAL; > + } > + > + mirror->mask = minimask_create(&wc); > + mirror->filter = miniflow_create(&flow); > + } > + > /* Update mbundles. */ > mirror_bit = MIRROR_MASK_C(1) << mirror->idx; > CMAP_FOR_EACH (mbundle, cmap_node, &mirror->mbridge->mbundles) { > @@ -340,6 +372,12 @@ mirror_destroy(struct mbridge *mbridge, void *aux) > ovsrcu_postpone(free, vlans); > } > > + if (mirror->filter) { > + ovsrcu_postpone(free, mirror->filter); > + ovsrcu_postpone(free, mirror->mask); > + mirror->filter = NULL; I understand mirror->mask is protected by mirror->filter, but I would still reset mirror->mask to NULL for consistency. > + } > + > mbridge->mirrors[mirror->idx] = NULL; > /* mirror_get() might have just read the pointer, so we must postpone the > * free. */ > @@ -418,7 +456,8 @@ mirror_update_stats(struct mbridge *mbridge, > mirror_mask_t mirrors, > bool > mirror_get(struct mbridge *mbridge, int index, const unsigned long **vlans, > mirror_mask_t *dup_mirrors, struct ofbundle **out, > - int *snaplen, int *out_vlan) > + int *snaplen, int *out_vlan, struct flow *flow, > + struct flow_wildcards *wc) Maybe worth describing that this function "outputs" to wc too (like for other params). > { > struct mirror *mirror; > > @@ -430,6 +469,16 @@ mirror_get(struct mbridge *mbridge, int index, const > unsigned long **vlans, > if (!mirror) { > return false; > } > + > + if (wc && mirror->filter) { > + if (miniflow_equal_flow_in_minimask(mirror->filter, flow, > + mirror->mask)) { > + flow_wildcards_union_with_minimask(wc, mirror->mask); > + } else { > + return false; > + } > + } > + > /* Assume 'mirror' is RCU protected, i.e., it will not be freed until > this > * thread quiesces. */ > > diff --git a/ofproto/ofproto-dpif-mirror.h b/ofproto/ofproto-dpif-mirror.h > index eed63ec4a..9c63faeba 100644 > --- a/ofproto/ofproto-dpif-mirror.h > +++ b/ofproto/ofproto-dpif-mirror.h > @@ -24,6 +24,9 @@ typedef uint32_t mirror_mask_t; > > struct ofproto_dpif; > struct ofbundle; > +struct ofproto; > +struct flow; > +struct flow_wildcards; > > /* The following functions are used by handler threads without any locking, > * assuming RCU protection. */ > @@ -40,7 +43,8 @@ void mirror_update_stats(struct mbridge*, mirror_mask_t, > uint64_t packets, > uint64_t bytes); > bool mirror_get(struct mbridge *, int index, const unsigned long **vlans, > mirror_mask_t *dup_mirrors, struct ofbundle **out, > - int *snaplen, int *out_vlan); > + int *snaplen, int *out_vlan, struct flow *flow, > + struct flow_wildcards *wc); > > /* The remaining functions are assumed to be called by the main thread only. > */ > > @@ -54,7 +58,8 @@ int mirror_set(struct mbridge *, void *aux, const char > *name, > struct ofbundle **srcs, size_t n_srcs, > struct ofbundle **dsts, size_t n_dsts, > unsigned long *src_vlans, struct ofbundle *out_bundle, > - uint16_t snaplen, uint16_t out_vlan); > + uint16_t snaplen, uint16_t out_vlan, char *filter, > + const struct ofproto *ofprotop); Same as for .c. const char *filter, and *ofproto. > void mirror_destroy(struct mbridge *, void *aux); > int mirror_get_stats(struct mbridge *, void *aux, uint64_t *packets, > uint64_t *bytes); > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > index 5d2af93fa..1c3bd0381 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -2163,7 +2163,9 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle > *xbundle, > /* Get the details of the mirror represented by the rightmost 1-bit. > */ > if (OVS_UNLIKELY(!mirror_get(xbridge->mbridge, raw_ctz(mirrors), > &vlans, &dup_mirrors, > - &out, &snaplen, &out_vlan))) { > + &out, &snaplen, &out_vlan, > + &ctx->xin->flow, > + ctx->wc))) { > /* The mirror got reconfigured before we got to read it's > * configuration. */ I don't think this comment is relevant (anymore?). > mirrors = zero_rightmost_1bit(mirrors); > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index f9562dee8..bf290471a 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -3672,7 +3672,7 @@ mirror_set__(struct ofproto *ofproto_, void *aux, > error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts, > s->n_dsts, s->src_vlans, > bundle_lookup(ofproto, s->out_bundle), > - s->snaplen, s->out_vlan); > + s->snaplen, s->out_vlan, s->filter, ofproto_); > free(srcs); > free(dsts); > return error; > diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h > index 4e15167ab..eff6d9a46 100644 > --- a/ofproto/ofproto.h > +++ b/ofproto/ofproto.h > @@ -486,6 +486,9 @@ struct ofproto_mirror_settings { > uint16_t out_vlan; /* Output VLAN, only if out_bundle is NULL. > */ > uint16_t snaplen; /* Max packet size of a mirrored packet > in byte, set to 0 equals 65535. */ > + > + /* Output filter */ > + char *filter; > }; > > int ofproto_mirror_register(struct ofproto *, void *aux, > diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at > index 8e993c585..75c676f4c 100644 > --- a/tests/ofproto-dpif.at > +++ b/tests/ofproto-dpif.at > @@ -5163,6 +5163,49 @@ OVS_VSWITCHD_STOP > AT_CLEANUP > > > +AT_SETUP([ofproto-dpif - mirroring, filter]) > +AT_KEYWORDS([mirror mirrors mirroring]) > +OVS_VSWITCHD_START > +add_of_ports br0 1 2 3 > +ovs-vsctl \ > + set Bridge br0 mirrors=@m --\ > + --id=@p3 get Port p3 --\ > + --id=@m create Mirror name=mymirror select_all=true output_port=@p3 > filter="icmp" > + > +AT_DATA([flows.txt], [dnl > +in_port=1 actions=output:2 > +in_port=2 actions=output:1 > +]) > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > + > +flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) > +AT_CHECK_UNQUOTED([tail -1 stdout], [0], > + [Datapath actions: 3,2 > +]) > + > +flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=128,frag=no),tcp()" > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) > +AT_CHECK_UNQUOTED([tail -1 stdout], [0], > + [Datapath actions: 2 > +]) > + > +flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) > +AT_CHECK_UNQUOTED([tail -1 stdout], [0], > + [Datapath actions: 3,1 > +]) > + > +flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=128,frag=no),tcp()" > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) > +AT_CHECK_UNQUOTED([tail -1 stdout], [0], > + [Datapath actions: 1 > +]) > + > +OVS_VSWITCHD_STOP > +AT_CLEANUP > + > + > AT_SETUP([ofproto-dpif - mirroring, select_all]) > AT_KEYWORDS([mirror mirrors mirroring]) > OVS_VSWITCHD_START > diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in > index e12bab889..83454960b 100755 > --- a/utilities/ovs-tcpdump.in > +++ b/utilities/ovs-tcpdump.in > @@ -137,6 +137,7 @@ The following options are available: > --mirror-to The name for the mirror port to use (optional) > Default 'miINTERFACE' > --span If specified, mirror all ports (optional) > + --filter Set a preselection filter > """ % {'prog': sys.argv[0]}) > sys.exit(0) > > @@ -349,7 +350,7 @@ class OVSDB(object): > return result > > def bridge_mirror(self, intf_name, mirror_intf_name, br_name, > - mirror_select_all=False): > + mirror_select_all=False, mirror_filter=None): > > txn = self._start_txn() > mirror = txn.insert(self.get_table('Mirror')) > @@ -357,6 +358,9 @@ class OVSDB(object): > > mirror.select_all = mirror_select_all > > + if mirror_filter is not None: > + mirror.filter = mirror_filter > + > mirrored_port = self._find_row_by_name('Port', intf_name) > > mirror.verify('select_dst_port') > @@ -422,6 +426,7 @@ def main(): > mirror_interface = None > mirror_select_all = False > dump_cmd = 'tcpdump' > + fltr = None mirror_filter for readability. > > for cur, nxt in argv_tuples(sys.argv[1:]): > if skip_next: > @@ -451,6 +456,10 @@ def main(): > elif cur in ['--span']: > mirror_select_all = True > continue > + elif cur in ['--filter']: > + fltr = nxt > + skip_next = True > + continue > tcpdargs.append(cur) > > if interface is None: > @@ -500,7 +509,7 @@ def main(): > ovsdb.make_port(mirror_interface, ovsdb.port_bridge(interface)) > ovsdb.bridge_mirror(interface, mirror_interface, > ovsdb.port_bridge(interface), > - mirror_select_all) > + mirror_select_all, mirror_filter=fltr) > except OVSDBException as oe: > print("ERROR: Unable to properly setup the mirror: %s." % str(oe)) > try: > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index 25ce45e3d..081b61526 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -5106,6 +5106,8 @@ mirror_configure(struct mirror *m) > s.snaplen = 0; > } > > + s.filter = cfg->filter; I am not familiar with this code. I prefer to ask. Is it safe to only reference the ovsrec content? I think it is in this case, since mirror_set will convert this information into a flow / mask. > + > /* Get port selection. */ > if (cfg->select_all) { > size_t n_ports = hmap_count(&m->bridge->ports); > diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema > index 4873cfde7..d77b5100b 100644 > --- a/vswitchd/vswitch.ovsschema > +++ b/vswitchd/vswitch.ovsschema > @@ -1,6 +1,6 @@ > {"name": "Open_vSwitch", > "version": "8.3.0", > - "cksum": "3781850481 26690", > + "cksum": "3953232702 26737", > "tables": { > "Open_vSwitch": { > "columns": { > @@ -460,6 +460,8 @@ > "type": {"key": "string", "value": "integer", > "min": 0, "max": "unlimited"}, > "ephemeral": true}, > + "filter": { > + "type": "string"}, > "external_ids": { > "type": {"key": "string", "value": "string", > "min": 0, "max": "unlimited"}}}}, > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index 36388e3c4..39b1b5ce6 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -5102,6 +5102,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 > type=patch options:peer=p1 \ > are not truncated. > </p> > </column> > + <column name="filter"> > + <p>Filter to apply to mirror.</p> > + <p>Flows that match <ref column="filter"/> will flow through this > + mirror, flows that do not match will be ignored. > + </p> > + </column> > </group> > > <group title="Statistics: Mirror counters"> > -- > 2.31.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > -- David Marchand _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev