This patch introduces a new API to the offload provider framework that
allows hardware offload implementations to control hash value calculation
for the OVS_ACTION_ATTR_HASH action.
Background and Motivation
=========================
The OVS hash action (OVS_ACTION_ATTR_HASH) is used to compute a hash value
from packet header fields, primarily for load balancing across multiple
paths using the select group action. The hash value is stored in the
packet's metadata and used by subsequent actions to distribute flows
across multiple output ports.
However, hardware offload implementations may require different approaches
to hash calculation:
1. Hardware NICs may use different hash functions or hash inputs than
the software datapath, which can lead to inconsistent load distribution
when mixing hardware and software paths.
2. Some hardware may support enhanced hashing mechanisms (e.g., using
symmetric hashing for bidirectional flows or hardware-specific hash
engines) that provide better load distribution than the default
software implementation.
Design
======
This patch adds a new optional callback to the dpif_offload_class:
bool (*netdev_get_dp_hash)(const struct dpif_offload *,
const struct netdev *ingress_netdev,
struct dp_packet *,
const struct ovs_action_hash *, uint32_t *hash);
The callback is invoked during hash action execution when hardware offload
is enabled and the original ingress port is known. It receives:
- ingress_netdev: The original ingress port where the packet was received
- packet: The packet to be hashed
- hash_action: The hash action parameters including algorithm and basis
If the provider implements this callback and returns true, the returned
hash value is used. Otherwise, OVS falls back to the standard hash
calculation.
NOTE: This patch needs to be applied on top of the patch series below:
https://patchwork.ozlabs.org/project/openvswitch/list/?series=491639
https://patchwork.ozlabs.org/project/openvswitch/list/?series=493971
Signed-off-by: Eelco Chaudron <[email protected]>
---
Changes from v2 -> v3:
- Removed the const qualifier for the packet in the API.
- Updated the dependent patches.
---
lib/dpif-netdev.c | 102 ++++++++++++++++++++++++++++++++----
lib/dpif-offload-dummy.c | 98 ++++++++++++++++++++++++++++++++++
lib/dpif-offload-provider.h | 12 +++++
lib/dpif-offload.c | 19 +++++++
lib/dpif-offload.h | 4 ++
lib/dpif.c | 7 ++-
lib/odp-execute.c | 15 ++++--
lib/odp-execute.h | 2 +-
tests/dpif-netdev.at | 47 +++++++++++++++++
9 files changed, 291 insertions(+), 15 deletions(-)
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 9384edcd96..c12bd2ff14 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -8514,7 +8514,28 @@ dp_execute_lb_output_action(struct dp_netdev_pmd_thread
*pmd,
}
}
-static void
+static inline bool
+dp_hw_offload_hash(struct dp_netdev_pmd_thread *pmd,
+ const struct ovs_action_hash *hash_act,
+ struct dp_packet *packet, odp_port_t *cached_in_odpp,
+ struct netdev **cached_in_netdev)
+{
+ if (*cached_in_odpp != packet->md.orig_in_port || !*cached_in_netdev) {
+ struct tx_port *in_port = pmd_send_port_cache_lookup(
+ pmd, packet->md.orig_in_port);
+
+ if (!in_port) {
+ return false;
+ }
+ *cached_in_odpp = packet->md.orig_in_port;
+ *cached_in_netdev = in_port->port->netdev;
+ }
+
+ return dpif_offload_netdev_get_dp_hash(*cached_in_netdev, packet, hash_act,
+ &packet->md.dp_hash);
+}
+
+static bool
dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
const struct nlattr *a, bool should_steal)
OVS_NO_THREAD_SAFETY_ANALYSIS
@@ -8531,12 +8552,12 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
case OVS_ACTION_ATTR_OUTPUT:
dp_execute_output_action(pmd, packets_, should_steal,
nl_attr_get_odp_port(a));
- return;
+ return true;
case OVS_ACTION_ATTR_LB_OUTPUT:
dp_execute_lb_output_action(pmd, packets_, should_steal,
nl_attr_get_u32(a));
- return;
+ return true;
case OVS_ACTION_ATTR_TUNNEL_PUSH:
if (should_steal) {
@@ -8552,7 +8573,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
COVERAGE_ADD(datapath_drop_tunnel_push_error,
packet_count);
}
- return;
+ return true;
case OVS_ACTION_ATTR_TUNNEL_POP:
if (*depth < MAX_RECIRC_DEPTH) {
@@ -8580,7 +8601,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
packets_dropped);
}
if (dp_packet_batch_is_empty(packets_)) {
- return;
+ return true;
}
struct dp_packet *packet;
@@ -8591,7 +8612,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
(*depth)++;
dp_netdev_recirculate(pmd, packets_);
(*depth)--;
- return;
+ return true;
}
COVERAGE_ADD(datapath_drop_invalid_tnl_port,
dp_packet_batch_size(packets_));
@@ -8640,7 +8661,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
ofpbuf_uninit(&actions);
fat_rwlock_unlock(&dp->upcall_rwlock);
- return;
+ return true;
}
COVERAGE_ADD(datapath_drop_lock_error,
dp_packet_batch_size(packets_));
@@ -8664,7 +8685,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
dp_netdev_recirculate(pmd, packets_);
(*depth)--;
- return;
+ return true;
}
COVERAGE_ADD(datapath_drop_recirc_error,
@@ -8816,6 +8837,69 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
pmd->ctx.now / 1000);
break;
+ case OVS_ACTION_ATTR_HASH: {
+ const struct ovs_action_hash *hash_act = nl_attr_get(a);
+ struct dp_packet *packet;
+ struct netdev *cached_in_netdev = NULL;
+ odp_port_t cached_in_odpp = ODPP_NONE;
+
+ if (!dp->offload_enabled) {
+ return false;
+ }
+
+ /* This is a hardware offload-enhanced version of similar code
+ * executed for OVS_ACTION_ATTR_HASH in odp_execute_actions(). */
+ switch (hash_act->hash_alg) {
+ case OVS_HASH_ALG_L4: {
+ struct flow flow;
+ uint32_t hash;
+
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ /* RSS hash can be used here instead of 5tuple for
+ * performance reasons. */
+ if (dp_hw_offload_hash(pmd, hash_act, packet,
+ &cached_in_odpp,
+ &cached_in_netdev)) {
+ continue;
+ }
+
+ if (dp_packet_rss_valid(packet)) {
+ hash = dp_packet_get_rss_hash(packet);
+ hash = hash_int(hash, hash_act->hash_basis);
+ } else {
+ flow_extract(packet, &flow);
+ hash = flow_hash_5tuple(&flow, hash_act->hash_basis);
+ }
+ packet->md.dp_hash = hash;
+ }
+ break;
+ }
+ case OVS_HASH_ALG_SYM_L4: {
+ struct flow flow;
+ uint32_t hash;
+
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ if (dp_hw_offload_hash(pmd, hash_act, packet,
+ &cached_in_odpp,
+ &cached_in_netdev)) {
+ continue;
+ }
+
+ flow_extract(packet, &flow);
+ hash = flow_hash_symmetric_l3l4(&flow,
+ hash_act->hash_basis,
+ false);
+ packet->md.dp_hash = hash;
+ }
+ break;
+ }
+ default:
+ /* Assert on unknown hash algorithm. */
+ OVS_NOT_REACHED();
+ }
+ return true;
+ }
+
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_MPLS:
@@ -8823,7 +8907,6 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_SAMPLE:
- case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_UNSPEC:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_PUSH_ETH:
@@ -8842,6 +8925,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch
*packets_,
}
dp_packet_delete_batch(packets_, should_steal);
+ return true;
}
static void
diff --git a/lib/dpif-offload-dummy.c b/lib/dpif-offload-dummy.c
index 118e0471e4..f1c087bdbd 100644
--- a/lib/dpif-offload-dummy.c
+++ b/lib/dpif-offload-dummy.c
@@ -643,6 +643,103 @@ dummy_offload_udp_tnl_get_src_port(
return true;
}
+static inline uint32_t
+fnv1a_add(uint32_t hash, uint32_t word)
+{
+ const uint32_t prime = 16777619U;
+ const uint8_t *data = (const uint8_t *) &word;
+
+ for (size_t i = 0; i < sizeof(uint32_t); i++) {
+ hash ^= data[i];
+ hash *= prime;
+ }
+ return hash;
+}
+
+/* Add a 64-bit word to FNV-1a hash. */
+static inline uint32_t
+fnv1a_add64(uint32_t hash, uint64_t word)
+{
+ return fnv1a_add(fnv1a_add(hash, word), word >> 32);
+}
+
+static uint32_t
+fnv1a_init(uint32_t seed)
+{
+ const uint32_t fnv_offset_basis = 2166136261U;
+
+ return seed ? fnv1a_add(fnv_offset_basis, seed) : fnv_offset_basis;
+}
+
+static bool
+get_l4_sym_hash(struct flow *flow, uint32_t seed, uint32_t *hash_)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ uint32_t hash = fnv1a_init(seed);
+
+ if (flow->dl_type == htons(ETH_TYPE_IP)) {
+ hash = fnv1a_add(hash,
+ (OVS_FORCE uint32_t) (flow->nw_src ^ flow->nw_dst));
+ } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
+ /* IPv6 addresses are 64-bit aligned inside struct flow. */
+ const uint64_t *a = ALIGNED_CAST(uint64_t *, flow->ipv6_src.s6_addr);
+ const uint64_t *b = ALIGNED_CAST(uint64_t *, flow->ipv6_dst.s6_addr);
+
+ for (int i = 0; i < sizeof flow->ipv6_src / sizeof *a; i++) {
+ hash = fnv1a_add64(hash, a[i] ^ b[i]);
+ }
+ } else {
+ return false;
+ }
+
+ hash = fnv1a_add(hash, flow->nw_proto);
+ if (!(flow->nw_frag & FLOW_NW_FRAG_MASK)
+ && (flow->nw_proto == IPPROTO_TCP
+ || flow->nw_proto == IPPROTO_SCTP
+ || flow->nw_proto == IPPROTO_UDP)) {
+ hash = fnv1a_add(hash,
+ (OVS_FORCE uint16_t)(flow->tp_src ^ flow->tp_dst));
+ }
+
+ ds_put_format(&ds, "l4_sym_hash: %8.8x for packet: ", hash);
+ flow_format(&ds, flow, NULL);
+ VLOG_DBG("%s", ds_cstr(&ds));
+ ds_destroy(&ds);
+
+ *hash_ = hash;
+ return true;
+}
+
+static bool
+dummy_offload_get_dp_hash(const struct dpif_offload *offload OVS_UNUSED,
+ const struct netdev *ingress_netdev OVS_UNUSED,
+ struct dp_packet *packet,
+ const struct ovs_action_hash *hash_act,
+ uint32_t *hash)
+{
+ switch ((enum ovs_hash_alg) hash_act->hash_alg) {
+ case OVS_HASH_ALG_L4:
+ case OVS_HASH_ALG_SYM_L4: {
+ /* For our implementation we will just use a symmetric L4 hash.
+ * Note that we will use our own hash that will give consistent results
+ * across all platforms/compilers. */
+ struct flow flow;
+
+ flow_extract(packet, &flow);
+ if (!get_l4_sym_hash(&flow, hash_act->hash_basis, hash)) {
+ return false;
+ }
+ break;
+ }
+
+ case __OVS_HASH_MAX:
+ default:
+ OVS_NOT_REACHED();
+ }
+
+ return true;
+}
+
static bool
dummy_offload_are_all_actions_supported(const struct dpif_offload *offload_,
odp_port_t in_odp,
@@ -1189,6 +1286,7 @@ dummy_pmd_thread_lifecycle(const struct dpif_offload
*dpif_offload,
.get_netdev = dummy_offload_get_netdev, \
.netdev_hw_post_process = dummy_offload_hw_post_process, \
.netdev_udp_tnl_get_src_port = dummy_offload_udp_tnl_get_src_port, \
+ .netdev_get_dp_hash = dummy_offload_get_dp_hash, \
.netdev_flow_put = dummy_flow_put, \
.netdev_flow_del = dummy_flow_del, \
.netdev_flow_stats = dummy_flow_stats, \
diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
index 36d07190db..420cb982ef 100644
--- a/lib/dpif-offload-provider.h
+++ b/lib/dpif-offload-provider.h
@@ -292,6 +292,18 @@ struct dpif_offload_class {
struct dp_packet *packet,
ovs_be16 *src_port);
+ /* Allows the offload provider to override the default dp-hash calculation.
+ * Called during packet processing to determine the hash value for datapath
+ * operations (e.g., load balancing, packet distribution).
+ *
+ * If implemented, should return true and set 'hash' to the desired hash
+ * value. If not implemented or if default behavior is desired, should
+ * return false to use the standard hash calculation. */
+ bool (*netdev_get_dp_hash)(const struct dpif_offload *,
+ const struct netdev *ingress_netdev,
+ struct dp_packet *,
+ const struct ovs_action_hash *, uint32_t *hash);
+
/* Add or modify the specified flow directly in the offload datapath.
* The actual implementation may choose to handle the offload
* asynchronously by returning EINPROGRESS and invoking the supplied
diff --git a/lib/dpif-offload.c b/lib/dpif-offload.c
index d004cb1089..e803556db2 100644
--- a/lib/dpif-offload.c
+++ b/lib/dpif-offload.c
@@ -1484,6 +1484,25 @@ dpif_offload_netdev_udp_tnl_get_src_port(const struct
netdev *ingress_netdev,
packet, src_port);
}
+bool
+dpif_offload_netdev_get_dp_hash(const struct netdev *ingress_netdev,
+ struct dp_packet *packet,
+ const struct ovs_action_hash *hash_action,
+ uint32_t *hash)
+{
+ const struct dpif_offload *offload;
+
+ offload = ovsrcu_get(const struct dpif_offload *,
+ &ingress_netdev->dpif_offload);
+
+ if (OVS_UNLIKELY(!offload) || !offload->class->netdev_get_dp_hash) {
+ return false;
+ }
+
+ return offload->class->netdev_get_dp_hash(offload, ingress_netdev, packet,
+ hash_action, hash);
+}
+
void
dpif_offload_datapath_register_flow_unreference_cb(
struct dpif *dpif, dpif_offload_flow_unreference_cb *cb)
diff --git a/lib/dpif-offload.h b/lib/dpif-offload.h
index 1b778168e6..f3b6853cde 100644
--- a/lib/dpif-offload.h
+++ b/lib/dpif-offload.h
@@ -117,6 +117,10 @@ int dpif_offload_netdev_hw_post_process(struct netdev *,
unsigned pmd_id,
bool dpif_offload_netdev_udp_tnl_get_src_port(const struct netdev *,
struct dp_packet *,
ovs_be16 *src_port);
+bool dpif_offload_netdev_get_dp_hash(const struct netdev *,
+ struct dp_packet *,
+ const struct ovs_action_hash *,
+ uint32_t *hash);
/* Callback invoked when a hardware flow offload operation (put/del) completes.
diff --git a/lib/dpif.c b/lib/dpif.c
index cab5884254..a4b26964b3 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1182,7 +1182,7 @@ struct dpif_execute_helper_aux {
/* This is called for actions that need the context of the datapath to be
* meaningful. */
-static void
+static bool
dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
const struct nlattr *action, bool should_steal)
{
@@ -1264,6 +1264,10 @@ dpif_execute_helper_cb(void *aux_, struct
dp_packet_batch *packets_,
}
case OVS_ACTION_ATTR_HASH:
+ /* For this action, no dp-specific handling is needed; fall back to
+ * the default software handling. */
+ return false;
+
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_MPLS:
@@ -1286,6 +1290,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch
*packets_,
OVS_NOT_REACHED();
}
dp_packet_delete_batch(packets_, should_steal);
+ return true;
}
/* Executes 'execute' by performing most of the actions in userspace and
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index ecbda8c010..356b22294f 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -862,13 +862,13 @@ requires_datapath_assistance(const struct nlattr *a)
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_METER:
case OVS_ACTION_ATTR_PSAMPLE:
+ case OVS_ACTION_ATTR_HASH:
return true;
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
- case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_PUSH_MPLS:
case OVS_ACTION_ATTR_POP_MPLS:
case OVS_ACTION_ATTR_TRUNC:
@@ -1066,14 +1066,17 @@ odp_execute_actions(void *dp, struct dp_packet_batch
*batch, bool steal,
bool last_action = (left <= NLA_ALIGN(a->nla_len));
if (requires_datapath_assistance(a)) {
+ bool handled = true;
+
if (dp_execute_action) {
/* Allow 'dp_execute_action' to steal the packet data if we do
* not need it any more. */
bool should_steal = steal && last_action;
- dp_execute_action(dp, batch, a, should_steal);
+ handled = dp_execute_action(dp, batch, a, should_steal);
- if (last_action || dp_packet_batch_is_empty(batch)) {
+ if (handled
+ && (last_action || dp_packet_batch_is_empty(batch))) {
/* We do not need to free the packets.
* Either dp_execute_actions() has stolen them
* or the batch is freed due to errors. In either
@@ -1082,7 +1085,11 @@ odp_execute_actions(void *dp, struct dp_packet_batch
*batch, bool steal,
return;
}
}
- continue;
+ if (handled) {
+ continue;
+ }
+ /* If not handled by the datapath for some specific reason,
+ * fall through to the non-datapath-assisted handling. */
}
/* If type is set in the active actions implementation, call the
diff --git a/lib/odp-execute.h b/lib/odp-execute.h
index 2ba1ec5d2a..ccf1e2e064 100644
--- a/lib/odp-execute.h
+++ b/lib/odp-execute.h
@@ -35,7 +35,7 @@ void odp_execute_init(void);
typedef void (*odp_execute_action_cb)(struct dp_packet_batch *batch,
const struct nlattr *action);
-typedef void (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
+typedef bool (*odp_execute_cb)(void *dp, struct dp_packet_batch *batch,
const struct nlattr *action, bool should_steal);
/* Actions that need to be executed in the context of a datapath are handed
diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at
index 5b6c961f90..447927ae14 100644
--- a/tests/dpif-netdev.at
+++ b/tests/dpif-netdev.at
@@ -662,6 +662,53 @@
arp,in_port=ANY,dl_vlan=11,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:
DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy])
DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy-pmd])
+AT_SETUP([dpif-netdev - partial hw offload - dp_hash - dummy-pmd])
+OVS_VSWITCHD_START(
+ [add-port br0 p1 -- \
+ add-port br0 p2 -- \
+ set interface p1 type=dummy-pmd ofport_request=1 options:ifindex=1100 -- \
+ set interface p2 type=dummy-pmd ofport_request=2 options:ifindex=1200 \
+ options:tx_pcap=p2.pcap -- \
+ set bridge br0 datapath-type=dummy other-config:datapath-id=1234 \
+ fail-mode=secure], [], [], [--dummy-numa="0"])
+AT_CHECK([ovs-appctl vlog/set dpif_offload_dummy:file:dbg])
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=true])
+OVS_WAIT_UNTIL([grep "Flow HW offload is enabled" ovs-vswitchd.log])
+
+AT_CHECK([ovs-ofctl -O OpenFlow12 add-group br0 \
+ 'group_id=123,type=select,bucket=output:p2,output:p2'])
+AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 'ip actions=group:123'])
+
+packet="in_port(1),"\
+"eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:1),eth_type(0x0800),"\
+"ipv4(src=192.168.1.1,dst=192.168.1.100,proto=6,tos=0,ttl=128,frag=no),"\
+"tcp(src=1000,dst=1000)"
+
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows -m | strip_hw_offload], [0], [dnl
+recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth(),eth_type(0x0800),ipv4(),tcp(src=1000/0,dst=1000/0),
packets:1, bytes:118, used:0.0s, offloaded:partial, dp:ovs,
actions:hash(sym_l4(0)),recirc(0x1)
+recirc_id(0x1),dp_hash(0x34a08190/0xf),in_port(p1),packet_type(ns=0,id=0),eth(),eth_type(0x0800),ipv4(),tcp(src=1000/0,dst=1000/0),
packets:1, bytes:118, used:0.0s, offloaded:yes, dp:dummy, actions:p2
+])
+
+OVS_WAIT_UNTIL([grep "l4_sym_hash: 34a08190 for packet: tcp,in_port=1," \
+ ovs-vswitchd.log])
+
+AT_CHECK(
+ [ovs-appctl --format json dpif/offload/show \
+ | sed 's/.*"p1":{\([[^}]]*\)}.*/\1/; s/,/\n/g; s/"//g' \
+ | sed -n '/^rx_offload_/p' | sort], [0], [dnl
+rx_offload_full:0
+rx_offload_miss:1
+rx_offload_partial:1
+rx_offload_pipe_abort:0
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([dpif-netdev - full hw offload - dummy-pmd])
OVS_VSWITCHD_START(
[add-port br0 p1 -- \
--
2.52.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev