On Tue, Jun 2, 2020 at 11:47 AM Eli Britstein <el...@mellanox.com> wrote: > > > On 6/1/2020 8:29 PM, Sriharsha Basavapatna wrote: > > On Mon, Jun 1, 2020 at 9:18 PM Eli Britstein <el...@mellanox.com> wrote: > >> > >> On 6/1/2020 6:15 PM, Sriharsha Basavapatna wrote: > >>> On Mon, Jun 1, 2020 at 7:58 PM Eli Britstein <el...@mellanox.com> wrote: > >>>> On 5/28/2020 11:19 AM, Sriharsha Basavapatna wrote: > >>>>> In this patch we check if action processing (apart from OUTPUT action), > >>>>> should be skipped for a given dp_netdev_flow. Specifically, we check if > >>>>> the action is TNL_PUSH and if it has been offloaded to HW, then we do > >>>>> not > >>>>> push the tunnel header in SW. The datapath only executes the OUTPUT > >>>>> action. > >>>>> The packet will be encapsulated in HW during transmit. > >>>>> > >>>>> Signed-off-by: Sriharsha Basavapatna > >>>>> <sriharsha.basavapa...@broadcom.com> > >>>>> --- > >>>>> lib/dpif-netdev.c | 46 > >>>>> ++++++++++++++++++++++++++++++++++++---------- > >>>>> 1 file changed, 36 insertions(+), 10 deletions(-) > >>>>> > >>>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > >>>>> index 781b233f4..3e175c25e 100644 > >>>>> --- a/lib/dpif-netdev.c > >>>>> +++ b/lib/dpif-netdev.c > >>>>> @@ -112,6 +112,7 @@ COVERAGE_DEFINE(datapath_drop_recirc_error); > >>>>> COVERAGE_DEFINE(datapath_drop_invalid_port); > >>>>> COVERAGE_DEFINE(datapath_drop_invalid_tnl_port); > >>>>> COVERAGE_DEFINE(datapath_drop_rx_invalid_packet); > >>>>> +COVERAGE_DEFINE(datapath_skip_tunnel_push); > >>>>> > >>>>> /* Protects against changes to 'dp_netdevs'. */ > >>>>> static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; > >>>>> @@ -538,6 +539,16 @@ struct dp_netdev_flow { > >>>>> bool dead; > >>>>> uint32_t mark; /* Unique flow mark assigned to a > >>>>> flow */ > >>>>> > >>>>> + /* The next two members are used to support partial offloading of > >>>>> + * actions. 
The boolean flag tells if this flow has its actions > >>>>> partially > >>>>> + * offloaded. The egress port# tells if the action should be > >>>>> offloaded > >>>>> + * on the egress (output) port instead of the in-port for the > >>>>> flow. Note > >>>>> + * that we support flows with a single egress port action. > >>>>> + * (see MAX_ACTION_ATTRS for related comments). > >>>>> + */ > >>>>> + bool partial_actions_offloaded; > >>>>> + odp_port_t egress_offload_port; > >>>>> + > >>>>> /* Statistics. */ > >>>>> struct dp_netdev_flow_stats stats; > >>>>> > >>>>> @@ -791,7 +802,8 @@ static void dp_netdev_execute_actions(struct > >>>>> dp_netdev_pmd_thread *pmd, > >>>>> bool should_steal, > >>>>> const struct flow *flow, > >>>>> const struct nlattr *actions, > >>>>> - size_t actions_len); > >>>>> + size_t actions_len, > >>>>> + const struct dp_netdev_flow > >>>>> *dp_flow); > >>>>> static void dp_netdev_input(struct dp_netdev_pmd_thread *, > >>>>> struct dp_packet_batch *, odp_port_t > >>>>> port_no); > >>>>> static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, > >>>>> @@ -3828,7 +3840,7 @@ dpif_netdev_execute(struct dpif *dpif, struct > >>>>> dpif_execute *execute) > >>>>> dp_packet_batch_init_packet(&pp, execute->packet); > >>>>> pp.do_not_steal = true; > >>>>> dp_netdev_execute_actions(pmd, &pp, false, execute->flow, > >>>>> - execute->actions, execute->actions_len); > >>>>> + execute->actions, execute->actions_len, > >>>>> NULL); > >>>>> dp_netdev_pmd_flush_output_packets(pmd, true); > >>>>> > >>>>> if (pmd->core_id == NON_PMD_CORE_ID) { > >>>>> @@ -6456,7 +6468,7 @@ packet_batch_per_flow_execute(struct > >>>>> packet_batch_per_flow *batch, > >>>>> actions = dp_netdev_flow_get_actions(flow); > >>>>> > >>>>> dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow, > >>>>> - actions->actions, actions->size); > >>>>> + actions->actions, actions->size, flow); > >>>>> } > >>>>> > >>>>> static inline void > >>>>> @@ -6764,7 +6776,7 @@ 
handle_packet_upcall(struct dp_netdev_pmd_thread > >>>>> *pmd, > >>>>> * we'll send the packet up twice. */ > >>>>> dp_packet_batch_init_packet(&b, packet); > >>>>> dp_netdev_execute_actions(pmd, &b, true, &match.flow, > >>>>> - actions->data, actions->size); > >>>>> + actions->data, actions->size, NULL); > >>>>> > >>>>> add_actions = put_actions->size ? put_actions : actions; > >>>>> if (OVS_LIKELY(error != ENOSPC)) { > >>>>> @@ -6999,6 +7011,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread > >>>>> *pmd, > >>>>> struct dp_netdev_execute_aux { > >>>>> struct dp_netdev_pmd_thread *pmd; > >>>>> const struct flow *flow; > >>>>> + const struct dp_netdev_flow *dp_flow; /* for partial action > >>>>> offload */ > >>>>> }; > >>>>> > >>>>> static void > >>>>> @@ -7143,7 +7156,7 @@ dp_execute_userspace_action(struct > >>>>> dp_netdev_pmd_thread *pmd, > >>>>> if (!error || error == ENOSPC) { > >>>>> dp_packet_batch_init_packet(&b, packet); > >>>>> dp_netdev_execute_actions(pmd, &b, should_steal, flow, > >>>>> - actions->data, actions->size); > >>>>> + actions->data, actions->size, NULL); > >>>>> } else if (should_steal) { > >>>>> dp_packet_delete(packet); > >>>>> COVERAGE_INC(datapath_drop_userspace_action_error); > >>>>> @@ -7162,6 +7175,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > >>>>> *packets_, > >>>>> int type = nl_attr_type(a); > >>>>> struct tx_port *p; > >>>>> uint32_t packet_count, packets_dropped; > >>>>> + struct dp_netdev_flow *dp_flow = aux->dp_flow; > >>>>> > >>>>> switch ((enum ovs_action_attr)type) { > >>>>> case OVS_ACTION_ATTR_OUTPUT: > >>>>> @@ -7219,9 +7233,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > >>>>> *packets_, > >>>>> } > >>>>> dp_packet_batch_apply_cutlen(packets_); > >>>>> packet_count = dp_packet_batch_size(packets_); > >>>>> - if (push_tnl_action(pmd, a, packets_)) { > >>>>> - COVERAGE_ADD(datapath_drop_tunnel_push_error, > >>>>> - packet_count); > >>>>> + /* Execute tnl_push action in SW, if it is not offloaded as 
a > >>>>> partial > >>>>> + * action in HW. Otherwise, HW pushes the tunnel header during > >>>>> output > >>>>> + * processing. There's a small window here in which the > >>>>> offload thread > >>>>> + * offloads the flow, but the partial_actions_offloaded flag > >>>>> is still > >>>>> + * not set. In this case, as the packet is already > >>>>> encapsulated, it > >>>>> + * wouldn't match the offloaded flow and the action won't be > >>>>> executed > >>>>> + * in HW. > >>>> I have created a scenario in which it would hit. Just set the inner > >>>> properties as the outer. Of course it is not the common case, but it > >>>> takes only an example. > >>>> > >>>> Maybe add a logic not to offload if the matches would apply to the outer. > >>> The outer properties specifically outer mac addresses are resolved by > >>> OVS and then added to the header to be pushed. So I'm not sure how you > >>> created this scenario if I'm correctly understanding your comment. Can > >>> you please provide some details (example/commands that you used etc) > >>> to hit the condition ? More clarity on this would help me to add the > >>> additional logic to decline offload in this case. > >> I've just manually set the properties of the inner according to my > >> setup's outers. Regarding suggestion where to check, please see patch 5/5. > > Outer headers can't be the same as inner, unless it is some kind of > > misconfiguration. This misconfiguration could occur in full-offload > > and non-offload modes too, not just in partial offload mode, right ? > > It's like having duplicate addresses (address conflicts) in the > > network. Code to handle this condition might look more confusing ? > > There are facilities in the networking infrastructure to detect/report > > such conditions and I feel flow offload layer is not the right place > > to handle such issues. What do you think ? > > I think outer/inner might be the same. There is no restriction to do it > and it is not a wrong configuration. 
Those are separate networks. There > is currently no restriction in OVS to support it (SW, partial mark/rss, > full offloads). This patchset should not introduce such a limitation, and > should handle it. I'm sorry for not being able to respond sooner. I tested this configuration with a Broadcom-NIC. There is no issue, and the reason is the same as mentioned in the above comments. That is, even with the same inner/outer header (packet already encapsulated in SW), it doesn't match the offloaded rule and the packet gets transmitted properly (no double-encap). Thanks, -Harsha
> > > > > Thanks, > > -Harsha > >>> Thanks, > >>> -Harsha > >>>>> + */ > >>>>> + if (!dp_flow || !dp_flow->partial_actions_offloaded) { > >>>>> + if (push_tnl_action(pmd, a, packets_)) { > >>>>> + COVERAGE_ADD(datapath_drop_tunnel_push_error, > >>>>> packet_count); > >>>>> + } > >>>>> + } else { > >>>>> + COVERAGE_ADD(datapath_skip_tunnel_push, packet_count); > >>>>> } > >>>>> return; > >>>>> > >>>>> @@ -7509,9 +7534,10 @@ static void > >>>>> dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, > >>>>> struct dp_packet_batch *packets, > >>>>> bool should_steal, const struct flow > >>>>> *flow, > >>>>> - const struct nlattr *actions, size_t > >>>>> actions_len) > >>>>> + const struct nlattr *actions, size_t > >>>>> actions_len, > >>>>> + const struct dp_netdev_flow *dp_flow) > >>>>> { > >>>>> - struct dp_netdev_execute_aux aux = { pmd, flow }; > >>>>> + struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow }; > >>>>> > >>>>> odp_execute_actions(&aux, packets, should_steal, actions, > >>>>> actions_len, dp_execute_cb); _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev