Hi Pieter, nice to hear from you :)
On Tue, Feb 22, 2022 at 12:17:40PM +0000, [email protected] wrote: > From: Pieter Jansen van Vuuren <[email protected]> > > Previously when making use of the tc datapath to achieve decrement ttl, > we would install a filter that matches on the ttl/hoplimit field and use > a pedit set action to set the ttl/hoplimit to one less than the match > value. This results in a filter for each unique ttl value we want to > decrement. > > This patch introduces true decrement ttl which makes use of the pedit add > action. Adding 0xff is equivalent to decrementing the ttl/hoplimit > field. This also improves the hardware offload case by reducing the > number of filters required to support decrement ttl. In order to utilise > this, the following config option needs to be set to true. I'd be interested to understand the specific motivation for wanting to reduce flows. But I agree that in the general case it seems nice. One thing that I am curious about is the approach you have taken, which seems to be to patch pedit commands. But I am wondering if you considered plumbing the OpenFlow DEC_TTL command more directly into the ODP layer - perhaps hinging on the handling of OFPACT_DEC_TTL in do_xlate_actions(). > ovs-vsctl set Open_vSwitch . other_config:tc-pedit-add=true > Signed-off-by: Pieter Jansen van Vuuren <[email protected]> > Reviewed-by: Alejandro Lucero Palau <[email protected]> > Reviewed-by: Martin Habets <[email protected]> > --- > .../linux/compat/include/linux/openvswitch.h | 1 + > lib/dpif-netdev.c | 1 + > lib/dpif.c | 1 + > lib/netdev-offload-tc.c | 16 ++++++++- > lib/netdev-offload.c | 12 +++++++ > lib/netdev-offload.h | 1 + > lib/odp-execute.c | 2 ++ > lib/odp-util.c | 4 +++ > lib/tc.c | 35 +++++++++++++++++-- > lib/tc.h | 1 + > ofproto/ofproto-dpif-ipfix.c | 1 + > ofproto/ofproto-dpif-sflow.c | 1 + > vswitchd/vswitch.xml | 15 ++++++++ > 13 files changed, 87 insertions(+), 4 deletions(-) > > diff --git a/datapath/linux/compat/include/linux/openvswitch.h > b/datapath/linux/compat/include/linux/openvswitch.h > index 8d9300091..f7daffeb0 100644 > --- a/datapath/linux/compat/include/linux/openvswitch.h > +++ b/datapath/linux/compat/include/linux/openvswitch.h > @@ -1075,6 +1075,7 @@ enum ovs_action_attr { > OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ > OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. */ > #endif > + OVS_ACTION_ATTR_DEC_TTL, /* No argument. */ I'm not entirely sure, but I think that: * if this attribute is not used by the OVS kernel datapath then it should be inside the ifndef; otherwise * it should be above the ifndef Perhaps there is precedence, but it would strike me as unusual to implement a feature in the TC offload datapath but not in the OVS kernel datapath. > __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted > * from userspace. */ > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 9f35713ef..6343da93b 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -9067,6 +9067,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > case OVS_ACTION_ATTR_PUSH_ETH: > case OVS_ACTION_ATTR_POP_ETH: > case OVS_ACTION_ATTR_CLONE: > + case OVS_ACTION_ATTR_DEC_TTL: > case OVS_ACTION_ATTR_PUSH_NSH: > case OVS_ACTION_ATTR_POP_NSH: > case OVS_ACTION_ATTR_CT_CLEAR: > diff --git a/lib/dpif.c b/lib/dpif.c > index 40f5fe446..6ac5e3581 100644 > --- a/lib/dpif.c > +++ b/lib/dpif.c > @@ -1265,6 +1265,7 @@ dpif_execute_helper_cb(void *aux_, struct > dp_packet_batch *packets_, > case OVS_ACTION_ATTR_POP_VLAN: > case OVS_ACTION_ATTR_PUSH_MPLS: > case OVS_ACTION_ATTR_POP_MPLS: > + case OVS_ACTION_ATTR_DEC_TTL: > case OVS_ACTION_ATTR_SET: > case OVS_ACTION_ATTR_SET_MASKED: > case OVS_ACTION_ATTR_SAMPLE: > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c > index 9845e8d3f..878d63d37 100644 > --- a/lib/netdev-offload-tc.c > +++ b/lib/netdev-offload-tc.c > @@ -18,6 +18,7 @@ > > #include <errno.h> > #include <linux/if_ether.h> > +#include <linux/tc_act/tc_pedit.h> > > #include "dpif.h" > #include "hash.h" > @@ -877,7 +878,20 @@ parse_tc_flower_to_match(struct tc_flower *flower, > } > break; > case TC_ACT_PEDIT: { > - parse_flower_rewrite_to_netlink_action(buf, flower); > + if (action->pedit_type == TCA_PEDIT_KEY_EX_CMD_ADD) { > + if (flower->rewrite.key.eth_type == htons(ETH_TYPE_IP) && > + flower->rewrite.mask.ipv4.rewrite_ttl == 0xff && > + flower->rewrite.key.ipv4.rewrite_ttl == 0xff) { > + nl_msg_put_flag(buf, OVS_ACTION_ATTR_DEC_TTL); > + } > + if (flower->rewrite.key.eth_type == htons(ETH_TYPE_IPV6) > && > + flower->rewrite.mask.ipv6.rewrite_hlimit == 0xff && > + flower->rewrite.key.ipv6.rewrite_hlimit == 0xff) { > + nl_msg_put_flag(buf, OVS_ACTION_ATTR_DEC_TTL); > + } > + } else { > + parse_flower_rewrite_to_netlink_action(buf, flower); > + } > } > break; > case TC_ACT_ENCAP: { > diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c > index fb108c0d5..c7551dd97 100644 > --- a/lib/netdev-offload.c > +++ b/lib/netdev-offload.c > @@ -765,6 +765,14 @@ netdev_is_offload_rebalance_policy_enabled(void) > return netdev_offload_rebalance_policy; > } > > +static bool netdev_offload_tc_pedit_add = false; > + > +bool > +netdev_is_tc_pedit_add_enabled(void) > +{ > + return netdev_offload_tc_pedit_add; > +} > + > static void > netdev_ports_flow_init(void) > { > @@ -780,6 +788,10 @@ netdev_ports_flow_init(void) > void > netdev_set_flow_api_enabled(const struct smap *ovs_other_config) > { > + if (smap_get_bool(ovs_other_config, "tc-pedit-add", false)) { > + netdev_offload_tc_pedit_add = true; > + } > + > if (smap_get_bool(ovs_other_config, "hw-offload", false)) { > static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > > diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h > index 8237a85dd..1d38fe338 100644 > --- a/lib/netdev-offload.h > +++ b/lib/netdev-offload.h > @@ -126,6 +126,7 @@ bool netdev_any_oor(void); > bool netdev_is_flow_api_enabled(void); > void netdev_set_flow_api_enabled(const struct smap *ovs_other_config); > bool netdev_is_offload_rebalance_policy_enabled(void); > +bool netdev_is_tc_pedit_add_enabled(void); > int netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows); > > struct dpif_port; > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index 2f4cdd92c..026d6fb0e 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -821,6 +821,7 @@ requires_datapath_assistance(const struct nlattr *a) > case OVS_ACTION_ATTR_CHECK_PKT_LEN: > case OVS_ACTION_ATTR_ADD_MPLS: > case OVS_ACTION_ATTR_DROP: > + case OVS_ACTION_ATTR_DEC_TTL: > return false; > > case OVS_ACTION_ATTR_UNSPEC: > @@ -1001,6 +1002,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch > *batch, bool steal, > } > break; > case OVS_ACTION_ATTR_METER: > + case OVS_ACTION_ATTR_DEC_TTL: > /* Not implemented yet. */ > break; > case OVS_ACTION_ATTR_PUSH_ETH: { > diff --git a/lib/odp-util.c b/lib/odp-util.c > index 9a705cffa..abbf649c1 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -131,6 +131,7 @@ odp_action_len(uint16_t type) > case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16); > case OVS_ACTION_ATTR_RECIRC: return sizeof(uint32_t); > case OVS_ACTION_ATTR_HASH: return sizeof(struct ovs_action_hash); > + case OVS_ACTION_ATTR_DEC_TTL: return ATTR_LEN_VARIABLE; > case OVS_ACTION_ATTR_SET: return ATTR_LEN_VARIABLE; > case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE; > case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE; > @@ -1162,6 +1163,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a, > case OVS_ACTION_ATTR_HASH: > format_odp_hash_action(ds, nl_attr_get(a)); > break; > + case OVS_ACTION_ATTR_DEC_TTL: > + ds_put_cstr(ds, "dec_ttl"); > + break; > case OVS_ACTION_ATTR_SET_MASKED: > a = nl_attr_get(a); > /* OVS_KEY_ATTR_NSH is nested attribute, so it needs special process > */ > diff --git a/lib/tc.c b/lib/tc.c > index adb2d3182..01bb4c643 100644 > --- a/lib/tc.c > +++ b/lib/tc.c > @@ -36,6 +36,7 @@ > #include <unistd.h> > > #include "byte-order.h" > +#include "netdev-offload.h" > #include "netlink-socket.h" > #include "netlink.h" > #include "openvswitch/ofpbuf.h" > @@ -1010,12 +1011,13 @@ nl_parse_act_pedit(struct nlattr *options, struct > tc_flower *flower) > struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)]; > const struct tc_pedit *pe; > const struct tc_pedit_key *keys; > - const struct nlattr *nla, *keys_ex, *ex_type; > + const struct nlattr *nla, *keys_ex, *ex_type, *ex_cmd; > const void *keys_attr; > char *rewrite_key = (void *) &flower->rewrite.key; > char *rewrite_mask = (void *) &flower->rewrite.mask; > size_t keys_ex_size, left; > int type, i = 0, err; > + uint16_t pedit_type; > > if (!nl_parse_nested(options, pedit_policy, pe_attrs, > ARRAY_SIZE(pedit_policy))) { > @@ -1043,6 +1045,10 @@ nl_parse_act_pedit(struct nlattr *options, struct > tc_flower *flower) > ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE); > type = nl_attr_get_u16(ex_type); > > + ex_cmd = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_CMD); > + pedit_type = nl_attr_get_u16(ex_cmd); > + flower->actions[flower->action_count].pedit_type = pedit_type; > + Is there a case where TCA_PEDIT_KEY_EX_CMD will not be present? If so, does the above deal with such a case? > err = csum_update_flag(flower, type); > if (err) { > return err; > @@ -2473,7 +2479,8 @@ csum_update_flag(struct tc_flower *flower, > > static int > nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, > - struct tc_flower *flower) > + struct tc_flower *flower, > + struct tc_action *action) > { > struct { > struct tc_pedit sel; > @@ -2484,6 +2491,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, > .nkeys = 0 > } > }; > + uint8_t tr_ttl; nit: I think that this declaration can move inside the block where tr_ttl is used. > int i, j, err; > > for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) { > @@ -2529,6 +2537,26 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf > *request, > pedit_key->mask = ~mask_word; > pedit_key->val = data_word & mask_word; > sel.sel.nkeys++; > + tr_ttl = (flower->key.ip_ttl & flower->mask.ip_ttl) - 1; > + if (netdev_is_tc_pedit_add_enabled()) { > + /* rewrite ttl action */ > + if (pedit_key_ex->htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 && > + pedit_key->val == tr_ttl && > + pedit_key->off == 8) { > + pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_ADD; > + pedit_key->val = 0xff; > + flower->mask.ip_ttl = 0; > + action->pedit_type = TCA_PEDIT_KEY_EX_CMD_ADD; > + } > + if (pedit_key_ex->htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 && > + pedit_key->val == tr_ttl << 24 && > + pedit_key->off == 4) { > + pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_ADD; > + pedit_key->val = 0xff << 24; > + flower->mask.ip_ttl = 0; > + action->pedit_type = TCA_PEDIT_KEY_EX_CMD_ADD; > + } > + } Perhaps this problem doesn't arise, but I'm somewhat confused about how the above differentiates between the following two cases described in OpenFlow: 1. match on ip & ttl==7; set ttl to 6 2. match on ip; dec_ttl > > err = csum_update_flag(flower, m->htype); > if (err) { > @@ -2575,7 +2603,8 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct > tc_flower *flower) > switch (action->type) { > case TC_ACT_PEDIT: { > act_offset = nl_msg_start_nested(request, act_index++); > - error = nl_msg_put_flower_rewrite_pedits(request, flower); > + error = nl_msg_put_flower_rewrite_pedits(request, flower, > + action); > if (error) { > return error; > } > diff --git a/lib/tc.h b/lib/tc.h > index a147ca461..f33870e76 100644 > --- a/lib/tc.h > +++ b/lib/tc.h > @@ -259,6 +259,7 @@ struct tc_action { > }; > > enum tc_action_type type; > + int pedit_type; > }; > > enum tc_offloaded_state { > diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c > index 9280e008e..3c2af4891 100644 > --- a/ofproto/ofproto-dpif-ipfix.c > +++ b/ofproto/ofproto-dpif-ipfix.c > @@ -3007,6 +3007,7 @@ dpif_ipfix_read_actions(const struct flow *flow, > case OVS_ACTION_ATTR_METER: > case OVS_ACTION_ATTR_SET_MASKED: > case OVS_ACTION_ATTR_SET: > + case OVS_ACTION_ATTR_DEC_TTL: > case OVS_ACTION_ATTR_PUSH_VLAN: > case OVS_ACTION_ATTR_POP_VLAN: > case OVS_ACTION_ATTR_PUSH_MPLS: > diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c > index 30e7caf54..95c1989f1 100644 > --- a/ofproto/ofproto-dpif-sflow.c > +++ b/ofproto/ofproto-dpif-sflow.c > @@ -1221,6 +1221,7 @@ dpif_sflow_read_actions(const struct flow *flow, > } > break; > case OVS_ACTION_ATTR_SAMPLE: > + case OVS_ACTION_ATTR_DEC_TTL: > case OVS_ACTION_ATTR_PUSH_NSH: > case OVS_ACTION_ATTR_POP_NSH: > case OVS_ACTION_ATTR_UNSPEC: > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index 0c6632617..5c1595b9e 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -222,6 +222,21 @@ > </p> > </column> > > + <column name="other_config" key="tc-pedit-add" > + type='{"type": "boolean"}'> > + <p> > + Set this value to <code>true</code> to enable tc pedit add actions > + used for decrementing the ttl. > + </p> > + <p> > + The default value is <code>false</code>. > + </p> > + <p> > + This is only relevant if > + <ref column="other_config" key="hw-offload"/> is enabled. > + </p> > + </column> > + > <column name="other_config" key="hw-offload" > type='{"type": "boolean"}'> > <p> > -- > 2.17.1 > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
