Hi Pieter,

nice to hear from you :)

On Tue, Feb 22, 2022 at 12:17:40PM +0000, [email protected] 
wrote:
> From: Pieter Jansen van Vuuren <[email protected]>
> 
> Previously when making use of the tc datapath to achieve decrement ttl,
> we would install a filter that matches on the ttl/hoplimit field and use
> a pedit set action to set the ttl/hoplimit to one less than the match
> value. This results in a filter for each unique ttl value we want to
> decrement.
> 
> This patch introduces true decrement ttl which makes use of the pedit add
> action.  Adding 0xff is equivalent to decrementing the ttl/hoplimit
> field. This also improves the hardware offload case by reducing the
> number of filters required to support decrement ttl. In order to utilise
> this, the following config option needs to be set to true.

I'd be interested to understand the specific motivation for wanting
to reduce flows. But I agree that in the general case it seems nice.

One thing that I am curious about is the approach you have taken,
which seems to be to patch pedit commands. But I am wondering if you
considered plumbing the OpenFlow DEC_TTL command more directly into
the ODP layer - perhaps hinging on the handling of OFPACT_DEC_TTL in
do_xlate_actions().

> ovs-vsctl set Open_vSwitch . other_config:tc-pedit-add=true

> Signed-off-by: Pieter Jansen van Vuuren <[email protected]>
> Reviewed-by: Alejandro Lucero Palau <[email protected]>
> Reviewed-by: Martin Habets <[email protected]>
> ---
>  .../linux/compat/include/linux/openvswitch.h  |  1 +
>  lib/dpif-netdev.c                             |  1 +
>  lib/dpif.c                                    |  1 +
>  lib/netdev-offload-tc.c                       | 16 ++++++++-
>  lib/netdev-offload.c                          | 12 +++++++
>  lib/netdev-offload.h                          |  1 +
>  lib/odp-execute.c                             |  2 ++
>  lib/odp-util.c                                |  4 +++
>  lib/tc.c                                      | 35 +++++++++++++++++--
>  lib/tc.h                                      |  1 +
>  ofproto/ofproto-dpif-ipfix.c                  |  1 +
>  ofproto/ofproto-dpif-sflow.c                  |  1 +
>  vswitchd/vswitch.xml                          | 15 ++++++++
>  13 files changed, 87 insertions(+), 4 deletions(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
> b/datapath/linux/compat/include/linux/openvswitch.h
> index 8d9300091..f7daffeb0 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -1075,6 +1075,7 @@ enum ovs_action_attr {
>       OVS_ACTION_ATTR_DROP,          /* u32 xlate_error. */
>       OVS_ACTION_ATTR_LB_OUTPUT,     /* u32 bond-id. */
>  #endif
> +     OVS_ACTION_ATTR_DEC_TTL,      /* No argument. */

I'm not entirely sure, but I think that:
* if this attribute is not used by the OVS kernel datapath then
  it should be inside the ifndef; otherwise
* it should be above the ifndef

Perhaps there is precedence, but it would strike me as unusual to implement
a feature in the TC offload datapath but not in the OVS kernel datapath.

>       __OVS_ACTION_ATTR_MAX,        /* Nothing past this will be accepted
>                                      * from userspace. */
>  
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 9f35713ef..6343da93b 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -9067,6 +9067,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
> *packets_,
>      case OVS_ACTION_ATTR_PUSH_ETH:
>      case OVS_ACTION_ATTR_POP_ETH:
>      case OVS_ACTION_ATTR_CLONE:
> +    case OVS_ACTION_ATTR_DEC_TTL:
>      case OVS_ACTION_ATTR_PUSH_NSH:
>      case OVS_ACTION_ATTR_POP_NSH:
>      case OVS_ACTION_ATTR_CT_CLEAR:
> diff --git a/lib/dpif.c b/lib/dpif.c
> index 40f5fe446..6ac5e3581 100644
> --- a/lib/dpif.c
> +++ b/lib/dpif.c
> @@ -1265,6 +1265,7 @@ dpif_execute_helper_cb(void *aux_, struct 
> dp_packet_batch *packets_,
>      case OVS_ACTION_ATTR_POP_VLAN:
>      case OVS_ACTION_ATTR_PUSH_MPLS:
>      case OVS_ACTION_ATTR_POP_MPLS:
> +    case OVS_ACTION_ATTR_DEC_TTL:
>      case OVS_ACTION_ATTR_SET:
>      case OVS_ACTION_ATTR_SET_MASKED:
>      case OVS_ACTION_ATTR_SAMPLE:
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 9845e8d3f..878d63d37 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -18,6 +18,7 @@
>  
>  #include <errno.h>
>  #include <linux/if_ether.h>
> +#include <linux/tc_act/tc_pedit.h>
>  
>  #include "dpif.h"
>  #include "hash.h"
> @@ -877,7 +878,20 @@ parse_tc_flower_to_match(struct tc_flower *flower,
>              }
>              break;
>              case TC_ACT_PEDIT: {
> -                parse_flower_rewrite_to_netlink_action(buf, flower);
> +                if (action->pedit_type == TCA_PEDIT_KEY_EX_CMD_ADD) {
> +                    if (flower->rewrite.key.eth_type == htons(ETH_TYPE_IP) &&
> +                        flower->rewrite.mask.ipv4.rewrite_ttl == 0xff &&
> +                        flower->rewrite.key.ipv4.rewrite_ttl == 0xff) {
> +                        nl_msg_put_flag(buf, OVS_ACTION_ATTR_DEC_TTL);
> +                    }
> +                    if (flower->rewrite.key.eth_type == htons(ETH_TYPE_IPV6) 
> &&
> +                        flower->rewrite.mask.ipv6.rewrite_hlimit == 0xff &&
> +                        flower->rewrite.key.ipv6.rewrite_hlimit == 0xff) {
> +                        nl_msg_put_flag(buf, OVS_ACTION_ATTR_DEC_TTL);
> +                    }
> +                } else {
> +                    parse_flower_rewrite_to_netlink_action(buf, flower);
> +                }
>              }
>              break;
>              case TC_ACT_ENCAP: {
> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
> index fb108c0d5..c7551dd97 100644
> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -765,6 +765,14 @@ netdev_is_offload_rebalance_policy_enabled(void)
>      return netdev_offload_rebalance_policy;
>  }
>  
> +static bool netdev_offload_tc_pedit_add = false;
> +
> +bool
> +netdev_is_tc_pedit_add_enabled(void)
> +{
> +    return netdev_offload_tc_pedit_add;
> +}
> +
>  static void
>  netdev_ports_flow_init(void)
>  {
> @@ -780,6 +788,10 @@ netdev_ports_flow_init(void)
>  void
>  netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
>  {
> +    if (smap_get_bool(ovs_other_config, "tc-pedit-add", false)) {
> +        netdev_offload_tc_pedit_add = true;
> +    }
> +
>      if (smap_get_bool(ovs_other_config, "hw-offload", false)) {
>          static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>  
> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
> index 8237a85dd..1d38fe338 100644
> --- a/lib/netdev-offload.h
> +++ b/lib/netdev-offload.h
> @@ -126,6 +126,7 @@ bool netdev_any_oor(void);
>  bool netdev_is_flow_api_enabled(void);
>  void netdev_set_flow_api_enabled(const struct smap *ovs_other_config);
>  bool netdev_is_offload_rebalance_policy_enabled(void);
> +bool netdev_is_tc_pedit_add_enabled(void);
>  int netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows);
>  
>  struct dpif_port;
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 2f4cdd92c..026d6fb0e 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -821,6 +821,7 @@ requires_datapath_assistance(const struct nlattr *a)
>      case OVS_ACTION_ATTR_CHECK_PKT_LEN:
>      case OVS_ACTION_ATTR_ADD_MPLS:
>      case OVS_ACTION_ATTR_DROP:
> +    case OVS_ACTION_ATTR_DEC_TTL:
>          return false;
>  
>      case OVS_ACTION_ATTR_UNSPEC:
> @@ -1001,6 +1002,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
> *batch, bool steal,
>              }
>              break;
>          case OVS_ACTION_ATTR_METER:
> +        case OVS_ACTION_ATTR_DEC_TTL:
>              /* Not implemented yet. */
>              break;
>          case OVS_ACTION_ATTR_PUSH_ETH: {
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 9a705cffa..abbf649c1 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -131,6 +131,7 @@ odp_action_len(uint16_t type)
>      case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
>      case OVS_ACTION_ATTR_RECIRC: return sizeof(uint32_t);
>      case OVS_ACTION_ATTR_HASH: return sizeof(struct ovs_action_hash);
> +    case OVS_ACTION_ATTR_DEC_TTL: return ATTR_LEN_VARIABLE;
>      case OVS_ACTION_ATTR_SET: return ATTR_LEN_VARIABLE;
>      case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;
>      case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;
> @@ -1162,6 +1163,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a,
>      case OVS_ACTION_ATTR_HASH:
>          format_odp_hash_action(ds, nl_attr_get(a));
>          break;
> +    case OVS_ACTION_ATTR_DEC_TTL:
> +        ds_put_cstr(ds, "dec_ttl");
> +        break;
>      case OVS_ACTION_ATTR_SET_MASKED:
>          a = nl_attr_get(a);
>          /* OVS_KEY_ATTR_NSH is nested attribute, so it needs special process 
> */
> diff --git a/lib/tc.c b/lib/tc.c
> index adb2d3182..01bb4c643 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -36,6 +36,7 @@
>  #include <unistd.h>
>  
>  #include "byte-order.h"
> +#include "netdev-offload.h"
>  #include "netlink-socket.h"
>  #include "netlink.h"
>  #include "openvswitch/ofpbuf.h"
> @@ -1010,12 +1011,13 @@ nl_parse_act_pedit(struct nlattr *options, struct 
> tc_flower *flower)
>      struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
>      const struct tc_pedit *pe;
>      const struct tc_pedit_key *keys;
> -    const struct nlattr *nla, *keys_ex, *ex_type;
> +    const struct nlattr *nla, *keys_ex, *ex_type, *ex_cmd;
>      const void *keys_attr;
>      char *rewrite_key = (void *) &flower->rewrite.key;
>      char *rewrite_mask = (void *) &flower->rewrite.mask;
>      size_t keys_ex_size, left;
>      int type, i = 0, err;
> +    uint16_t pedit_type;
>  
>      if (!nl_parse_nested(options, pedit_policy, pe_attrs,
>                           ARRAY_SIZE(pedit_policy))) {
> @@ -1043,6 +1045,10 @@ nl_parse_act_pedit(struct nlattr *options, struct 
> tc_flower *flower)
>          ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
>          type = nl_attr_get_u16(ex_type);
>  
> +        ex_cmd = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_CMD);
> +        pedit_type = nl_attr_get_u16(ex_cmd);
> +        flower->actions[flower->action_count].pedit_type = pedit_type;
> +

Is there a case where TCA_PEDIT_KEY_EX_CMD will not be present?
If so, does the above deal with such a case?

>          err = csum_update_flag(flower, type);
>          if (err) {
>              return err;
> @@ -2473,7 +2479,8 @@ csum_update_flag(struct tc_flower *flower,
>  
>  static int
>  nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
> -                                 struct tc_flower *flower)
> +                                 struct tc_flower *flower,
> +                                 struct tc_action *action)
>  {
>      struct {
>          struct tc_pedit sel;
> @@ -2484,6 +2491,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
>              .nkeys = 0
>          }
>      };
> +    uint8_t tr_ttl;

nit: I think that this declaration can move inside the block where tr_ttl
is used.

>      int i, j, err;
>  
>      for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
> @@ -2529,6 +2537,26 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf 
> *request,
>              pedit_key->mask = ~mask_word;
>              pedit_key->val = data_word & mask_word;
>              sel.sel.nkeys++;
> +            tr_ttl = (flower->key.ip_ttl & flower->mask.ip_ttl) - 1;
> +            if (netdev_is_tc_pedit_add_enabled()) {
> +                /* rewrite ttl action */
> +                if (pedit_key_ex->htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 &&
> +                    pedit_key->val  == tr_ttl &&
> +                    pedit_key->off == 8) {
> +                    pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
> +                    pedit_key->val = 0xff;
> +                    flower->mask.ip_ttl = 0;
> +                    action->pedit_type = TCA_PEDIT_KEY_EX_CMD_ADD;
> +                }
> +                if (pedit_key_ex->htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 &&
> +                    pedit_key->val  == tr_ttl << 24 &&
> +                    pedit_key->off == 4) {
> +                    pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
> +                    pedit_key->val = 0xff << 24;
> +                    flower->mask.ip_ttl = 0;
> +                    action->pedit_type = TCA_PEDIT_KEY_EX_CMD_ADD;
> +                }
> +            }

Perhaps this problem doesn't arise, but I'm somewhat confused about how
the above differentiates between the following two cases described
in OpenFlow:

1. match on ip & ttl==7; set ttl to 6
2. match on ip; dec_ttl

>  
>              err = csum_update_flag(flower, m->htype);
>              if (err) {
> @@ -2575,7 +2603,8 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct 
> tc_flower *flower)
>              switch (action->type) {
>              case TC_ACT_PEDIT: {
>                  act_offset = nl_msg_start_nested(request, act_index++);
> -                error = nl_msg_put_flower_rewrite_pedits(request, flower);
> +                error = nl_msg_put_flower_rewrite_pedits(request, flower,
> +                                                         action);
>                  if (error) {
>                      return error;
>                  }
> diff --git a/lib/tc.h b/lib/tc.h
> index a147ca461..f33870e76 100644
> --- a/lib/tc.h
> +++ b/lib/tc.h
> @@ -259,6 +259,7 @@ struct tc_action {
>       };
>  
>       enum tc_action_type type;
> +     int pedit_type;
>  };
>  
>  enum tc_offloaded_state {
> diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
> index 9280e008e..3c2af4891 100644
> --- a/ofproto/ofproto-dpif-ipfix.c
> +++ b/ofproto/ofproto-dpif-ipfix.c
> @@ -3007,6 +3007,7 @@ dpif_ipfix_read_actions(const struct flow *flow,
>          case OVS_ACTION_ATTR_METER:
>          case OVS_ACTION_ATTR_SET_MASKED:
>          case OVS_ACTION_ATTR_SET:
> +        case OVS_ACTION_ATTR_DEC_TTL:
>          case OVS_ACTION_ATTR_PUSH_VLAN:
>          case OVS_ACTION_ATTR_POP_VLAN:
>          case OVS_ACTION_ATTR_PUSH_MPLS:
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index 30e7caf54..95c1989f1 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -1221,6 +1221,7 @@ dpif_sflow_read_actions(const struct flow *flow,
>          }
>          break;
>          case OVS_ACTION_ATTR_SAMPLE:
> +        case OVS_ACTION_ATTR_DEC_TTL:
>          case OVS_ACTION_ATTR_PUSH_NSH:
>          case OVS_ACTION_ATTR_POP_NSH:
>          case OVS_ACTION_ATTR_UNSPEC:
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 0c6632617..5c1595b9e 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -222,6 +222,21 @@
>          </p>
>        </column>
>  
> +      <column name="other_config" key="tc-pedit-add"
> +              type='{"type": "boolean"}'>
> +        <p>
> +          Set this value to <code>true</code> to enable tc pedit add actions
> +          used for decrementing the ttl.
> +        </p>
> +        <p>
> +          The default value is <code>false</code>.
> +        </p>
> +        <p>
> +          This is only relevant if
> +          <ref column="other_config" key="hw-offload"/> is enabled.
> +        </p>
> +      </column>
> +
>        <column name="other_config" key="hw-offload"
>                type='{"type": "boolean"}'>
>          <p>
> -- 
> 2.17.1
> 
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to