On Tue, Oct 31, 2017 at 09:20:55AM +0200, Paul Blakey wrote:
>
>
> On 30/10/2017 15:42, Simon Horman wrote:
> > On Wed, Oct 25, 2017 at 02:24:15PM +0300, Roi Dayan wrote:
> > >
> > >
> > > On 27/09/2017 12:08, Simon Horman wrote:
> > > > On Mon, Sep 25, 2017 at 04:31:42PM +0300, Paul Blakey wrote:
> > > > >
> > > > >
> > > > > On 18/09/2017 18:01, Simon Horman wrote:
> > > > > > On Mon, Sep 18, 2017 at 07:16:03AM +0300, Roi Dayan wrote:
> > > > > > > From: Paul Blakey <[email protected]>
> > > > > > >
> > > > > > > To be later used to implement ovs action set offloading.
> > > > > > >
> > > > > > > Signed-off-by: Paul Blakey <[email protected]>
> > > > > > > Reviewed-by: Roi Dayan <[email protected]>
> > > > > > > ---
> > > > > > > lib/tc.c | 372
> > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > > > > > > lib/tc.h | 16 +++
> > > > > > > 2 files changed, 385 insertions(+), 3 deletions(-)
> > > > > > >
> > > > > > > diff --git a/lib/tc.c b/lib/tc.c
> > > > > > > index c9cada2..743b2ee 100644
> > > > > > > --- a/lib/tc.c
> > > > > > > +++ b/lib/tc.c
> > > > > > > @@ -21,8 +21,10 @@
> > > > > > > #include <errno.h>
> > > > > > > #include <linux/if_ether.h>
> > > > > > > #include <linux/rtnetlink.h>
> > > > > > > +#include <linux/tc_act/tc_csum.h>
> > > > > > > #include <linux/tc_act/tc_gact.h>
> > > > > > > #include <linux/tc_act/tc_mirred.h>
> > > > > > > +#include <linux/tc_act/tc_pedit.h>
> > > > > > > #include <linux/tc_act/tc_tunnel_key.h>
> > > > > > > #include <linux/tc_act/tc_vlan.h>
> > > > > > > #include <linux/gen_stats.h>
> > > > > > > @@ -33,11 +35,14 @@
> > > > > > > #include "netlink-socket.h"
> > > > > > > #include "netlink.h"
> > > > > > > #include "openvswitch/ofpbuf.h"
> > > > > > > +#include "openvswitch/util.h"
> > > > > > > #include "openvswitch/vlog.h"
> > > > > > > #include "packets.h"
> > > > > > > #include "timeval.h"
> > > > > > > #include "unaligned.h"
> > > > > > > +#define MAX_PEDIT_OFFSETS 8
> > > > > >
> > > > > > Why 8?
> > > > > We don't expect anything more right now (ipv6 src/dst rewrite
> > > > > requires 8
> > > > > pedits iirc). I can't think of a larger use case, maybe ipv6 + macs if
> > > > > that's makes sens. do you suggest we increase it? to what?
> > > >
> > > > It seems strange to me to place a somewhat arbitrary small limit
> > > > when none exists in the pedit API being used. I would at prefer if
> > > > it was at least a bigger, say 16 or 32.
> > >
> > >
> > > Hi Simon,
> > >
> > > Sorry for the late reply due to holidays and vacations.
> > > Me & Paul going to go over this and do the fixes needed and
> > > also rebase over latest master and run tests again.
> >
> > Likewise, sorry for not responding earlier (same reason).
> >
> > > I'll answer what I'm more familiar with now and Paul will continue.
> > > The 8 here is too low and you right. We used this definition
> > > for allocation of the pedit keys on the stack in
> > > nl_msg_put_flower_rewrite_pedits()
> > >
> > > It was for convenience instead of calculating the maximum possible
> > > keys that could exists and allocating it there and freeing it at
> > > the end.
> > >
> > > Increasing it to 32 is probably more than enough and wont waste much.
> >
> > Thanks, that sounds good.
> >
> > > > > > > VLOG_DEFINE_THIS_MODULE(tc);
> > > > > > > static struct vlog_rate_limit error_rl =
> > > > > > > VLOG_RATE_LIMIT_INIT(60, 5);
> > > > > > > @@ -50,6 +55,82 @@ enum tc_offload_policy {
> > > > > > > static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
> > > > > > > +struct tc_pedit_key_ex {
> > > > > > > + enum pedit_header_type htype;
> > > > > > > + enum pedit_cmd cmd;
> > > > > > > +};
> > > > > > > +
> > > > > > > +struct flower_key_to_pedit {
> > > > > > > + enum pedit_header_type htype;
> > > > > > > + int flower_offset;
> > > > > > > + int offset;
> > > > > > > + int size;
> > > > > > > +};
> > > > > > > +
> > > > > > > +static struct flower_key_to_pedit flower_pedit_map[] = {
> > > > > > > + {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
> > > > > > > + 12,
> > > > > > > + offsetof(struct tc_flower_key, ipv4.ipv4_src),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
> > > > > > > + 16,
> > > > > > > + offsetof(struct tc_flower_key, ipv4.ipv4_dst),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
> > > > > > > + 8,
> > > > > > > + offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
> > > > > > > + 8,
> > > > > > > + offsetof(struct tc_flower_key, ipv6.ipv6_src),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
> > > > > > > + 24,
> > > > > > > + offsetof(struct tc_flower_key, ipv6.ipv6_dst),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
> > > > > > > + 6,
> > > > > > > + offsetof(struct tc_flower_key, src_mac),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, src_mac)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
> > > > > > > + 0,
> > > > > > > + offsetof(struct tc_flower_key, dst_mac),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
> > > > > > > + 12,
> > > > > > > + offsetof(struct tc_flower_key, eth_type),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, eth_type)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
> > > > > > > + 0,
> > > > > > > + offsetof(struct tc_flower_key, tcp_src),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
> > > > > > > + 2,
> > > > > > > + offsetof(struct tc_flower_key, tcp_dst),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
> > > > > > > + 0,
> > > > > > > + offsetof(struct tc_flower_key, udp_src),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, udp_src)
> > > > > > > + }, {
> > > > > > > + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
> > > > > > > + 2,
> > > > > > > + offsetof(struct tc_flower_key, udp_dst),
> > > > > > > + MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
> > > > > > > + },
> > > > > > > +};
> > > > > > > +
> > > > > > > struct tcmsg *
> > > > > > > tc_make_request(int ifindex, int type, unsigned int flags,
> > > > > > > struct ofpbuf *request)
> > > > > > > @@ -365,6 +446,96 @@ nl_parse_flower_ip(struct nlattr **attrs,
> > > > > > > struct tc_flower *flower) {
> > > > > > > }
> > > > > > > }
> > > > > > > +static const struct nl_policy pedit_policy[] = {
> > > > > > > + [TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
> > > > > > > + .min_len = sizeof(struct
> > > > > > > tc_pedit),
> > > > > > > + .optional = false, },
> > > > > > > + [TCA_PEDIT_KEYS_EX] = { .type = NL_A_NESTED,
> > > > > > > + .optional = false, },
> > > > > > > +};
> > > > > > > +
> > > > > > > +static int
> > > > > > > +nl_parse_act_pedit(struct nlattr *options, struct tc_flower
> > > > > > > *flower)
> > > > > > > +{
> > > > > > > + struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
> > > > > > > + const struct tc_pedit *pe;
> > > > > > > + const struct tc_pedit_key *keys;
> > > > > > > + const struct nlattr *nla, *keys_ex, *ex_type;
> > > > > > > + const void *keys_attr;
> > > > > > > + char *rewrite_key = (void *) &flower->rewrite.key;
> > > > > > > + char *rewrite_mask = (void *) &flower->rewrite.mask;
> > > > > > > + size_t keys_ex_size, left;
> > > > > > > + int type, i = 0;
> > > > > > > +
> > > > > > > + if (!nl_parse_nested(options, pedit_policy, pe_attrs,
> > > > > > > + ARRAY_SIZE(pedit_policy))) {
> > > > > > > + VLOG_ERR_RL(&error_rl, "failed to parse pedit action
> > > > > > > options");
> > > > > > > + return EPROTO;
> > > > > > > + }
> > > > > > > +
> > > > > > > + pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof
> > > > > > > *pe);
> > > > > > > + keys = pe->keys;
> > > > > > > + keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
> > > > > > > + keys_ex = nl_attr_get(keys_attr);
> > > > > > > + keys_ex_size = nl_attr_get_size(keys_attr);
> > > > > > > +
> > > > > > > + NL_ATTR_FOR_EACH (nla, left, keys_ex, keys_ex_size) {
> > > > > > > + if (i >= pe->nkeys) {
> > > > > > > + break;
> > > > > > > + }
> > > > > > > +
> > > > > > > + if (nl_attr_type(nla) == TCA_PEDIT_KEY_EX) {
> > > > > > > + ex_type = nl_attr_find_nested(nla,
> > > > > > > TCA_PEDIT_KEY_EX_HTYPE);
> > > > > > > + type = nl_attr_get_u16(ex_type);
> > > > > > > +
> > > > > > > + for (int j = 0; j < ARRAY_SIZE(flower_pedit_map);
> > > > > > > j++) {
> > > > > > > + struct flower_key_to_pedit *m =
> > > > > > > &flower_pedit_map[j];
> > > > > > > + int flower_off = m->flower_offset;
> > > > > > > + int sz = m->size;
> > > > > > > + int mf = m->offset;
> > > > > > > +
> > > > > > > + if (m->htype != type) {
> > > > > > > + continue;
> > > > > > > + }
> > > > > > > +
> > > > > > > + /* check overlap between current pedit key,
> > > > > > > which is always
> > > > > > > + * 4 bytes (range [off, off + 3]), and a map
> > > > > > > entry in
> > > > > > > + * flower_pedit_map (range [mf, mf + sz - 1]) */
> > > > > > > + if ((keys->off >= mf && keys->off < mf + sz)
> > > > > > > + || (keys->off + 3 >= mf && keys->off + 3 <
> > > > > > > mf + sz)) {
> > > > > > > + int diff = flower_off + (keys->off - mf);
> > > > > > > + uint32_t *dst = (void *) (rewrite_key +
> > > > > > > diff);
> > > > > > > + uint32_t *dst_m = (void *) (rewrite_mask +
> > > > > > > diff);
> > > > > > > + uint32_t mask = ~(keys->mask);
> > > > > > > + uint32_t zero_bits;
> > > > > > > +
> > > > > > > + if (keys->off < mf) {
> > > > > > > + zero_bits = 8 * (mf - keys->off);
> > > > > > > + mask &= UINT32_MAX << zero_bits;
> > > > > > > + } else if (keys->off + 4 > mf + m->size) {
> > > > > > > + zero_bits = 8 * (keys->off + 4 - mf -
> > > > > > > m->size);
> > > > > > > + mask &= UINT32_MAX >> zero_bits;
> > > > > > > + }
> > > > > > > +
> > > > > > > + *dst_m |= mask;
> > > > > > > + *dst |= keys->val & mask;
> > > > > > > + }
> > > > > > > + }
> > > > > >
> > > > > > If I understand the above correctly it is designed to make
> > > > > > pedit actions disjoint. If so, why is that necessary? >
> > > > >
> > > > > It's not, as a single flower key rewrite can be split to multiple
> > > > > pedit
> > > > > actions it finds the overlap between a flower key and a pedit action,
> > > > > if
> > > > > they do overlap it translates it to the correct offset and masks it
> > > > > out.
> > > >
> > > > Thanks, understood.
> > > >
> > > > >
> > > > > > > + } else {
> > > > > > > + VLOG_ERR_RL(&error_rl, "unable to parse legacy pedit
> > > > > > > type: %d",
> > > > > > > + nl_attr_type(nla));
> > > > > > > + return EOPNOTSUPP;
> > > > > > > + }
> > > > > >
> > > > > > I think the code could exit early here as
> > > > > > nl_msg_put_flower_rewrite_pedits() does below.
> > > > > >
> > > > >
> > > > > Sorry, didn't understand. can you give an example?
> > > > >
> > > >
> > > > I meant something like this.
> > > >
> > > > if (nl_attr_type(nla) != TCA_PEDIT_KEY_EX) {
> > > > VLOG_ERR_RL(...);
> > > > return EOPNOTSUPP;
> > > > }
> > >
> > > understood. we'll do that. thanks.
> > >
> > > >
> > > > /* Overlap detection code goes here */
> > > >
> > > > > >
> > > > > > > +
> > > > > > > + keys++;
> > > > > > > + i++;
> > > > > > > + }
> > > > > > > +
> > > > > > > + flower->rewrite.rewrite = true;
> > > > > > > +
> > > > > > > + return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > static const struct nl_policy tunnel_key_policy[] = {
> > > > > > > [TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
> > > > > > > .min_len = sizeof(struct
> > > > > > > tc_tunnel_key),
> > > > > > > @@ -608,6 +779,11 @@ nl_parse_single_action(struct nlattr
> > > > > > > *action, struct tc_flower *flower)
> > > > > > > nl_parse_act_vlan(act_options, flower);
> > > > > > > } else if (!strcmp(act_kind, "tunnel_key")) {
> > > > > > > nl_parse_act_tunnel_key(act_options, flower);
> > > > > > > + } else if (!strcmp(act_kind, "pedit")) {
> > > > > > > + nl_parse_act_pedit(act_options, flower);
> > > > > > > + } else if (!strcmp(act_kind, "csum")) {
> > > > > > > + /* not doing anything for now, ovs has an implicit csum
> > > > > > > recalculation
> > > > > > > + * with rewriting of packet headers (translating of
> > > > > > > pedit acts). */
> > > > > >
> > > > > > I wonder if the absence of a csum action when needed (by TC)
> > > > > > should be treated as an error >
> > > > >
> > > > > Do you mean validating csum flags from each protocol and such (like
> > > > > in put)?
> > > >
> > > > Yes, I think so.
> > > >
> > > > In OvS (without TC acceleration) csum recalculation is implicit
> > > > but with TC an explicit csum update action is required. I see
> > > > in your code you are handling this by adding csum actions to TC actions
> > > > when translating from OvS DPIF actions. And in the reverse (above) csum
> > > > actions are simply ignored.
> > > >
> > > > What I am wondering is if when translating TC actions to OvS DPIF
> > > > actions
> > > > you should track if the TC actions should include a csum rule because
> > > > of other actions and treat its absence as an error.
> > > >
> > > > >
> > > > > > > } else {
> > > > > > > VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s",
> > > > > > > act_kind);
> > > > > > > return EINVAL;
> > > > > > > @@ -809,6 +985,48 @@ tc_get_tc_cls_policy(enum tc_offload_policy
> > > > > > > policy)
> > > > > > > }
> > > > > > > static void
> > > > > > > +nl_msg_put_act_csum(struct ofpbuf *request, uint32_t flags)
> > > > > > > +{
> > > > > > > + size_t offset;
> > > > > > > +
> > > > > > > + nl_msg_put_string(request, TCA_ACT_KIND, "csum");
> > > > > > > + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
> > > > > > > + {
> > > > > > > + struct tc_csum parm = { .action = TC_ACT_PIPE,
> > > > > > > + .update_flags = flags };
> > > > > > > +
> > > > > > > + nl_msg_put_unspec(request, TCA_CSUM_PARMS, &parm, sizeof
> > > > > > > parm);
> > > > > > > + }
> > > > > > > + nl_msg_end_nested(request, offset);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void
> > > > > > > +nl_msg_put_act_pedit(struct ofpbuf *request, struct tc_pedit
> > > > > > > *parm,
> > > > > > > + struct tc_pedit_key_ex *ex)
> > > > > > > +{
> > > > > > > + size_t ksize = sizeof *parm + (parm->nkeys * sizeof(struct
> > > > > > > tc_pedit_key));
> > > > > >
> > > > > > Are there unnecessary () on the line above?
> > > > > No, I'll remove them.
> > > > >
> > > > > >
> > > > > > > + size_t offset, offset_keys_ex, offset_key;
> > > > > > > + int i;
> > > > > > > +
> > > > > > > + nl_msg_put_string(request, TCA_ACT_KIND, "pedit");
> > > > > > > + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
> > > > > > > + {
> > > > > > > + parm->action = TC_ACT_PIPE;
> > > > > > > +
> > > > > > > + nl_msg_put_unspec(request, TCA_PEDIT_PARMS_EX, parm,
> > > > > > > ksize);
> > > > > > > + offset_keys_ex = nl_msg_start_nested(request,
> > > > > > > TCA_PEDIT_KEYS_EX);
> > > > > > > + for (i = 0; i < parm->nkeys; i++, ex++) {
> > > > > > > + offset_key = nl_msg_start_nested(request,
> > > > > > > TCA_PEDIT_KEY_EX);
> > > > > > > + nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_HTYPE,
> > > > > > > ex->htype);
> > > > > > > + nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_CMD,
> > > > > > > ex->cmd);
> > > > > > > + nl_msg_end_nested(request, offset_key);
> > > > > > > + }
> > > > > > > + nl_msg_end_nested(request, offset_keys_ex);
> > > > > > > + }
> > > > > > > + nl_msg_end_nested(request, offset);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void
> > > > > > > nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid,
> > > > > > > uint8_t prio)
> > > > > > > {
> > > > > > > size_t offset;
> > > > > > > @@ -930,7 +1148,127 @@ nl_msg_put_act_cookie(struct ofpbuf
> > > > > > > *request, struct tc_cookie *ck) {
> > > > > > > }
> > > > > > > }
> > > > > > > +/* Given flower, a key_to_pedit map entry, calculates the rest,
> > > > > > > + * where:
> > > > > > > + *
> > > > > > > + * mask, data - pointers of where read the first word of
> > > > > > > flower->key/mask.
> > > > > > > + * current_offset - which offset to use for the first pedit
> > > > > > > action.
> > > > > > > + * cnt - max pedits actions to use.
> > > > > > > + * first_word_mask/last_word_mask - the mask to use for the
> > > > > > > first/last read
> > > > > > > + * (as we read entire words). */
> > > > > > > static void
> > > > > > > +calc_offsets(struct tc_flower *flower, struct
> > > > > > > flower_key_to_pedit *m,
> > > > > > > + int *cur_offset, int *cnt, uint32_t *last_word_mask,
> > > > > > > + uint32_t *first_word_mask, uint32_t **mask,
> > > > > > > uint32_t **data)
> > > > > > > +{
> > > > > > > + int start_offset, max_offset, total_size;
> > > > > > > + int diff, right_zero_bits, left_zero_bits;
> > > > > > > + char *rewrite_key = (void *) &flower->rewrite.key;
> > > > > > > + char *rewrite_mask = (void *) &flower->rewrite.mask;
> > > > > > > +
> > > > > > > + max_offset = m->offset + m->size;
> > > > > > > + start_offset = ROUND_DOWN(m->offset, 4);
> > > > > > > + diff = m->offset - start_offset;
> > > > > > > + total_size = max_offset - start_offset;
> > > > > > > + right_zero_bits = 8 * (4 - (max_offset % 4));
> > > > > > > + left_zero_bits = 8 * (m->offset - start_offset);
> > > > > > > +
> > > > > > > + *cur_offset = start_offset;
> > > > > > > + *cnt = (total_size / 4) + (total_size % 4 ? 1 : 0);
> > > > > > > + *last_word_mask = UINT32_MAX >> right_zero_bits;
> > > > > > > + *first_word_mask = UINT32_MAX << left_zero_bits;
> > > > > > > + *data = (void *) (rewrite_key + m->flower_offset - diff);
> > > > > > > + *mask = (void *) (rewrite_mask + m->flower_offset - diff);
> > > > > >
> > > > > > The type of *data and *mask is uint32_t *.
> > > > > > Why not cast to that type?
> > > > >
> > > > > It's to avoid warning of increasing pointer alignment (-Wcast-align).
> > > >
> > > > It sounds like the warning should be addressed rather than overridden
> > > > using
> > > > a cast.
> > > >
> > > > > >
> > > > > > > +}
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +csum_update_flag(struct tc_flower *flower,
> > > > > > > + enum pedit_header_type htype) {
> > > > > >
> > > > > > I think the above two lines could be one.
> > > > > >
> > > > > > > + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4) {
> > > > > >
> > > > > > A case statement might be nicer here.
> > > > >
> > > > > Right, thanks.
> > > > >
> > > > > >
> > > > > > > + flower->csum_update_flags |=
> > > > > > > TCA_CSUM_UPDATE_FLAG_IPV4HDR;
> > > > > > > + }
> > > > > > > + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
> > > > > > > + || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
> > > > > > > + || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
> > > > > > > + || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_UDP) {
> > > > > > > + if (flower->key.ip_proto == IPPROTO_TCP) {
> > > > > > > + flower->mask.ip_proto = UINT8_MAX;
> > > > > >
> > > > > > What if the mask was not UINT8_MAX to start with?
> > > > > > Doesn't this create a different flow?
> > > > >
> > > > > This is so the checksum will be strict (not setting/recalc udp
> > > > > checksum on
> > > > > non udp packets). It creates a more specific flow, a subset of the
> > > > > original.
> > > > > This is fine as datapath is free to ignore a mask, which is the same
> > > > > as a
> > > > > full mask we've set.
> > > >
> > > > I'm not sure that I understand why its fine for the datapath to ignore
> > > > the
> > > > mask.
> > >
> > > I'll try to explain.
> > > When we want the HW to recalc the csum we need to also do matching on
> > > it. We do that for TCP, UDP, ICMP.
> >
> > Ok, that makes sense. But I am concerned that the mask of the offloaded
> > flow no longer matches the mask of the flow OvS created. What are the
> > implications of that?
> >
>
> Hi, I remember the mask being a suggestion to the datapath (can't find it
> right now) as the datapath might not support the full mask. After the flow
> is generated from a miss upcall it will be put to the datapath and isn't
> saved anywhere, only the flow ufid (hash based on key, mask and some seed,
> saved in ukey map at ofproto upcall...), if I recall correctly. We return
> the same ufid for the more specific flow that we put instead, so it will be
> aged correctly.
> On dump it will show the more specific flow, as its a direct parse of dump
> from the datapath where there is the more specific one and not a saved flow.
> On a miss that could have been caught by the original flow, ovs will
> generate a new UFID, since the key is different. So we will have
> another flow put and again a more specific flow and so on.
> This flows will be disjointed and dumped and aged correctly.
Thanks for the explanation. I don't think I have any outstanding questions
and I'd be happy to see a new version of this patchset.
>
>
> > > > > > > + flower->csum_update_flags |=
> > > > > > > TCA_CSUM_UPDATE_FLAG_TCP;
> > > > > > > + } else if (flower->key.ip_proto == IPPROTO_UDP) {
> > > > > > > + flower->mask.ip_proto = UINT8_MAX;
> > > > > > > + flower->csum_update_flags |=
> > > > > > > TCA_CSUM_UPDATE_FLAG_UDP;
> > > > > > > + } else if (flower->key.ip_proto == IPPROTO_ICMP
> > > > > > > + || flower->key.ip_proto == IPPROTO_ICMPV6) {
> > > > > > > + flower->mask.ip_proto = UINT8_MAX;
> > > > > > > + flower->csum_update_flags |=
> > > > > > > TCA_CSUM_UPDATE_FLAG_ICMP;
> > > > > > > + }
> > > > > > > + }
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int
> > > > > > > +nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
> > > > > > > + struct tc_flower *flower)
> > > > > > > +{
> > > > > > > + struct {
> > > > > > > + struct tc_pedit sel;
> > > > > > > + struct tc_pedit_key keys[MAX_PEDIT_OFFSETS];
> > > > > > > + struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
> > > > > > > + } sel = {
> > > > > > > + .sel = {
> > > > > > > + .nkeys = 0
> > > > > > > + }
> > > > > > > + };
> > > > > > > + int i, j;
> > > > > > > +
> > > > > > > + for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
> > > > > > > + struct flower_key_to_pedit *m = &flower_pedit_map[i];
> > > > > > > + struct tc_pedit_key *pedit_key = NULL;
> > > > > > > + struct tc_pedit_key_ex *pedit_key_ex = NULL;
> > > > > > > + uint32_t *mask, *data, first_word_mask, last_word_mask;
> > > > > > > + int cnt = 0, cur_offset = 0;
> > > > > > > +
> > > > > > > + if (!m->size) {
> > > > > > > + continue;
> > > > > > > + }
> > > > > > > +
> > > > > > > + calc_offsets(flower, m, &cur_offset, &cnt,
> > > > > > > &last_word_mask,
> > > > > > > + &first_word_mask, &mask, &data);
> > > > > > > +
> > > > > > > + for (j = 0; j < cnt; j++, mask++, data++, cur_offset +=
> > > > > > > 4) {
> > > > > > > + uint32_t mask_word = *mask;
> > > > > > > +
> > > > > > > + if (j == 0) {
> > > > > > > + mask_word &= first_word_mask;
> > > > > > > + }
> > > > > > > + if (j == cnt - 1) {
> > > > > > > + mask_word &= last_word_mask;
> > > > > > > + }
> > > > > > > + if (!mask_word) {
> > > > > > > + continue;
> > > > > > > + }
> > > > > > > + if (sel.sel.nkeys == MAX_PEDIT_OFFSETS) {
> > > > > > > + VLOG_WARN_RL(&error_rl, "reached too many pedit
> > > > > > > offsets: %d",
> > > > > > > + MAX_PEDIT_OFFSETS);
> > > > > > > + return EOPNOTSUPP;
> > > > > > > + }
> > > > > > > +
> > > > > > > + pedit_key = &sel.keys[sel.sel.nkeys];
> > > > > > > + pedit_key_ex = &sel.keys_ex[sel.sel.nkeys];
> > > > > > > + pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_SET;
> > > > > > > + pedit_key_ex->htype = m->htype;
> > > > > > > + pedit_key->off = cur_offset;
> > > > > > > + pedit_key->mask = ~mask_word;
> > > > > > > + pedit_key->val = *data & mask_word;
> > > > > > > + sel.sel.nkeys++;
> > > > > > > + csum_update_flag(flower, m->htype);
> > > > > > > + }
> > > > > > > + }
> > > > > > > + nl_msg_put_act_pedit(request, &sel.sel, sel.keys_ex);
> > > > > > > +
> > > > > > > + return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int
> > > > > > > nl_msg_put_flower_acts(struct ofpbuf *request, struct
> > > > > > > tc_flower *flower)
> > > > > > > {
> > > > > > > size_t offset;
> > > > > > > @@ -939,7 +1277,20 @@ nl_msg_put_flower_acts(struct ofpbuf
> > > > > > > *request, struct tc_flower *flower)
> > > > > > > offset = nl_msg_start_nested(request, TCA_FLOWER_ACT);
> > > > > > > {
> > > > > > > uint16_t act_index = 1;
> > > > > > > + int error;
> > > > > > > + if (flower->rewrite.rewrite) {
> > > > > > > + act_offset = nl_msg_start_nested(request,
> > > > > > > act_index++);
> > > > > > > + error = nl_msg_put_flower_rewrite_pedits(request,
> > > > > > > flower);
> > > > > > > + if (error) {
> > > > > > > + return error;
> > > > > > > + }
> > > > > > > + nl_msg_end_nested(request, act_offset);
> > > > > > > +
> > > > > > > + act_offset = nl_msg_start_nested(request,
> > > > > > > act_index++);
> > > > > > > + nl_msg_put_act_csum(request,
> > > > > > > flower->csum_update_flags);
> > > > > > > + nl_msg_end_nested(request, act_offset);
> > > > > > > + }
> > > > > > > if (flower->set.set) {
> > > > > > > act_offset = nl_msg_start_nested(request,
> > > > > > > act_index++);
> > > > > > > nl_msg_put_act_tunnel_key_set(request,
> > > > > > > flower->set.id,
> > > > > > > @@ -980,6 +1331,8 @@ nl_msg_put_flower_acts(struct ofpbuf
> > > > > > > *request, struct tc_flower *flower)
> > > > > > > }
> > > > > > > }
> > > > > > > nl_msg_end_nested(request, offset);
> > > > > > > +
> > > > > > > + return 0;
> > > > > > > }
> > > > > > > static void
> > > > > > > @@ -1021,11 +1374,19 @@ nl_msg_put_flower_tunnel(struct ofpbuf
> > > > > > > *request, struct tc_flower *flower)
> > > > > > > nl_msg_put_masked_value(request, type, type##_MASK,
> > > > > > > &flower->key.member, \
> > > > > > > &flower->mask.member, sizeof
> > > > > > > flower->key.member)
> > > > > > > -static void
> > > > > > > +static int
> > > > > > > nl_msg_put_flower_options(struct ofpbuf *request, struct
> > > > > > > tc_flower *flower)
> > > > > > > {
> > > > > > > +
> > > > > > > uint16_t host_eth_type = ntohs(flower->key.eth_type);
> > > > > > > bool is_vlan = (host_eth_type == ETH_TYPE_VLAN);
> > > > > > > + int err;
> > > > > > > +
> > > > > > > + /* need to parse acts first as some acts require changing
> > > > > > > the matching */
> > > > > >
> > > > > > This seems strange to me.
> > > > >
> > > > > from the strict (normalized?) header checksum.
> > > >
> > > > I think this code relates to setting flower->key.ip_proto == IPPROTO_TCP
> > > > above. Is that correct. Are there any other cases?
> > >
> > > yes and also for UDP and ICMP. as explained above.
> > > So going over the acts in header rewrite case we might want to signal the
> > > HW
> > > recalc csum and we need matching on those fields that require new
> > > csum so the matching mask being updated. this is why we parse the acts
> > > first.
> >
> > Thanks, understood.
> >
> > > > > > > + err = nl_msg_put_flower_acts(request, flower);
> > > > > > > + if (err) {
> > > > > > > + return err;
> > > > > > > + }
> > > > > > > if (is_vlan) {
> > > > > > > host_eth_type = ntohs(flower->key.encap_eth_type);
> > > > > > > @@ -1083,7 +1444,7 @@ nl_msg_put_flower_options(struct ofpbuf
> > > > > > > *request, struct tc_flower *flower)
> > > > > > > nl_msg_put_flower_tunnel(request, flower);
> > > > > > > }
> > > > > > > - nl_msg_put_flower_acts(request, flower);
> > > > > > > + return 0;
> > > > > > > }
> > > > > > > int
> > > > > > > @@ -1106,7 +1467,12 @@ tc_replace_flower(int ifindex, uint16_t
> > > > > > > prio, uint32_t handle,
> > > > > > > nl_msg_put_string(&request, TCA_KIND, "flower");
> > > > > > > basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
> > > > > > > {
> > > > > > > - nl_msg_put_flower_options(&request, flower);
> > > > > > > + error = nl_msg_put_flower_options(&request, flower);
> > > > > > > +
> > > > > > > + if (error) {
> > > > > > > + ofpbuf_uninit(&request);
> > > > > > > + return error;
> > > > > > > + }
> > > > > > > }
> > > > > > > nl_msg_end_nested(&request, basic_offset);
> > > > > > > diff --git a/lib/tc.h b/lib/tc.h
> > > > > > > index 6c69b79..7876051 100644
> > > > > > > --- a/lib/tc.h
> > > > > > > +++ b/lib/tc.h
> > > > > > > @@ -96,6 +96,7 @@ struct tc_flower_key {
> > > > > > > struct {
> > > > > > > ovs_be32 ipv4_src;
> > > > > > > ovs_be32 ipv4_dst;
> > > > > > > + uint8_t rewrite_ttl;
> > > > > > > } ipv4;
> > > > > > > struct {
> > > > > > > struct in6_addr ipv6_src;
> > > > > > > @@ -120,6 +121,14 @@ struct tc_flower {
> > > > > > > uint64_t lastused;
> > > > > > > struct {
> > > > > > > + bool rewrite;
> > > > > > > + struct tc_flower_key key;
> > > > > > > + struct tc_flower_key mask;
> > > > > > > + } rewrite;
> > > > > > > +
> > > > > > > + uint32_t csum_update_flags;
> > > > > > > +
> > > > > > > + struct {
> > > > > > > bool set;
> > > > > > > ovs_be64 id;
> > > > > > > ovs_be16 tp_src;
> > > > > > > @@ -152,6 +161,13 @@ struct tc_flower {
> > > > > > > struct tc_cookie act_cookie;
> > > > > > > };
> > > > > > > +/* assert that if we overflow with a masked write of uint32_t to
> > > > > > > the last byte
> > > > > > > + * of flower.rewrite we overflow inside struct flower.
> > > > > > > + * shouldn't happen unless someone moves rewrite to the end of
> > > > > > > flower */
> > > > > > > +BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
> > > > > > > + + MEMBER_SIZEOF(struct tc_flower, rewrite)
> > > > > > > + + sizeof(uint32_t) - 2 < sizeof(struct
> > > > > > > tc_flower));
> > > > > > > +
> > > > > > > int tc_replace_flower(int ifindex, uint16_t prio, uint32_t
> > > > > > > handle,
> > > > > > > struct tc_flower *flower);
> > > > > > > int tc_del_filter(int ifindex, int prio, int handle);
> > > > > > > --
> > > > > > > 2.7.5
> > > > > > >
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev