On 7/29/22 14:44, Ales Musil wrote:
>     > diff --git a/lib/netlink.c b/lib/netlink.c
>     > index 6215282d6..8bcf56953 100644
>     > --- a/lib/netlink.c
>     > +++ b/lib/netlink.c
>     > @@ -981,3 +981,31 @@ nl_attr_find_nested(const struct nlattr *nla, 
> uint16_t type)
>     >  {
>     >      return nl_attr_find__(nl_attr_get(nla), nl_attr_get_size(nla), 
> type);
>     >  }
>     > +
>     > +/* Wraps existing Netlink attributes with their data into Netlink 
> attribute
>     > + * making them nested. The offset species where the Netlink attributes 
> start
>     > + * within the buffer. The tailing data are moved further to make space 
> for the
>     > + * nested header. */
>     > +void
>     > +nl_msg_wrap_unspec(struct ofpbuf *buf, uint16_t type, uint32_t offset,
>     > +                   size_t size)
>     > +{
>     > +    nl_msg_reserve(buf, NLA_HDRLEN);
>     > +
>     > +    uint8_t *src = (uint8_t *) buf->data + offset;
>     > +    uint8_t *dst = src + NLA_HDRLEN;
>     > +    uint32_t tail_data_len = buf->size - offset;
>     > +
>     > +    memmove(dst, src, tail_data_len);
> 
>     This may result in writing outside of the allocated space, IIUC.
> 
> 
> It shouldn't be, there is a call to reserve space for the header and

Oh, I missed the nl_msg_reserve, sorry.

> AFAICT only the header is inserted in addition.
>  
> 
> 
>     > +
>     > +    /* Reset size so we can add header. */
>     > +    nl_msg_reset_size(buf, offset);
>     > +    uint32_t nested_offset = nl_msg_start_nested(buf, type);
>     > +
>     > +    /* Move past the size of nested data and end the nested header. */
>     > +    nl_msg_reset_size(buf, buf->size + size);
>     > +    nl_msg_end_non_empty_nested(buf, nested_offset);
>     > +
>     > +    /* Move the buffer back to the end after all data. */
>     > +    nl_msg_reset_size(buf, buf->size + (tail_data_len - size));
>     > +}
>     > diff --git a/lib/netlink.h b/lib/netlink.h
>     > index e9050c31b..6a6ce20c3 100644
>     > --- a/lib/netlink.h
>     > +++ b/lib/netlink.h
>     > @@ -248,5 +248,7 @@ const struct nlattr *nl_attr_find(const struct 
> ofpbuf *, size_t hdr_len,
>     >  const struct nlattr *nl_attr_find_nested(const struct nlattr *, 
> uint16_t type);
>     >  const struct nlattr *nl_attr_find__(const struct nlattr *attrs, size_t 
> size,
>     >                                      uint16_t type);
>     > +void nl_msg_wrap_unspec(struct ofpbuf *buf, uint16_t type, uint32_t 
> offset,
>     > +                        size_t size);
>     > 
>     >  #endif /* netlink.h */
>     > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
>     > index fda802e83..2f32e9581 100644
>     > --- a/ofproto/ofproto-dpif-xlate.c
>     > +++ b/ofproto/ofproto-dpif-xlate.c
>     > @@ -191,6 +191,22 @@ struct xport {
>     >      struct lldp *lldp;               /* LLDP handle or null. */
>     >  };
>     > 
>     > +struct patch_port_ctx {
>     > +    /* The array of actions after crossing patch port boundary. */
>     > +    struct patch_port_actions **patch_port_actions;
>     > +    size_t n_alloc;
>     > +    size_t n_size;
>     > +    uint16_t depth;
> 
>     It's hard to track the difference between and the meaning of the
>     n_size and deapth.  It should be explained better in the comments.
> 
> 
> I'll add some. Basically the depth helps us with nested patch port crossing.
>  
> 
> 
>     > +};
>     > +
>     > +struct patch_port_actions {
>     > +    /* ofpbuf odp_actions offset from start. */
>     > +    uint32_t offset;
>     > +    /* len of the nla section in ofpbuf's odp_actions. */
> 
>     s/len/Length/ or even 'Size'?
> 
> 
> Done.
>  
> 
> 
>     > +    uint32_t size;
>     > +    bool is_reversible;
>     > +};
>     > +
>     >  struct xlate_ctx {
>     >      struct xlate_in *xin;
>     >      struct xlate_out *xout;
>     > @@ -409,6 +425,8 @@ struct xlate_ctx {
>     >      struct ofpbuf action_set;   /* Action set. */
>     > 
>     >      enum xlate_error error;     /* Translation failed. */
>     > +
>     > +    struct patch_port_ctx patch_port_ctx;
>     >  };
>     > 
>     >  /* Structure to track VLAN manipulation */
>     > @@ -458,6 +476,99 @@ const char *xlate_strerror(enum xlate_error error)
>     >  static void xlate_action_set(struct xlate_ctx *ctx);
>     >  static void xlate_commit_actions(struct xlate_ctx *ctx);
>     > 
>     > +static struct patch_port_actions *
>     > +patch_port_ctx_current_actions(struct patch_port_ctx *ctx)
> 
>     Maybe it will be better to not call the variable 'ctx' in these functions.
>     It creates confusion with the common xlate_ctx, especially because both
>     structures has equally named fields.
> 
> 
> Yeah I agree that the whole context in context is not very nice,
> I didn't have any better idea so I am open to name suggestions.

You may just call it patch_port_ctx, I guess.
Not sure if this datastructure is necessary if we will examine
actions right after patch port output though.

>  
> 
> 
>     > +{
>     > +    if (!ctx->depth) {
>     > +        return NULL;
>     > +    }
>     > +
>     > +    return ctx->patch_port_actions[ctx->n_size - ctx->depth];
>     > +}
>     > +
>     > +static void
>     > +patch_port_ctx_start(struct patch_port_ctx *ctx, uint32_t offset)
>     > +{
>     > +    if (ctx->n_size == ctx->n_alloc) {
>     > +        ctx->patch_port_actions =
>     > +            x2nrealloc(ctx->patch_port_actions, &ctx->n_alloc,
>     > +                       sizeof *ctx->patch_port_actions);
>     > +    }
>     > +
>     > +    struct patch_port_actions *actions =
>     > +        xmalloc(sizeof (struct patch_port_actions));
> 
>     sizeof *actions
> 
> 
> Done.
>  
> 
> 
>     > +    actions->offset = offset;
>     > +    actions->size = 0;
>     > +    actions->is_reversible = true;
>     > +
>     > +    ctx->patch_port_actions[ctx->n_size++] = actions;
>     > +    ctx->depth++;
>     > +}
>     > +
>     > +static void
>     > +patch_port_ctx_end(struct patch_port_ctx *ctx, uint32_t end_offset)
>     > +{
>     > +    struct patch_port_actions *actions = 
> patch_port_ctx_current_actions(ctx);
>     > +
>     > +    if (!actions) {
>     > +        return;
>     > +    }
>     > +
>     > +    actions->size = end_offset - actions->offset;
>     > +    ctx->depth--;
>     > +}
>     > +
>     > +static void
>     > +patch_port_ctx_destroy(struct patch_port_ctx *ctx)
>     > +{
>     > +    for (size_t i = 0; i < ctx->n_size; i++) {
>     > +        free(ctx->patch_port_actions[i]);
>     > +    }
>     > +    free(ctx->patch_port_actions);
>     > +}
>     > +
>     > +static void
>     > +patch_port_ctx_check_reversible(struct patch_port_ctx *ctx,
>     > +                                const struct ofpact *a)
>     > +{
>     > +    struct patch_port_actions *actions = 
> patch_port_ctx_current_actions(ctx);
>     > +
>     > +    if (!actions) {
>     > +        return;
>     > +    }
>     > +    actions->is_reversible = actions->is_reversible ? 
> ofpact_is_reversible(a)
>     > +                                                    : false;
>     > +
>     > +}
>     > +
>     > +static void
>     > +ctx_patch_port_context_wrap_clone(struct xlate_ctx *xlate_ctx)
>     > +{
>     > +    struct patch_port_ctx ctx = xlate_ctx->patch_port_ctx;
> 
>     I think, it's better to keep 'struct xlate_ctx *ctx' and give a different
>     name to patch_port_ctx instead.
> 
> 
> Sounds reasonable.
>  
> 
> 
>     > +
>     > +    for (size_t i = 0; i < ctx.n_size; i++) {
>     > +        struct patch_port_actions *actions = ctx.patch_port_actions[i];
>     > +        uint32_t offset = actions->offset;
>     > +        uint32_t size = actions->size;
>     > +
>     > +        /* We don't have to clone if it would be the last action, or 
> we don't
>     > +         * have any data to wrap, or the actions are reversible. */
>     > +        if (xlate_ctx->odp_actions->size == offset + size || !size
>     > +            || actions->is_reversible) {
>     > +            continue;
>     > +        }
>     > +
>     > +        if (xlate_ctx->xbridge->support.clone) {        /* Use clone 
> action */
>     > +            nl_msg_wrap_unspec(xlate_ctx->odp_actions, 
> OVS_ACTION_ATTR_CLONE,
>     > +                               offset, size);
>     > +        } else if (xlate_ctx->xbridge->support.sample_nesting > 3) {
>     > +            /* Use sample action as datapath clone. */
>     > +            nl_msg_wrap_unspec(xlate_ctx->odp_actions, 
> OVS_SAMPLE_ATTR_ACTIONS,
>     > +                               offset, size);
>     > +        }
>     > +    }
>     > +}
>     > +
>     >  static void
>     >  patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
>     >                    struct xport *out_dev, bool is_last_action);
>     > @@ -3920,7 +4031,7 @@ patch_port_output(struct xlate_ctx *ctx, const 
> struct xport *in_dev,
>     >              xport_rstp_forward_state(out_dev)) {
>     > 
>     >              xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, 
> true,
>     > -                               false, is_last_action, 
> clone_xlate_actions);
>     > +                               false, is_last_action, 
> do_xlate_actions);
>     >              if (!ctx->freezing) {
>     >                  xlate_action_set(ctx);
>     >              }
>     > @@ -3935,7 +4046,7 @@ patch_port_output(struct xlate_ctx *ctx, const 
> struct xport *in_dev,
>     >              mirror_mask_t old_mirrors2 = ctx->mirrors;
>     > 
>     >              xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, 
> true,
>     > -                               false, is_last_action, 
> clone_xlate_actions);
>     > +                               false, is_last_action, 
> do_xlate_actions);
>     >              ctx->mirrors = old_mirrors2;
>     >              ctx->base_flow = old_base_flow;
>     >              ctx->odp_actions->size = old_size;
>     > @@ -5338,7 +5449,19 @@ xlate_output_action(struct xlate_ctx *ctx, 
> ofp_port_t port,
>     >      case OFPP_LOCAL:
>     >      default:
>     >          if (port != ctx->xin->flow.in_port.ofp_port) {
>     > +            struct xport *xport = get_ofp_port(ctx->xbridge, port);
>     > +
>     > +            if (xport && xport->peer) {
>     > +                patch_port_ctx_start(&ctx->patch_port_ctx,
>     > +                                     ctx->odp_actions->size);
>     > +            }
>     > +
>     >              compose_output_action(ctx, port, NULL, is_last_action, 
> truncate);
>     > +
>     > +            if (xport && xport->peer) {
>     > +                patch_port_ctx_end(&ctx->patch_port_ctx,
>     > +                                   ctx->odp_actions->size);
>     > +            }
> 
>     This only covers the direct output to a patch port, but we can have
>     packets enter patch port during the flood or processing of the NORMAL
>     pipeline.  And since you removed cloning from the patch_port_output(),
>     this may cause issues.
> 
> 
> 
> Ah makes sense, so if I understand that correctly more fitting would be to 
> record the context in
> patch_port_output right?

Right.  Maybe not the patch_port_output() itself, but fucntions
that call it, not sure.

>  
> 
> 
>     >          } else {
>     >              xlate_report_info(ctx, "skipping output to input port");
>     >          }
>     > @@ -5755,68 +5878,7 @@ reversible_actions(const struct ofpact *ofpacts, 
> size_t ofpacts_len)
>     >      const struct ofpact *a;
>     > 
>     >      OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
>     > -        switch (a->type) {
>     > -        case OFPACT_BUNDLE:
>     > -        case OFPACT_CLEAR_ACTIONS:
>     > -        case OFPACT_CLONE:
>     > -        case OFPACT_CONJUNCTION:
>     > -        case OFPACT_CONTROLLER:
>     > -        case OFPACT_CT_CLEAR:
>     > -        case OFPACT_DEBUG_RECIRC:
>     > -        case OFPACT_DEBUG_SLOW:
>     > -        case OFPACT_DEC_MPLS_TTL:
>     > -        case OFPACT_DEC_TTL:
>     > -        case OFPACT_ENQUEUE:
>     > -        case OFPACT_EXIT:
>     > -        case OFPACT_FIN_TIMEOUT:
>     > -        case OFPACT_GOTO_TABLE:
>     > -        case OFPACT_GROUP:
>     > -        case OFPACT_LEARN:
>     > -        case OFPACT_MULTIPATH:
>     > -        case OFPACT_NOTE:
>     > -        case OFPACT_OUTPUT:
>     > -        case OFPACT_OUTPUT_REG:
>     > -        case OFPACT_POP_MPLS:
>     > -        case OFPACT_POP_QUEUE:
>     > -        case OFPACT_PUSH_MPLS:
>     > -        case OFPACT_PUSH_VLAN:
>     > -        case OFPACT_REG_MOVE:
>     > -        case OFPACT_RESUBMIT:
>     > -        case OFPACT_SAMPLE:
>     > -        case OFPACT_SET_ETH_DST:
>     > -        case OFPACT_SET_ETH_SRC:
>     > -        case OFPACT_SET_FIELD:
>     > -        case OFPACT_SET_IP_DSCP:
>     > -        case OFPACT_SET_IP_ECN:
>     > -        case OFPACT_SET_IP_TTL:
>     > -        case OFPACT_SET_IPV4_DST:
>     > -        case OFPACT_SET_IPV4_SRC:
>     > -        case OFPACT_SET_L4_DST_PORT:
>     > -        case OFPACT_SET_L4_SRC_PORT:
>     > -        case OFPACT_SET_MPLS_LABEL:
>     > -        case OFPACT_SET_MPLS_TC:
>     > -        case OFPACT_SET_MPLS_TTL:
>     > -        case OFPACT_SET_QUEUE:
>     > -        case OFPACT_SET_TUNNEL:
>     > -        case OFPACT_SET_VLAN_PCP:
>     > -        case OFPACT_SET_VLAN_VID:
>     > -        case OFPACT_STACK_POP:
>     > -        case OFPACT_STACK_PUSH:
>     > -        case OFPACT_STRIP_VLAN:
>     > -        case OFPACT_UNROLL_XLATE:
>     > -        case OFPACT_WRITE_ACTIONS:
>     > -        case OFPACT_WRITE_METADATA:
>     > -        case OFPACT_CHECK_PKT_LARGER:
>     > -        case OFPACT_DELETE_FIELD:
>     > -            break;
>     > -
>     > -        case OFPACT_CT:
>     > -        case OFPACT_METER:
>     > -        case OFPACT_NAT:
>     > -        case OFPACT_OUTPUT_TRUNC:
>     > -        case OFPACT_ENCAP:
>     > -        case OFPACT_DECAP:
>     > -        case OFPACT_DEC_NSH_TTL:
>     > +        if (!ofpact_is_reversible(a)) {
>     >              return false;
>     >          }
>     >      }
>     > @@ -6992,6 +7054,8 @@ do_xlate_actions(const struct ofpact *ofpacts, 
> size_t ofpacts_len,
>     >              ds_destroy(&s);
>     >          }
>     > 
>     > +        patch_port_ctx_check_reversible(&ctx->patch_port_ctx, a);
>     > +
>     >          switch (a->type) {
>     >          case OFPACT_OUTPUT:
>     >              xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
>     > @@ -7696,6 +7760,11 @@ xlate_actions(struct xlate_in *xin, struct 
> xlate_out *xout)
>     >      uint64_t frozen_actions_stub[1024 / 8];
>     >      uint64_t actions_stub[256 / 8];
>     >      struct ofpbuf scratch_actions = 
> OFPBUF_STUB_INITIALIZER(actions_stub);
>     > +    struct patch_port_ctx patch_port_ctx = {
>     > +        .n_size = 0,
>     > +        .n_alloc = 0,
>     > +        .depth = 0,
>     > +    };
>     >      struct xlate_ctx ctx = {
>     >          .xin = xin,
>     >          .xout = xout,
>     > @@ -7740,6 +7809,7 @@ xlate_actions(struct xlate_in *xin, struct 
> xlate_out *xout)
>     > 
>     >          .action_set_has_group = false,
>     >          .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
>     > +        .patch_port_ctx = patch_port_ctx
> 
>     Please, add a comma at the end.
> 
> 
> Done.
>  
> 
> 
>     >      };
>     > 
>     >      /* 'base_flow' reflects the packet as it came in, but we need it 
> to reflect
>     > @@ -8074,6 +8144,7 @@ xlate_actions(struct xlate_in *xin, struct 
> xlate_out *xout)
>     >      }
>     > 
>     >      xlate_wc_finish(&ctx);
>     > +    ctx_patch_port_context_wrap_clone(&ctx);
>     > 
>     >  exit:
>     >      /* Reset the table to what it was when we came in. If we only 
> fetched
>     > @@ -8085,6 +8156,7 @@ exit:
>     >      ofpbuf_uninit(&ctx.frozen_actions);
>     >      ofpbuf_uninit(&scratch_actions);
>     >      ofpbuf_delete(ctx.encap_data);
>     > +    patch_port_ctx_destroy(&ctx.patch_port_ctx);
> 
>     This is called only once at the very end of the flow translation process.
>     But what if we have several bridges connected with patch ports?  AFAIU,
>     we need to call it right after finishing the translation in the other 
> bridge,
>     so we can create clones for every patch port we're traversing, including
>     nested cases.  I.e. we need to handle any types of tree-like processing
>     including output to multiple outher bridges (output:patch1,patch2,patch3)
>     and traversal of the same packet through multiple bridges accumulating
>     changes:
>       br1: output:<actions>,patch-br2,<actions>
>       br2: output:<actions>,patch-br3,<packet mods>,patch-br3,<actions>
>       br3: output:<actions>,patch-br4,<actions>
>     Or even traversing between bridges in a circle through patch ports (it's
>     a valid case as long as packet gets modified on the way).
> 
> 
> Nested calls are recorded in the context, that's what the depth is for.
> In the end it's in a flat array, but every patch port output should be 
> recorded even
> if there is a multiple crossing, some of them nested. Also it probably 
> deserves
> test case with crossing to multiple bridges and nested calls.

Oh, OK.  The patch port context structure is not really straightforward
without extensive comments.  Test cases would be really nice, yes.

But wouldn't the first call to nl_msg_wrap_unspec() mess up all the
offsets for later ones?

>  
> 
> 
>     > 
>     >      /* Make sure we return a "drop flow" in case of an error. */
>     >      if (ctx.error) {
>     > diff --git a/tests/ofproto-dpif.at <http://ofproto-dpif.at> 
> b/tests/ofproto-dpif.at <http://ofproto-dpif.at>
>     > index 2e91ae1a1..780ed24cc 100644
>     > --- a/tests/ofproto-dpif.at <http://ofproto-dpif.at>
>     > +++ b/tests/ofproto-dpif.at <http://ofproto-dpif.at>
>     > @@ -8913,7 +8913,8 @@ OVS_VSWITCHD_START(
>     >  AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br1 'meter=1 pktps stats 
> bands=type=drop rate=2'])
>     >  AT_CHECK([ovs-ofctl del-flows br0])
>     >  AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 
> in_port=local,ip,actions=2,1])
>     > -AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 
> in_port=1,ip,actions=meter:1,3])
>     > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 
> "in_port=1,ip,actions=resubmit(,7)"])
>     > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 
> table=7,in_port=1,ip,actions=meter:1,3])
> 
>     I think, we should create separate test cases.  Maybe also cases
>     for each non-reversible action we have.
> 
> 
> That would be a great idea to test that it actually works as intended.
>  
> 
> 
>     > 
>     >  AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
> 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=10.1.1.22,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=53295,dst=8080)'],
>  [0], [stdout])
>     >  AT_CHECK([tail -1 stdout], [0],
>     > @@ -8923,6 +8924,41 @@ AT_CHECK([tail -1 stdout], [0],
>     >  OVS_VSWITCHD_STOP
>     >  AT_CLEANUP
>     > 
>     > +
>     > +AT_SETUP([ofproto-dpif - patch ports - no additional clone])
>     > +OVS_VSWITCHD_START(
>     > +  [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \
>     > +   add-port br0 p1 -- set Interface p1 type=patch \
>     > +                                       options:peer=p2 
> ofport_request=2 -- \
>     > +   add-br br1 -- \
>     > +   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
>     > +   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
>     > +                  fail-mode=secure -- \
>     > +   add-port br1 p2 -- set Interface p2 type=patch \
>     > +                                       options:peer=p1 -- \
>     > +   add-port br1 p3 -- set Interface p3 type=dummy ofport_request=3])
>     > +
>     > +AT_DATA([flows-br0.txt], [dnl
>     > 
> +priority=10,tcp,action=push:NXM_OF_IN_PORT[],resubmit(,65),pop:NXM_OF_IN_PORT[]
>     > +table=65,priority=10,ip,in_port=p0,action=p1
>     > +])
>     > +
>     > +AT_DATA([flows-br1.txt], [dnl
>     > +priority=100,in_port=p2,tcp,ct_state=-trk,action=ct(table=0,zone=1)
>     > +priority=100,in_port=p2,tcp,ct_state=+trk+est,ct_zone=1,action=p3
>     > +])
>     > +
>     > +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows-br0.txt])
>     > +AT_CHECK([ovs-ofctl --bundle add-flows br1 flows-br1.txt])
>     > +
>     > +ovs-appctl ofproto/trace br0 in_port=p0,tcp --ct-next 'trk,est'
>     > +AT_CHECK([ovs-appctl ofproto/trace br0 in_port=p0,tcp --ct-next 
> 'trk,est' | \
>     > +          grep  "Datapath actions:" | grep -q clone],
>     > +         [1], [], [],
>     > +         [ovs-appctl ofproto/trace br0 in_port=p0,tcp --ct-next 
> 'trk,est'])
>     > +OVS_TRAFFIC_VSWITCHD_STOP
>     > +AT_CLEANUP
>     > +
>     >  dnl 
> ----------------------------------------------------------------------
>     >  AT_BANNER([ofproto-dpif -- megaflows])
>     > 
> 
> 
> I will rework it to check if the final nlattr contains any irreversible 
> actions. It actually should make this
> whole process easier.
> 
> Thanks,
> Ales
> 
> -- 
> 
> Ales Musil
> 
> Senior Software Engineer - OVN Core
> 
> Red Hat EMEA <https://www.redhat.com>
> 
> [email protected] <mailto:[email protected]>    IM: amusil
> 
> <https://red.ht/sig>
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to