On 6 Jan 2026, at 12:55, Ilya Maximets wrote:

> There are multiple logical and coding issues in this functionality.
> They are semi-related, so it's easier to fix them all at once instead
> of trying to untangle the changes into separate commits.
>
> The issues are:
>
> - The implementation of a non-masked variant of the action is putting
> the base flow values into the action instead of the updated key, so
> the packet never gets actually updated.
>
> - But this is OK, because the non-masked variant is mostly a dead
> code, since 'fully_masked' is always false, as mdtype and np are always
> zeroed out before calling commit_nsh().  So, the only way to hit this
> code is to manually turn off support for masked set.
>
> - The put_nsh_key() function can be called for a mask, but it checks
> the mdtype value as if it was a key regardless.  And since mdtype is
> always zeroed out in the mask, the match on the context is never
> propagated to the datapath flow, even if added during the action
> commit.
>
> - The key and the base are compared for being the same twice, once
> in the commit_set_nsh_action() and again in commit_nsh().
>
> - commit_set_nsh_action() is using an assumption that mdtype is
> changing in the mask whenever the mask is changing.  This is fine in
> the current code, but isn't how it supposed to work in general.
>
> - The masked variant of the action is generating a set with an empty
> mask in case the context didn't change.  Same for the base header.
> This works in userspace, but kernel generally fails the validation on
> set() with an empty mask.  The attribute should just not be provided
> in this case.
>
> - For some reason the printing function initializes the mask to an
> exact match, printing out missing attributes in full as all-zero,
> while they should not be printed if not present.
>
> - The masked variant duplicates the part of commit_masked_set_action()
> twice, while this code can be re-used without much trouble.
>
> Fixing all the issues above and making the code look more like it
> looks for other fields and header types.  Completely removing the
> get/put_nsh_key() functions as we should not be clearing the context
> anyway.  If we somehow end up with the MD1 context on an MD2 packet,
> it means that we have a bug somewhere else that should be fixed.
>
> The tests updated to cover the problematic functionality.
>
> There are still more issues here.  The main one is that we need to
> actually probe the datapath before using this action, as not all the
> kernel versions support NSH and support for set(nsh) was recently
> removed entirely from the Linux kernel due to being completely wrong.
> This is the reason for not adding any system tests here, as they
> would fail or crash the kernel.
>
> Will work on probing and the re-introduction of the support in the
> kernel separately.
>
> Fixes: 3d2fbd70bda5 ("userspace: Add support for NSH MD1 match fields")
> Fixes: f59cb331c481 ("nsh: rework NSH netlink keys and actions")
> Fixes: 81fdabb94dd7 ("nsh: fix nested mask for OVS_KEY_ATTR_NSH")
> Signed-off-by: Ilya Maximets <[email protected]>\

Thanks Ilya for fixing this mess. I have one comment below, as I got confused :)

//Eelco

> ---
>  lib/odp-util.c | 185 ++++++++++++++++++-------------------------------
>  tests/nsh.at   |  61 +++++++++++++---
>  2 files changed, 117 insertions(+), 129 deletions(-)
>
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index ee1868202..fbbfa426f 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -1050,7 +1050,7 @@ format_odp_set_nsh(struct ds *ds, const struct nlattr 
> *attr)
>      struct ovs_key_nsh nsh_mask;
>
>      memset(&nsh, 0, sizeof nsh);
> -    memset(&nsh_mask, 0xff, sizeof nsh_mask);
> +    memset(&nsh_mask, 0, sizeof nsh_mask);
>
>      NL_NESTED_FOR_EACH (a, left, attr) {
>          enum ovs_nsh_key_attr type = nl_attr_type(a);
> @@ -6333,10 +6333,6 @@ static void get_arp_key(const struct flow *, struct 
> ovs_key_arp *);
>  static void put_arp_key(const struct ovs_key_arp *, struct flow *);
>  static void get_nd_key(const struct flow *, struct ovs_key_nd *);
>  static void put_nd_key(const struct ovs_key_nd *, struct flow *);
> -static void get_nsh_key(const struct flow *flow, struct ovs_key_nsh *nsh,
> -                        bool is_mask);
> -static void put_nsh_key(const struct ovs_key_nsh *nsh, struct flow *flow,
> -                        bool is_mask);
>
>  /* These share the same layout. */
>  union ovs_key_tp {
> @@ -7936,16 +7932,13 @@ commit_set_action(struct ofpbuf *odp_actions, enum 
> ovs_key_attr key_type,
>      nl_msg_end_nested(odp_actions, offset);
>  }
>
> -/* Masked set actions have a mask following the data within the netlink
> - * attribute.  The unmasked bits in the data will be cleared as the data
> - * is copied to the action. */
> -void
> -commit_masked_set_action(struct ofpbuf *odp_actions,
> -                         enum ovs_key_attr key_type,
> -                         const void *key_, const void *mask_, size_t 
> key_size)
> +/* Commit one attribute for a masked set action.  Masked set actions have
> + * a mask following the data within the netlink attribute.  The unmasked bits
> + * in the data will be cleared as the data is copied to the attribute. */
> +static void
> +commit_masked_attribute(struct ofpbuf *odp_actions, int key_type,
> +                        const void *key_, const void *mask_, size_t key_size)
>  {
> -    size_t offset = nl_msg_start_nested(odp_actions,
> -                                        OVS_ACTION_ATTR_SET_MASKED);
>      char *data = nl_msg_put_unspec_uninit(odp_actions, key_type, key_size * 
> 2);
>      const char *key = key_, *mask = mask_;
>
> @@ -7954,6 +7947,17 @@ commit_masked_set_action(struct ofpbuf *odp_actions,
>      while (key_size--) {
>          *data++ = *key++ & *mask++;
>      }
> +}
> +
> +/* A helper to commit a masked set action for a single attribute. */
> +void
> +commit_masked_set_action(struct ofpbuf *odp_actions,
> +                         enum ovs_key_attr key_type,
> +                         const void *key_, const void *mask_, size_t 
> key_size)
> +{
> +    size_t offset = nl_msg_start_nested(odp_actions,
> +                                        OVS_ACTION_ATTR_SET_MASKED);
> +    commit_masked_attribute(odp_actions, key_type, key_, mask_, key_size);
>      nl_msg_end_nested(odp_actions, offset);
>  }
>
> @@ -8541,116 +8545,65 @@ commit_set_nw_action(const struct flow *flow, struct 
> flow *base,
>      return 0;
>  }
>
> -static inline void
> -get_nsh_key(const struct flow *flow, struct ovs_key_nsh *nsh, bool is_mask)
> -{
> -    *nsh = flow->nsh;
> -    if (!is_mask) {
> -        if (nsh->mdtype != NSH_M_TYPE1) {
> -            memset(nsh->context, 0, sizeof(nsh->context));
> -        }
> -    }
> -}
> -
> -static inline void
> -put_nsh_key(const struct ovs_key_nsh *nsh, struct flow *flow,
> -            bool is_mask OVS_UNUSED)
> -{
> -    flow->nsh = *nsh;
> -    if (flow->nsh.mdtype != NSH_M_TYPE1) {
> -        memset(flow->nsh.context, 0, sizeof(flow->nsh.context));
> -    }
> -}
> -
>  static bool
> -commit_nsh(const struct ovs_key_nsh * flow_nsh, bool use_masked_set,
> -           const struct ovs_key_nsh *key, struct ovs_key_nsh *base,
> -           struct ovs_key_nsh *mask, size_t size,
> +commit_nsh(bool use_masked_set, const struct ovs_key_nsh *key,
> +           struct ovs_key_nsh *base, struct ovs_key_nsh *mask, size_t size,
>             struct ofpbuf *odp_actions)
>  {
> -    enum ovs_key_attr attr = OVS_KEY_ATTR_NSH;
> -
> -    if (memcmp(key, base, size)  == 0) {
> +    if (!memcmp(key, base, size)) {
>          /* Mask bits are set when we have either read or set the 
> corresponding
>           * values.  Masked bits will be exact-matched, no need to set them
>           * if the value did not actually change. */
>          return false;
>      }
>
> -    bool fully_masked = odp_mask_is_exact(attr, mask, size);
> +    bool fully_masked = odp_mask_is_exact(OVS_KEY_ATTR_NSH, mask, size);
> +    size_t set_ofs;
>
>      if (use_masked_set && !fully_masked) {
> -        size_t nsh_key_ofs;
> -        struct ovs_nsh_key_base nsh_base;
> -        struct ovs_nsh_key_base nsh_base_mask;
> -        struct ovs_nsh_key_md1 md1;
> -        struct ovs_nsh_key_md1 md1_mask;
> -        size_t offset = nl_msg_start_nested(odp_actions,
> -                                            OVS_ACTION_ATTR_SET_MASKED);
> -
> -        nsh_base.flags = key->flags;
> -        nsh_base.ttl = key->ttl;
> -        nsh_base.mdtype = key->mdtype;
> -        nsh_base.np = key->np;
> -        nsh_base.path_hdr = key->path_hdr;
> -
> -        nsh_base_mask.flags = mask->flags;
> -        nsh_base_mask.ttl = mask->ttl;
> -        nsh_base_mask.mdtype = mask->mdtype;
> -        nsh_base_mask.np = mask->np;
> -        nsh_base_mask.path_hdr = mask->path_hdr;
> -
> -        /* OVS_KEY_ATTR_NSH keys */
> -        nsh_key_ofs = nl_msg_start_nested(odp_actions, OVS_KEY_ATTR_NSH);
> -
> -        /* put value and mask for OVS_NSH_KEY_ATTR_BASE */
> -        char *data = nl_msg_put_unspec_uninit(odp_actions,
> -                                              OVS_NSH_KEY_ATTR_BASE,
> -                                              2 * sizeof(nsh_base));
> -        const char *lkey = (char *)&nsh_base, *lmask = (char 
> *)&nsh_base_mask;
> -        size_t lkey_size = sizeof(nsh_base);
> -
> -        while (lkey_size--) {
> -            *data++ = *lkey++ & *lmask++;
> -        }
> -        lmask = (char *)&nsh_base_mask;
> -        memcpy(data, lmask, sizeof(nsh_base_mask));
> -
> -        switch (key->mdtype) {
> -        case NSH_M_TYPE1:
> -            memcpy(md1.context, key->context, sizeof key->context);
> -            memcpy(md1_mask.context, mask->context, sizeof mask->context);
> -
> -            /* put value and mask for OVS_NSH_KEY_ATTR_MD1 */
> -            data = nl_msg_put_unspec_uninit(odp_actions,
> -                                            OVS_NSH_KEY_ATTR_MD1,
> -                                            2 * sizeof(md1));
> -            lkey = (char *)&md1;
> -            lmask = (char *)&md1_mask;
> -            lkey_size = sizeof(md1);
> -
> -            while (lkey_size--) {
> -                *data++ = *lkey++ & *lmask++;
> -            }
> -            lmask = (char *)&md1_mask;
> -            memcpy(data, lmask, sizeof(md1_mask));
> -            break;
> -        case NSH_M_TYPE2:
> -        default:
> -            /* No match support for other MD formats yet. */
> -            break;
> +        struct ovs_nsh_key_base nsh_base = {
> +            .flags = key->flags,
> +            .ttl = key->ttl,
> +            /* 'mdtype' and 'np' are not writable. */
> +            .path_hdr = key->path_hdr,
> +        };
> +        struct ovs_nsh_key_base nsh_base_mask = {
> +            .flags = mask->flags,
> +            .ttl = mask->ttl,
> +            /* 'mdtype' and 'np' are not writable. */
> +            .path_hdr = mask->path_hdr,
> +        };
> +        size_t nsh_ofs;
> +
> +        set_ofs = nl_msg_start_nested(odp_actions, 
> OVS_ACTION_ATTR_SET_MASKED);
> +        nsh_ofs = nl_msg_start_nested(odp_actions, OVS_KEY_ATTR_NSH);
> +
> +        if (!is_all_zeros(&nsh_base_mask, sizeof nsh_base_mask)) {
> +            commit_masked_attribute(odp_actions, OVS_NSH_KEY_ATTR_BASE,
> +                                    &nsh_base, &nsh_base_mask,
> +                                    sizeof nsh_base);
>          }
>
> -        nl_msg_end_nested(odp_actions, nsh_key_ofs);
> +        /* No match support for other MD formats yet. */
> +        if (key->mdtype == NSH_M_TYPE1
> +            && !is_all_zeros(mask->context, sizeof *mask->context)) {

Is the sizeof correct here? This points to an array element, so it would only 
give 4 bytes, not 4×4. I think this is what the old code uses?

> +            commit_masked_attribute(odp_actions, OVS_NSH_KEY_ATTR_MD1,
> +                                    key->context, mask->context,
> +                                    sizeof key->context);
> +        }
>
> -        nl_msg_end_nested(odp_actions, offset);
> +        nl_msg_end_nested(odp_actions, nsh_ofs);
> +        nl_msg_end_nested(odp_actions, set_ofs);
>      } else {
> +        /* Overwriting the whole header.  Make sure that fields that wasn't
> +         * in the mask are indeed the same as in the committed action. */
>          if (!fully_masked) {
>              memset(mask, 0xff, size);
>          }
> -        size_t offset = nl_msg_start_nested(odp_actions, 
> OVS_ACTION_ATTR_SET);
> -        nsh_key_to_attr(odp_actions, flow_nsh, NULL, 0, false);
> -        nl_msg_end_nested(odp_actions, offset);
> +
> +        set_ofs = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET);
> +        nsh_key_to_attr(odp_actions, key, NULL, 0, false);
> +        nl_msg_end_nested(odp_actions, set_ofs);
>      }
>      memcpy(base, key, size);
>      return true;
> @@ -8664,8 +8617,7 @@ commit_set_nsh_action(const struct flow *flow, struct 
> flow *base_flow,
>  {
>      struct ovs_key_nsh key, mask, base;
>
> -    if (flow->dl_type != htons(ETH_TYPE_NSH) ||
> -        !memcmp(&base_flow->nsh, &flow->nsh, sizeof base_flow->nsh)) {
> +    if (flow->dl_type != htons(ETH_TYPE_NSH)) {
>          return;
>      }
>
> @@ -8673,18 +8625,13 @@ commit_set_nsh_action(const struct flow *flow, struct 
> flow *base_flow,
>      ovs_assert(flow->nsh.mdtype == base_flow->nsh.mdtype &&
>                 flow->nsh.np == base_flow->nsh.np);
>
> -    get_nsh_key(flow, &key, false);
> -    get_nsh_key(base_flow, &base, false);
> -    get_nsh_key(&wc->masks, &mask, true);
> -    mask.mdtype = 0;     /* Not writable. */
> -    mask.np = 0;         /* Not writable. */
> +    key = flow->nsh;
> +    base = base_flow->nsh;
> +    mask = wc->masks.nsh;
>
> -    if (commit_nsh(&base_flow->nsh, use_masked, &key, &base, &mask,
> -            sizeof key, odp_actions)) {
> -        put_nsh_key(&base, base_flow, false);
> -        if (mask.mdtype != 0) { /* Mask was changed by commit(). */
> -            put_nsh_key(&mask, &wc->masks, true);
> -        }
> +    if (commit_nsh(use_masked, &key, &base, &mask, sizeof key, odp_actions)) 
> {
> +        base_flow->nsh = base;
> +        or_bytes(&wc->masks.nsh, &mask, sizeof wc->masks.nsh);
>      }
>  }
>
> diff --git a/tests/nsh.at b/tests/nsh.at
> index 022540dd6..d9f80fc02 100644
> --- a/tests/nsh.at
> +++ b/tests/nsh.at
> @@ -4,7 +4,7 @@ AT_BANNER([network service header (NSH)])
>  ###   Simple NSH matching test case
>  ### -----------------------------------------------------------------
>
> -AT_SETUP([nsh - matching])
> +AT_SETUP([nsh - match and set])
>
>  OVS_VSWITCHD_START([dnl
>      set bridge br0 datapath_type=dummy \
> @@ -13,7 +13,8 @@ OVS_VSWITCHD_START([dnl
>      add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2])
>
>  AT_DATA([flows.txt], [dnl
> -    
> table=0,in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,actions=set_field:0x2->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,2
> +table=0,in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,dnl
> +actions=set_field:0x2->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,load:0x77->nsh_c3[[8..15]],2
>  ])
>
>  AT_CHECK([
> @@ -21,13 +22,48 @@ AT_CHECK([
>      ovs-ofctl -Oopenflow13 add-flows br0 flows.txt
>      ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions
>  ], [0], [dnl
> - 
> in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344
>  
> actions=set_field:2->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,output:2
> +[ 
> in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344]dnl
> +[ 
> actions=set_field:2->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,set_field:0x7700/0xff00->nsh_c3,output:2]
>  ])
>
> -AT_CHECK([
> -    ovs-appctl ofproto/trace br0 
> 'in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00'
> -], [0], [dnl
> -Flow: 
> in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0,nw_frag=no
> +m4_define([NSH_HEADER], [m4_join([,],
> +  [nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255],
> +  
> [nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00])])
> +
> +m4_define([NSH_HEADER2], [m4_join([,],
> +  [nsh_flags=2,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=254],
> +  
> [nsh_c1=0x44332211,nsh_c2=0x55667788,nsh_c3=0x99aa77cc,nsh_c4=0xddeeff00])])
> +
> +AT_CHECK([ovs-appctl ofproto/trace br0 
> 'in_port=1,eth_type=0x894f,NSH_HEADER'], [0], [dnl
> +Flow: in_port=1,vlan_tci=0x0000,dnl
> +dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,NSH_HEADER,dnl
> +nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0,nw_frag=no
> +
> +bridge("br0")
> +-------------
> + 0. 
> in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,
>  priority 32768
> +    set_field:2->nsh_flags
> +    set_field:254->nsh_si
> +    set_field:0x44332211->nsh_c1
> +    set_field:0x7700/0xff00->nsh_c3
> +    output:2
> +
> +Final flow: in_port=1,vlan_tci=0x0000,dnl
> +dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,NSH_HEADER2,dnl
> +nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0,nw_frag=no
> +Megaflow: recirc_id=0,eth,in_port=1,dl_type=0x894f,dnl
> +nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,dnl
> +nsh_c1=0x11223344,nsh_c3=0xbb00/0xff00
> +Datapath actions: 
> set(nsh(flags=2,ttl=63,spi=0x123456,si=254,c1=0x44332211,c3=0x7700/0x0000ff00)),2
> +])
> +
> +dnl Check that non-masked action variant is also correct.
> +AT_CHECK([ovs-appctl dpif/set-dp-features br0 masked_set_action false])
> +
> +AT_CHECK([ovs-appctl ofproto/trace br0 
> 'in_port=1,eth_type=0x894f,NSH_HEADER'], [0], [dnl
> +Flow: in_port=1,vlan_tci=0x0000,dnl
> +dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,NSH_HEADER,dnl
> +nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0,nw_frag=no
>
>  bridge("br0")
>  -------------
> @@ -35,11 +71,16 @@ bridge("br0")
>      set_field:2->nsh_flags
>      set_field:254->nsh_si
>      set_field:0x44332211->nsh_c1
> +    set_field:0x7700/0xff00->nsh_c3
>      output:2
>
> -Final flow: 
> in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=2,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=254,nsh_c1=0x44332211,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0,nw_frag=no
> -Megaflow: 
> recirc_id=0,eth,in_port=1,dl_type=0x894f,nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344
> -Datapath actions: 
> set(nsh(flags=2,ttl=63,spi=0x123456,si=254,c1=0x44332211)),2
> +Final flow: in_port=1,vlan_tci=0x0000,dnl
> +dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,NSH_HEADER2,dnl
> +nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0,nw_frag=no
> +Megaflow: recirc_id=0,eth,in_port=1,dl_type=0x894f,dnl
> +nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,dnl
> +nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00
> +Datapath actions: 
> set(nsh(flags=2,ttl=63,mdtype=1,np=3,spi=0x123456,si=254,c1=0x44332211,c2=0x55667788,c3=0x99aa77cc,c4=0xddeeff00)),2
>  ])
>
>  OVS_VSWITCHD_STOP
> -- 
> 2.52.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to