> This commit adds the AVX512 implementation of the push_vlan action.
> The implementation here is auto-validated by the miniflow
> extract autovalidator, hence its correctness can be easily
> tested and verified.
> 
> Signed-off-by: Emma Finn <[email protected]>

Hi Emma, thanks for the patch.

I think given the conversation on patch 7 of this series would help me work 
through this better. Overall I understand the majority of the new action logic 
and design but just have a few questions that we can discuss on patch 7 that 
will no doubt help me complete review here.

Thanks
Ian
> ---
>  lib/odp-execute-avx512.c  | 62 +++++++++++++++++++++++++++++++++++----
>  lib/odp-execute-private.c |  1 +
>  lib/odp-execute.c         | 24 +++++++++------
>  3 files changed, 72 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
> index fcf27f070..03c0fd446 100644
> --- a/lib/odp-execute-avx512.c
> +++ b/lib/odp-execute-avx512.c
> @@ -43,6 +43,13 @@ static inline void ALWAYS_INLINE
>  avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
>  {
>      /* update packet size/data pointers */
> +    if (resize_by_bytes >= 0) {
> +        dp_packet_prealloc_headroom(b, resize_by_bytes);
> +    } else {
> +        ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >=
> +                    -resize_by_bytes);
> +    }
> +
>      dp_packet_set_data(b, (char *) dp_packet_data(b) - resize_by_bytes);
>      dp_packet_set_size(b, dp_packet_size(b) + resize_by_bytes);
> 
> @@ -50,9 +57,9 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int
> resize_by_bytes)
>      const __m128i v_zeros = _mm_setzero_si128();
>      const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros);
> 
> -    /* Only these lanes can be incremented for push-VLAN action. */
> +    /* Only these lanes can be incremented/decremented for L2. */
>      const uint8_t k_lanes = 0b1110;
> -    __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN);
> +    __m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes));
> 
>      /* Load packet and compare with UINT16_MAX */
>      void *adjust_ptr = &b->l2_pad_size;
> @@ -60,9 +67,17 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int
> resize_by_bytes)
>      __mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src,
>                                                      v_u16_max);
> 
> -    /* Add VLAN_HEADER_LEN using compare mask, store results. */
> -    __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
> -                                              v_adjust_src, v_offset);
> +    /* Update VLAN_HEADER_LEN using compare mask, store results. */
> +    __m128i v_adjust_wip;
> +
> +    if (resize_by_bytes >= 0) {
> +        v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp,
> +                                            v_adjust_src, v_offset);
> +    } else {
> +        v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
> +                                            v_adjust_src, v_offset);
> +    }
> +
>      _mm_storeu_si128(adjust_ptr, v_adjust_wip);
> 
>  }
> @@ -80,7 +95,6 @@ avx512_eth_pop_vlan(struct dp_packet *packet)
>                                              16 - VLAN_HEADER_LEN);
>          _mm_storeu_si128((void *) veh, v_realign);
>          avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN);
> -
>      }
>  }
> 
> @@ -96,6 +110,41 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct
> dp_packet_batch *batch,
>      }
>  }
> 
> +static inline void ALWAYS_INLINE
> +avx512_eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci)
> +{
> +    avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
> +
> +    /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */
> +    char *pkt_data = (char *) dp_packet_data(packet);
> +    const uint16_t tci_proc = tci & htons(~VLAN_CFI);
> +    const uint32_t tpid_tci = (tci_proc << 16) | tpid;
> +
> +    static const uint8_t vlan_push_shuffle_mask[16] = {
> +    4, 5, 6, 7, 8, 9, 10, 11,
> +    12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF
> +    };
> +
> +    __m128i v_ether = _mm_loadu_si128((void *) pkt_data);
> +    __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask);
> +    __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index);
> +    __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3);
> +     _mm_storeu_si128((void *) pkt_data, v_vlan_hdr);
> +}
> +
> +static void
> +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch
> *batch,
> +                       const struct nlattr *a,
> +                       bool should_steal OVS_UNUSED)
> +{
> +    struct dp_packet *packet;
> +    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        avx512_eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> +    }
> +}
> +
>  /* Probe functions to check ISA requirements. */
>  static int32_t
>  avx512_isa_probe(uint32_t needs_vbmi)
> @@ -136,6 +185,7 @@ action_avx512_init(struct odp_execute_action_impl
> *self)
>  {
>      avx512_isa_probe(0);
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan;
> 
>      return 0;
>  }
> diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c
> index 175a80159..607f0fa94 100644
> --- a/lib/odp-execute-private.c
> +++ b/lib/odp-execute-private.c
> @@ -218,6 +218,7 @@ int32_t
>  action_autoval_init(struct odp_execute_action_impl *self)
>  {
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic;
> 
>      return 0;
>  }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 1bc9fae09..40f71fa96 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -842,6 +842,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct
> dp_packet_batch *batch,
>      }
>  }
> 
> +static void
> +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch,
> +                const struct nlattr *a OVS_UNUSED,
> +                bool should_steal OVS_UNUSED)
> +{
> +    struct dp_packet *packet;
> +    const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> +
> +    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +        eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> +    }
> +}
> +
>  /* Implementation of the scalar actions impl init function. Build up the
>   * array of func ptrs here.
>   */
> @@ -849,6 +862,7 @@ int32_t
>  odp_action_scalar_init(struct odp_execute_action_impl *self)
>  {
>      self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan;
> +    self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan;
> 
>      return 0;
>  }
> @@ -991,15 +1005,6 @@ odp_execute_actions(void *dp, struct
> dp_packet_batch *batch, bool steal,
>              break;
>          }
> 
> -        case OVS_ACTION_ATTR_PUSH_VLAN: {
> -            const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
> -
> -            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> -                eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> -            }
> -            break;
> -        }
> -
>          case OVS_ACTION_ATTR_PUSH_MPLS: {
>              const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
> 
> @@ -1133,6 +1138,7 @@ odp_execute_actions(void *dp, struct
> dp_packet_batch *batch, bool steal,
>          case OVS_ACTION_ATTR_OUTPUT:
>          case OVS_ACTION_ATTR_LB_OUTPUT:
>          case OVS_ACTION_ATTR_POP_VLAN:
> +        case OVS_ACTION_ATTR_PUSH_VLAN:
>          case OVS_ACTION_ATTR_TUNNEL_PUSH:
>          case OVS_ACTION_ATTR_TUNNEL_POP:
>          case OVS_ACTION_ATTR_USERSPACE:
> --
> 2.25.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to