This commit adds the AVX512 implementation of the push_vlan action. The implementation here is auto-validated by the miniflow extract autovalidator, hence its correctness can be easily tested and verified.
Signed-off-by: Emma Finn <[email protected]> --- NEWS | 1 + lib/odp-execute-avx512.c | 74 ++++++++++++++++++++++++++++++++++----- lib/odp-execute-private.c | 1 + lib/odp-execute.c | 24 ++++++++----- 4 files changed, 83 insertions(+), 17 deletions(-) diff --git a/NEWS b/NEWS index f5032bdd0..af1759685 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,7 @@ Post-v2.16.0 actions implementation at build time. * Add AVX512 implementation of actions. * Add support for an AVX512 optimized version of pop_vlan action. + * Add support for an AVX512 optimized version of push_vlan action. - Python: * For SSL support, the use of the pyOpenSSL library has been replaced with the native 'ssl' module. diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c index 8bbfd5203..3d850026a 100644 --- a/lib/odp-execute-avx512.c +++ b/lib/odp-execute-avx512.c @@ -39,10 +39,22 @@ BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) + MEMBER_SIZEOF(struct dp_packet, l3_ofs) == offsetof(struct dp_packet, l4_ofs)); +enum OPERATION { + OP_ADD, + OP_SUB, +}; + static inline void ALWAYS_INLINE -avx512_dp_packet_resize_l2(struct dp_packet *b, int increment) +avx512_dp_packet_resize_l2(struct dp_packet *b, int increment, + enum OPERATION op) { /* update packet size/data pointers */ + if (increment >= 0) { + dp_packet_prealloc_headroom(b, increment); + } else { + ovs_assert(dp_packet_size(b) - dp_packet_l2_pad_size(b) >= -increment); + } + dp_packet_set_data(b, (char *) dp_packet_data(b) - increment); dp_packet_set_size(b, dp_packet_size(b) + increment); @@ -50,9 +62,9 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int increment) const __m128i v_zeros = _mm_setzero_si128(); const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros); - /* Only these lanes can be incremented for push-VLAN action. */ + /* Only these lanes can be incremented/decremented for L2. */ const uint8_t k_lanes = 0b1110; - __m128i v_offset = _mm_set1_epi16(VLAN_HEADER_LEN); + __m128i v_offset = _mm_set1_epi16(abs(increment)); /* Load packet and compare with UINT16_MAX */ void *adjust_ptr = &b->l2_pad_size; @@ -60,9 +72,19 @@ avx512_dp_packet_resize_l2(struct dp_packet *b, int increment) __mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src, v_u16_max); - /* Add VLAN_HEADER_LEN using compare mask, store results. */ - __m128i v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, - v_adjust_src, v_offset); + /* Update VLAN_HEADER_LEN using compare mask, store results. */ + __m128i v_adjust_wip; + switch (op) { + case OP_ADD: + v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp, + v_adjust_src, v_offset); + break; + case OP_SUB: + v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp, + v_adjust_src, v_offset); + break; + } + _mm_storeu_si128(adjust_ptr, v_adjust_wip); } @@ -79,8 +101,7 @@ avx512_eth_pop_vlan(struct dp_packet *packet) __m128i v_realign = _mm_alignr_epi8(v_ether, _mm_setzero_si128(), 16 - VLAN_HEADER_LEN); _mm_storeu_si128((void *) veh, v_realign); - avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN); - + avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN, OP_SUB); } } @@ -96,6 +117,42 @@ action_avx512_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, } } +static inline void ALWAYS_INLINE +avx512_eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci) +{ + avx512_dp_packet_resize_l2(packet, VLAN_HEADER_LEN, OP_ADD); + + /* Build up the VLAN TCI/TPID, and merge with the moving of Ether. */ + char *new_pkt_data = (char *) dp_packet_data(packet); + + const uint16_t tci_proc = tci & htons(~VLAN_CFI); + const uint32_t tpid_tci = (tci_proc << 16) | tpid; + __m128i v_ether = _mm_loadu_si128((void *) new_pkt_data); + + static const uint8_t vlan_push_shuffle_mask[16] = { + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF + }; + + __m128i v_index = _mm_loadu_si128((void *) vlan_push_shuffle_mask); + __m128i v_shift = _mm_shuffle_epi8(v_ether, v_index); + __m128i v_vlan_hdr = _mm_insert_epi32(v_shift, tpid_tci, 3); + _mm_storeu_si128((void *) new_pkt_data, v_vlan_hdr); +} + +static void +action_avx512_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, + const struct nlattr *a, + bool should_steal OVS_UNUSED) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + avx512_eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); + } +} + /* Probe functions to check ISA requirements. */ static int32_t avx512_isa_probe(uint32_t needs_vbmi) @@ -136,6 +193,7 @@ action_avx512_init(struct odp_execute_action_impl *self) { avx512_isa_probe(0); self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_avx512_push_vlan; return 0; } diff --git a/lib/odp-execute-private.c b/lib/odp-execute-private.c index 7c58d90d2..0bff66b50 100644 --- a/lib/odp-execute-private.c +++ b/lib/odp-execute-private.c @@ -218,6 +218,7 @@ int32_t action_autoval_init(struct odp_execute_action_impl *self) { self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_autoval_generic; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_autoval_generic; return 0; } diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 1bc9fae09..40f71fa96 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -842,6 +842,19 @@ action_pop_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, } } +static void +action_push_vlan(void *dp OVS_UNUSED, struct dp_packet_batch *batch, + const struct nlattr *a OVS_UNUSED, + bool should_steal OVS_UNUSED) +{ + struct dp_packet *packet; + const struct ovs_action_push_vlan *vlan = nl_attr_get(a); + + DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { + eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); + } +} + /* Implementation of the scalar actions impl init function. Build up the * array of func ptrs here. */ @@ -849,6 +862,7 @@ int32_t odp_action_scalar_init(struct odp_execute_action_impl *self) { self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_pop_vlan; + self->funcs[OVS_ACTION_ATTR_PUSH_VLAN] = action_push_vlan; return 0; } @@ -991,15 +1005,6 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, break; } - case OVS_ACTION_ATTR_PUSH_VLAN: { - const struct ovs_action_push_vlan *vlan = nl_attr_get(a); - - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); - } - break; - } - case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nl_attr_get(a); @@ -1133,6 +1138,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, case OVS_ACTION_ATTR_OUTPUT: case OVS_ACTION_ATTR_LB_OUTPUT: case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_TUNNEL_PUSH: case OVS_ACTION_ATTR_TUNNEL_POP: case OVS_ACTION_ATTR_USERSPACE: -- 2.25.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
