This patch introduces an AVX512-optimized miniflow extract function pointer for the IP/UDP traffic type, together with its supporting functions, in dpif-netdev-extract-avx512.c.
Signed-off-by: Harry van Haaren <[email protected]>
Co-authored-by: Kumar Amber <[email protected]>
Signed-off-by: Kumar Amber <[email protected]>
---
 lib/automake.mk                   |   1 +
 lib/dpdk.c                        |   1 +
 lib/dpif-netdev-extract-avx512.c  | 247 ++++++++++++++++++++++++++++++
 lib/dpif-netdev-private-extract.c |   5 +
 lib/dpif-netdev-private-extract.h |  11 ++
 5 files changed, 265 insertions(+)
 create mode 100644 lib/dpif-netdev-extract-avx512.c

diff --git a/lib/automake.mk b/lib/automake.mk
index 783730fc7..8e2d84e29 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -113,6 +113,7 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/dpif-netdev-lookup-generic.c \
 	lib/dpif-netdev.c \
 	lib/dpif-netdev.h \
+	lib/dpif-netdev-extract-avx512.c \
 	lib/dpif-netdev-extract-study.c \
 	lib/dpif-netdev-private-dfc.h \
 	lib/dpif-netdev-private-dpcls.h \
diff --git a/lib/dpdk.c b/lib/dpdk.c
index a9494a40f..e0c76abe3 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -657,6 +657,7 @@ dpdk_get_cpu_has_isa(const char *arch, const char *feature)
     CHECK_CPU_FEATURE(feature, "avx512f", RTE_CPUFLAG_AVX512F);
     CHECK_CPU_FEATURE(feature, "avx512vpopcntdq", RTE_CPUFLAG_AVX512VPOPCNTDQ);
     CHECK_CPU_FEATURE(feature, "bmi2", RTE_CPUFLAG_BMI2);
+    CHECK_CPU_FEATURE(feature, "avx512bw", RTE_CPUFLAG_AVX512BW);
 #endif
 
     VLOG_WARN("Unknown CPU arch,feature: %s,%s. Returning not supported.\n",
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
new file mode 100644
index 000000000..169775f4b
--- /dev/null
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2021 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <config.h>
+#include <errno.h>
+#include <immintrin.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "dpdk.h"
+#include "dpif-netdev-private-dpcls.h"
+#include "dpif-netdev-private-extract.h"
+#include "flow.h"
+
+/* This file contains optimized implementations of miniflow_extract()
+ * for specific common traffic patterns. The optimizations allow for
+ * quick probing of a specific packet type, and if a match with a specific
+ * type is found, a shuffle like procedure builds up the required miniflow.
+ *
+ * The functionality here can be easily auto-validated and tested against the
+ * scalar miniflow_extract() function. As such, manual review of the code by
+ * the community (although welcome) is not required. Confidence in the
+ * correctness of the code can be had from the autovalidation.
+ */
+
+/* Generator for EtherType masks and values. */
+#define PATTERN_ETHERTYPE_GEN(type_b0, type_b1) \
+  0, 0, 0, 0, 0, 0, /* Ether MAC DST */ \
+  0, 0, 0, 0, 0, 0, /* Ether MAC SRC */ \
+  type_b0, type_b1, /* EtherType */
+
+#define PATTERN_ETHERTYPE_MASK PATTERN_ETHERTYPE_GEN(0xFF, 0xFF)
+#define PATTERN_ETHERTYPE_IPV4 PATTERN_ETHERTYPE_GEN(0x08, 0x00)
+
+/* Generator for checking IPv4 ver, ihl, and proto */
+#define PATTERN_IPV4_GEN(VER_IHL, FLAG_OFF_B0, FLAG_OFF_B1, PROTO) \
+  VER_IHL, /* Version and IHL */ \
+  0, 0, 0, /* DSCP, ECN, Total Length */ \
+  0, 0, /* Identification */ \
+  /* Flags/Fragment offset: don't match MoreFrag (MF) or FragOffset */ \
+  FLAG_OFF_B0, FLAG_OFF_B1, \
+  0, /* TTL */ \
+  PROTO, /* Protocol */ \
+  0, 0, /* Header checksum */ \
+  0, 0, 0, 0, /* Src IP */ \
+  0, 0, 0, 0, /* Dst IP */
+
+/* Compare the whole flags/fragment-offset field against zero: a mask of 0xFE
+ * would ignore offset bit 8 and let a fragment at offset 2048 match. */
+#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFF, 0xFF, 0xFF)
+#define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11)
+
+#define NU 0
+#define PATTERN_IPV4_UDP_SHUFFLE \
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \
+  26, 27, 28, 29, 30, 31, 32, 33, NU, NU, NU, NU, 20, 15, 22, 23, /* IPv4 */ \
+  34, 35, 36, 37, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, /* UDP */
+
+/* Masks for Ether()/IP()/UDP() traffic */
+static const uint8_t eth_ip_udp_mask[64] = {
+    PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK
+};
+static const uint8_t eth_ip_udp_values[64] = {
+    PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP
+};
+static const uint8_t eth_ip_udp_shuf[64] = {
+    PATTERN_IPV4_UDP_SHUFFLE
+};
+
+/* Shortest packet containing every byte that is probed and shuffled below:
+ * Ethernet (14) + IPv4 without options (20) + UDP (8) header bytes. */
+#define MFEX_IPV4_UDP_MIN_LEN (14 + 20 + 8)
+
+/* Byte-granular two-source permute built from 16-bit permutes. Selects bytes
+ * of 'v_data_0'/'v_data_1' as indexed by 'v_shuf_idxs', then zeroes every
+ * result byte whose bit in 'k_mask' is clear. */
+static inline __m512i
+__attribute__((target("avx512bw")))
+_mm512_maskz_permutex2var_epi8_skx(__mmask64 k_mask,
+                                   __m512i v_data_0,
+                                   __m512i v_shuf_idxs,
+                                   __m512i v_data_1)
+{
+    /* Manipulate shuffle indexes for u16 size. */
+    __mmask64 k_mask_odd_lanes = 0xAAAAAAAAAAAAAAAA;
+    /* clear away ODD lane bytes. Cannot be done above due to no u8 shift */
+    __m512i v_shuf_idx_evn = _mm512_mask_blend_epi8(k_mask_odd_lanes,
+                v_shuf_idxs, _mm512_setzero_si512());
+    v_shuf_idx_evn = _mm512_srli_epi16(v_shuf_idx_evn, 1);
+
+    __m512i v_shuf_idx_odd = _mm512_srli_epi16(v_shuf_idxs, 9);
+
+    /* Shuffle each half at 16-bit width */
+    __m512i v_shuf1 = _mm512_permutex2var_epi16(v_data_0, v_shuf_idx_evn,
+                                                v_data_1);
+    __m512i v_shuf2 = _mm512_permutex2var_epi16(v_data_0, v_shuf_idx_odd,
+                                                v_data_1);
+
+    /* Find if the shuffle index was odd, via mask and compare */
+    uint16_t index_odd_mask = 0x1;
+    const __m512i v_index_mask_u16 = _mm512_set1_epi16(index_odd_mask);
+
+    /* EVEN lanes, find if u8 index was odd, result as u16 bitmask */
+    __m512i v_idx_even_masked = _mm512_and_si512(v_shuf_idxs,
+                                                 v_index_mask_u16);
+    __mmask32 evn_rotate_mask = _mm512_cmpeq_epi16_mask(v_idx_even_masked,
+                                                        v_index_mask_u16);
+
+    /* ODD lanes, find if u8 index was odd, result as u16 bitmask */
+    __m512i v_shuf_idx_srli8 = _mm512_srli_epi16(v_shuf_idxs, 8);
+    __m512i v_idx_odd_masked = _mm512_and_si512(v_shuf_idx_srli8,
+                                                v_index_mask_u16);
+    __mmask32 odd_rotate_mask = _mm512_cmpeq_epi16_mask(v_idx_odd_masked,
+                                                        v_index_mask_u16);
+    odd_rotate_mask = ~odd_rotate_mask;
+
+    /* Rotate and blend results from each index */
+    __m512i v_shuf_res_evn = _mm512_mask_srli_epi16(v_shuf1, evn_rotate_mask,
+                                                    v_shuf1, 8);
+    __m512i v_shuf_res_odd = _mm512_mask_slli_epi16(v_shuf2, odd_rotate_mask,
+                                                    v_shuf2, 8);
+
+    /* If shuffle index was odd, blend shifted version */
+    __m512i v_shuf_result = _mm512_mask_blend_epi8(k_mask_odd_lanes,
+                                v_shuf_res_evn, v_shuf_res_odd);
+
+    __m512i v_zeros = _mm512_setzero_si512();
+    __m512i v_result_kmskd = _mm512_mask_blend_epi8(k_mask, v_zeros,
+                                                    v_shuf_result);
+
+    return v_result_kmskd;
+}
+
+/* Writes the miniflow map bits and blocks for an Ether()/IPv4()/UDP() packet
+ * into 'mf'. 'pkt' must have passed avx512_ipv4_udp_probe(), i.e. it holds
+ * at least MFEX_IPV4_UDP_MIN_LEN readable bytes. */
+static inline void
+__attribute__((target("avx512bw")))
+avx512_ipv4_udp_store(const uint8_t *pkt, struct miniflow *mf,
+                      uint32_t in_port)
+{
+    int64_t u0b = 0x18a0000000000000;
+    int64_t u1b = 0x0000000000040401;
+    __m128i v_bits = {u0b, u1b};
+
+    /* Store mf Bits */
+    uint64_t *bits = (void *)&mf->map.bits[0];
+    uint64_t *blocks = miniflow_values(mf);
+    _mm_storeu_si128((void *) bits, v_bits);
+
+    /* Load only the header bytes validated by the probe; a full 64-byte load
+     * could read past the end of a packet shorter than 64 bytes. */
+    __mmask64 k_pkt = (UINT64_C(1) << MFEX_IPV4_UDP_MIN_LEN) - 1;
+    __m512i v_pkt0 = _mm512_maskz_loadu_epi8(k_pkt, &pkt[0]);
+    __m512i v_eth_ip_udp_shuf = _mm512_loadu_si512(eth_ip_udp_shuf);
+
+    /* Shuffle pkt and store blocks */
+    __mmask64 k_shufzero = 0b0000111111110000111111110011111111111111;
+    __m512i v_zeros = _mm512_setzero_si512();
+    __m512i v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shufzero,
+                            v_pkt0, v_eth_ip_udp_shuf, v_zeros);
+
+    _mm512_storeu_si512(&blocks[2], v_blk0);
+
+    /* in_port sits in the upper half of block 0. Block 1 is also flagged in
+     * the map bits above, so zero it rather than leaving stale key data. */
+    blocks[0] = ((uint64_t) in_port) << 32;
+    blocks[1] = 0;
+}
+
+/* Returns 1 if the 'len' bytes at 'pkt' start with Ethernet, option-less
+ * unfragmented IPv4 and UDP headers, otherwise 0. */
+static inline uint32_t
+__attribute__((target("avx512bw")))
+avx512_ipv4_udp_probe(const uint8_t *pkt, uint32_t len)
+{
+    /* Reject truncated packets: every masked-in byte sits within the first
+     * 24 bytes, so a runt would otherwise match and be stored from. */
+    if (len < MFEX_IPV4_UDP_MIN_LEN) {
+        return 0;
+    }
+
+    /* Packet data is masked to known IPv4/UDP parse length. */
+    uint64_t klen = UINT64_MAX;
+    if (len < 64) {
+        klen = (1ULL << len) - 1;
+    }
+
+    __m512i v_pkt0 = _mm512_maskz_loadu_epi8(klen, &pkt[0]);
+    __m512i v_eth_ip_udp_mask = _mm512_loadu_si512(eth_ip_udp_mask);
+    __m512i v_eth_ip_udp_vals = _mm512_loadu_si512(eth_ip_udp_values);
+    __m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_eth_ip_udp_mask);
+    __mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_eth_ip_udp_vals);
+
+    return k_cmp == UINT64_MAX;
+}
+
+/* Probes each packet in 'packets' for the Ether()/IPv4()/UDP() pattern and,
+ * on a match, writes its miniflow to the corresponding entry of 'keys'.
+ * Returns a bitmask with bit 'i' set for every packet index that matched. */
+uint32_t
+__attribute__((target("avx512bw")))
+mfex_avx512_ipv4_udp(struct dp_packet_batch *packets,
+                     struct netdev_flow_key *keys,
+                     uint32_t keys_size OVS_UNUSED, odp_port_t in_port,
+                     void *pmd_handle OVS_UNUSED)
+{
+    uint32_t hitmask = 0;
+    struct dp_packet *packet;
+    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
+        const uint32_t size = dp_packet_size(packet);
+        const uint8_t *pkt = dp_packet_data(packet);
+        uint32_t match = avx512_ipv4_udp_probe(pkt, size);
+        if (match) {
+            avx512_ipv4_udp_store(pkt, &keys[i].mf, in_port);
+            hitmask |= UINT32_C(1) << i;
+        }
+    }
+    return hitmask;
+}
+
+/* Returns 0 if the CPU reports avx512f, avx512bw and bmi2 support, otherwise
+ * -ENOTSUP. */
+int32_t
+mfex_avx512_probe(void)
+{
+    int avx512f_available = dpdk_get_cpu_has_isa("x86_64", "avx512f");
+    int bmi2_available = dpdk_get_cpu_has_isa("x86_64", "bmi2");
+    int avx512bw_available = dpdk_get_cpu_has_isa("x86_64", "avx512bw");
+    if (!avx512f_available || !avx512bw_available || !bmi2_available) {
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c
index 76c24c2f8..060c1939a 100644
--- a/lib/dpif-netdev-private-extract.c
+++ b/lib/dpif-netdev-private-extract.c
@@ -47,6 +47,11 @@ static struct dpif_miniflow_extract_impl mfex_impls[] = {
         .extract_func = mfex_study_traffic,
         .name = "study",
     },
+    {
+        .probe = mfex_avx512_probe,
+        .extract_func = mfex_avx512_ipv4_udp,
+        .name = "avx512_ip_udp",
+    },
 };
 
 BUILD_ASSERT_DECL(MFEX_IMPLS_MAX_SIZE > ARRAY_SIZE(mfex_impls));
diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h
index 3ada413bb..e7b45c2b1 100644
--- a/lib/dpif-netdev-private-extract.h
+++ b/lib/dpif-netdev-private-extract.h
@@ -118,4 +118,15 @@ mfex_study_traffic(struct dp_packet_batch *packets,
                    uint32_t keys_size, odp_port_t in_port,
                    void *pmd_handle);
 
+/* Probe function to detect CPU ISA for SKX. */
+int32_t
+mfex_avx512_probe(void);
+
+/* Traffic specific AVX512 Eth/Ipv4/Udp traffic type for SKX. */
+uint32_t
+mfex_avx512_ipv4_udp(struct dp_packet_batch *packets,
+                     struct netdev_flow_key *keys,
+                     uint32_t keys_size, odp_port_t in_port,
+                     void *pmd_handle);
+
 #endif /* DPIF_NETDEV_AVX512_EXTRACT */
-- 
2.25.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
