This functionality was deprecated in 3.7 due to lack of use, testing and maintenance. It's time to remove it.
With that change we no longer need the internal dpif-netdev-private.h header as dpif-netdev.c is the only user. So it is removed. dpif-netdev.c now consumes all the other private headers directly. Signed-off-by: Ilya Maximets <[email protected]> --- Documentation/topics/dpdk/bridge.rst | 32 --- Documentation/topics/testing.rst | 11 - NEWS | 1 + acinclude.m4 | 20 -- configure.ac | 1 - lib/automake.mk | 7 +- lib/dpif-netdev-avx512.c | 392 --------------------------- lib/dpif-netdev-private-dpif.c | 171 ------------ lib/dpif-netdev-private-dpif.h | 88 ------ lib/dpif-netdev-private-thread.h | 8 - lib/dpif-netdev-private.h | 52 ---- lib/dpif-netdev-unixctl.man | 6 - lib/dpif-netdev.c | 147 ++-------- tests/pmd.at | 16 -- 14 files changed, 18 insertions(+), 934 deletions(-) delete mode 100644 lib/dpif-netdev-avx512.c delete mode 100644 lib/dpif-netdev-private-dpif.c delete mode 100644 lib/dpif-netdev-private-dpif.h delete mode 100644 lib/dpif-netdev-private.h diff --git a/Documentation/topics/dpdk/bridge.rst b/Documentation/topics/dpdk/bridge.rst index 3c7896f28..ab09f89f1 100644 --- a/Documentation/topics/dpdk/bridge.rst +++ b/Documentation/topics/dpdk/bridge.rst @@ -253,35 +253,3 @@ match with the numbers in the provided command output Please send an email to the OVS mailing list [email protected] with the output of the ``dp-extra-info:miniflow_bits(4,1)`` values. - -Datapath Interface Performance ------------------------------- - -.. note:: - - The AVX512 Datapath Interface Performance feature is deprecated and will be - removed in a future release. - -The datapath interface (DPIF) is responsible for taking packets through the -major components of the userspace datapath; such as packet parsing, caches and -datapath classifier lookups. - -Just like with the datapath classifier, SIMD instructions can be applied to the -datapath interface implementation to improve performance. - -OVS provides multiple implementations of the userspace datapath interface. 
-Available implementations can be listed with the following command:: - - $ ovs-appctl dpif-netdev/dpif-impl-get - Available DPIF implementations: - dpif_scalar (pmds: none) - dpif_avx512 (pmds: 1,2,6,7) - -By default, ``dpif_scalar`` is used. Implementations can be selected by -name:: - - $ ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512 - DPIF implementation set to dpif_avx512. - - $ ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar - DPIF implementation set to dpif_scalar. diff --git a/Documentation/topics/testing.rst b/Documentation/topics/testing.rst index 6c835c663..e3b06321a 100644 --- a/Documentation/topics/testing.rst +++ b/Documentation/topics/testing.rst @@ -364,17 +364,6 @@ options are used:: Compile OVS in debug mode to have `ovs_assert` statements error out if there is a mismatch in the datapath classifier lookup. -Since the AVX512 implementation of the datapath interface is disabled by -default, a compile time option is available in order to test it with the OVS -unit test suite:: - - $ ./configure --enable-dpif-default-avx512 - -The following line should be seen in the configuration log when the above -option is used:: - - checking whether DPIF AVX512 is default implementation... yes - .. note:: Run all the available testsuites including `make check`, `make check-system-userspace` and `make check-dpdk` to ensure the optimal diff --git a/NEWS b/NEWS index 6e982517c..c828ae301 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,7 @@ Post-v3.7.0 datapath are now removed: * AVX512-optimized action handling. * AVX512-optimized packet parsing (miniflow extraction). + * AVX512-optimized DPIF input processing. 
- Windows: * The previously deprecated Windows kernel datapath implementation is now fully removed as well as support for running or building userspace diff --git a/acinclude.m4 b/acinclude.m4 index 43c8e9d03..58d5b9df8 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -36,26 +36,6 @@ AC_DEFUN([OVS_CHECK_DPCLS_AUTOVALIDATOR], [ fi ]) -dnl Set OVS DPIF default implementation at configure time for running the unit -dnl tests on the whole codebase without modifying tests per DPIF impl -AC_DEFUN([OVS_CHECK_DPIF_AVX512_DEFAULT], [ - AC_ARG_ENABLE([dpif-default-avx512], - [AS_HELP_STRING([--enable-dpif-default-avx512], - [Enable DPIF AVX512 implementation as default.])], - [dpifavx512=yes],[dpifavx512=no]) - AC_MSG_CHECKING([whether DPIF AVX512 is default implementation]) - if test "$dpifavx512" != yes; then - AC_MSG_RESULT([no]) - else - AC_DEFINE([DPIF_AVX512_DEFAULT], [1], - [DPIF AVX512 is a default implementation of the userspace - datapath interface.]) - AC_MSG_RESULT([yes]) - AC_MSG_WARN( - [Explicit AVX512 feature support will be deprecated in the next release.]) - fi -]) - dnl OVS_CHECK_AVX512 dnl dnl Checks if compiler and binutils supports various AVX512 ISA. 
diff --git a/configure.ac b/configure.ac index 99816302d..1a790adb8 100644 --- a/configure.ac +++ b/configure.ac @@ -187,7 +187,6 @@ OVS_ENABLE_WERROR_TOP OVS_ENABLE_SPARSE OVS_CTAGS_IDENTIFIERS OVS_CHECK_DPCLS_AUTOVALIDATOR -OVS_CHECK_DPIF_AVX512_DEFAULT OVS_CHECK_AVX512 AC_ARG_VAR(KARCH, [Kernel Architecture String]) diff --git a/lib/automake.mk b/lib/automake.mk index 61d664334..954a62778 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -29,14 +29,12 @@ lib_libopenvswitchavx512_la_CFLAGS = \ -mbmi2 \ -fPIC \ $(AM_CFLAGS) -lib_libopenvswitchavx512_la_SOURCES = \ - lib/dpif-netdev-avx512.c if HAVE_AVX512BW if HAVE_AVX512VL lib_libopenvswitchavx512_la_CFLAGS += \ -mavx512bw \ -mavx512vl -lib_libopenvswitchavx512_la_SOURCES += \ +lib_libopenvswitchavx512_la_SOURCES = \ lib/dpif-netdev-lookup-avx512-gather.c endif # HAVE_AVX512VL endif # HAVE_AVX512BW @@ -123,11 +121,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/dpif-netdev-private-dfc.c \ lib/dpif-netdev-private-dfc.h \ lib/dpif-netdev-private-dpcls.h \ - lib/dpif-netdev-private-dpif.c \ - lib/dpif-netdev-private-dpif.h \ lib/dpif-netdev-private-flow.h \ lib/dpif-netdev-private-thread.h \ - lib/dpif-netdev-private.h \ lib/dpif-netdev-perf.c \ lib/dpif-netdev-perf.h \ lib/dpif-offload.c \ diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c deleted file mode 100644 index b530889d6..000000000 --- a/lib/dpif-netdev-avx512.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright (c) 2021 Intel Corporation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef __x86_64__ -/* Sparse cannot handle the AVX512 instructions. */ -#if !defined(__CHECKER__) - -#include <config.h> - -#include "dpif-netdev.h" -#include "dpif-netdev-perf.h" -#include "dpif-netdev-private.h" -#include "dpif-offload.h" - -#include <errno.h> -#include <immintrin.h> - -#include "dp-packet.h" -#include "netdev.h" - -/* Each AVX512 register (zmm register in assembly notation) can contain up to - * 512 bits, which is equivalent to 8 uint64_t variables. This is the maximum - * number of miniflow blocks that can be processed in a single pass of the - * AVX512 code at a time. - */ -#define NUM_U64_IN_ZMM_REG (8) - -/* Structure to contain per-packet metadata that must be attributed to the - * dp netdev flow. This is unfortunate to have to track per packet, however - * it's a bit awkward to maintain them in a performant way. This structure - * helps to keep two variables on a single cache line per packet. - */ -struct pkt_flow_meta { - uint16_t bytes; - uint16_t tcp_flags; -}; - -/* Structure of heap allocated memory for DPIF internals. */ -struct dpif_userdata { - OVS_ALIGNED_VAR(CACHE_LINE_SIZE) - struct netdev_flow_key keys[NETDEV_MAX_BURST]; - OVS_ALIGNED_VAR(CACHE_LINE_SIZE) - struct netdev_flow_key *key_ptrs[NETDEV_MAX_BURST]; - OVS_ALIGNED_VAR(CACHE_LINE_SIZE) - struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST]; -}; - -int32_t -dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, - struct dp_packet_batch *packets, - odp_port_t in_port) -{ - /* Allocate DPIF userdata. 
*/ - if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) { - pmd->netdev_input_func_userdata = - xmalloc_pagealign(sizeof(struct dpif_userdata)); - } - - struct dpif_userdata *ud = pmd->netdev_input_func_userdata; - struct netdev_flow_key *keys = ud->keys; - struct netdev_flow_key **key_ptrs = ud->key_ptrs; - struct pkt_flow_meta *pkt_meta = ud->pkt_meta; - - /* The AVX512 DPIF implementation handles rules in a way that is optimized - * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is - * achieved by separating the rule arrays. Bitmasks are kept for each - * packet, indicating if it matched in the HWOL/EMC/SMC array or DPCLS - * array. Later the two arrays are merged by AVX-512 expand instructions. - */ - - /* Stores the computed output: a rule pointer for each packet. */ - /* Used initially for HWOL/EMC/SMC and Simple Match. */ - struct dpcls_rule *rules[NETDEV_MAX_BURST]; - /* Used for DPCLS. */ - struct dpcls_rule *dpcls_rules[NETDEV_MAX_BURST]; - - uint32_t dpcls_key_idx = 0; - - for (uint32_t i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) { - _mm512_storeu_si512(&rules[i], _mm512_setzero_si512()); - _mm512_storeu_si512(&dpcls_rules[i], _mm512_setzero_si512()); - } - - const size_t batch_size = dp_packet_batch_size(packets); - - /* Prefetch 2 packets ahead when processing. This was found to perform best - * through testing. */ - const uint32_t prefetch_ahead = 2; - const uint32_t initial_prefetch = MIN(prefetch_ahead, batch_size); - for (int i = 0; i < initial_prefetch; i++) { - struct dp_packet *packet = packets->packets[i]; - OVS_PREFETCH(dp_packet_data(packet)); - pkt_metadata_prefetch_init(&packet->md); - } - - const bool simple_match_enabled = dp_netdev_simple_match_enabled(pmd, - in_port); - /* Check if EMC or SMC are enabled. 
*/ - struct dfc_cache *cache = &pmd->flow_cache; - const uint32_t hwol_enabled = dpif_offload_enabled(); - const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0; - const uint32_t smc_enabled = pmd->ctx.smc_enable_db; - - uint32_t n_simple_hit = 0; - uint32_t emc_hits = 0; - uint32_t smc_hits = 0; - uint32_t phwol_hits = 0; - - /* A 1 bit in this mask indicates a hit, so no DPCLS lookup on the pkt. */ - uint32_t hwol_emc_smc_hitmask = 0; - uint32_t smc_hitmask = 0; - - /* The below while loop is based on the 'iter' variable which has a number - * of bits set representing packets that we want to process - * (HWOL->MFEX->EMC->SMC). As each packet is processed, we clear (set to 0) - * the bit representing that packet using '_blsr_u64()'. The - * 'raw_ctz()' will give us the correct index into the 'packets', - * 'pkt_meta', 'keys' and 'rules' arrays. - * - * For one iteration of the while loop, here's some pseudocode as an - * example where 'iter' is represented in binary: - * - * while (iter) { // iter = 1100 - * uint32_t i = raw_ctz(iter); // i = 2 - * iter = _blsr_u64(iter); // iter = 1000 - * // do all processing (HWOL->MFEX->EMC->SMC) - * } - */ - - uint32_t lookup_pkts_bitmask = (UINT64_C(1) << batch_size) - 1; - - if (simple_match_enabled) { - struct dp_packet *packet; - - DP_PACKET_BATCH_FOR_EACH (i, packet, packets) { - struct dp_netdev_flow *f = NULL; - ovs_be16 vlan_tci = 0; - ovs_be16 dl_type = 0; - uint8_t nw_frag = 0; - - if (i + prefetch_ahead < batch_size) { - struct dp_packet **dp_packets = packets->packets; - - /* Prefetch next packet data and metadata. 
*/ - OVS_PREFETCH(dp_packet_data(dp_packets[i + prefetch_ahead])); - pkt_metadata_prefetch_init( - &dp_packets[i + prefetch_ahead]->md); - } - - pkt_metadata_init(&packet->md, in_port); - - pkt_meta[i].tcp_flags = parse_tcp_flags(packet, &dl_type, &nw_frag, - &vlan_tci); - - f = dp_netdev_simple_match_lookup(pmd, in_port, dl_type, - nw_frag, vlan_tci); - if (!f) { - /* Any miss in Simple Match means an upcall is needed. Fall - * back to the scalar DPIF to do this. */ - return -1; - } - - pkt_meta[i].bytes = dp_packet_size(packet); - rules[i] = &f->cr; - n_simple_hit++; - hwol_emc_smc_hitmask |= (UINT32_C(1) << i); - } - - goto action_stage; - } - - uint32_t iter = lookup_pkts_bitmask; - while (iter) { - uint32_t i = raw_ctz(iter); - iter = _blsr_u64(iter); - - if (i + prefetch_ahead < batch_size) { - struct dp_packet **dp_packets = packets->packets; - /* Prefetch next packet data and metadata. */ - OVS_PREFETCH(dp_packet_data(dp_packets[i + prefetch_ahead])); - pkt_metadata_prefetch_init(&dp_packets[i + prefetch_ahead]->md); - } - - /* Get packet pointer from bitmask and packet md. */ - struct dp_packet *packet = packets->packets[i]; - pkt_metadata_init(&packet->md, in_port); - - struct dp_netdev_flow *f = NULL; - struct netdev_flow_key *key = &keys[i]; - - /* Check for a partial hardware offload match. */ - if (hwol_enabled) { - if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, &f))) { - /* Packet restoration failed and it was dropped, do not - * continue processing. */ - continue; - } - if (f) { - rules[i] = &f->cr; - pkt_meta[i].tcp_flags = parse_tcp_flags(packet, - NULL, NULL, NULL); - pkt_meta[i].bytes = dp_packet_size(packet); - phwol_hits++; - hwol_emc_smc_hitmask |= (UINT32_C(1) << i); - continue; - } - } - - /* Do a scalar miniflow extract into keys. */ - miniflow_extract(packet, &key->mf); - - /* Cache TCP and byte values for all packets. 
*/ - pkt_meta[i].bytes = dp_packet_size(packet); - pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf); - - key->len = netdev_flow_key_size(miniflow_n_values(&key->mf)); - key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf); - - if (emc_enabled) { - f = emc_lookup(&cache->emc_cache, key); - - if (f) { - rules[i] = &f->cr; - emc_hits++; - hwol_emc_smc_hitmask |= (UINT32_C(1) << i); - continue; - } - } - - if (smc_enabled) { - f = smc_lookup_single(pmd, packet, key); - if (f) { - rules[i] = &f->cr; - smc_hits++; - smc_hitmask |= (UINT32_C(1) << i); - continue; - } - } - - /* The flow pointer was not found in HWOL/EMC/SMC, so add it to the - * dpcls input keys array for batch lookup later. - */ - key_ptrs[dpcls_key_idx] = &keys[i]; - dpcls_key_idx++; - } - - hwol_emc_smc_hitmask |= smc_hitmask; - uint32_t hwol_emc_smc_missmask = ~hwol_emc_smc_hitmask; - - /* DPCLS handles any packets missed by HWOL/EMC/SMC. It operates on the - * key_ptrs[] for input miniflows to match, storing results in the - * dpcls_rules[] array. - */ - if (dpcls_key_idx > 0) { - struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); - if (OVS_UNLIKELY(!cls)) { - return -1; - } - bool any_miss = - !dpcls_lookup(cls, (const struct netdev_flow_key **) key_ptrs, - dpcls_rules, dpcls_key_idx, NULL); - if (OVS_UNLIKELY(any_miss)) { - return -1; - } - - /* Merge DPCLS rules and HWOL/EMC/SMC rules. */ - uint32_t dpcls_idx = 0; - for (int i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) { - /* Indexing here is somewhat complicated due to DPCLS output rule - * load index depending on the hitmask of HWOL/EMC/SMC. More - * packets from HWOL/EMC/SMC bitmask means less DPCLS rules are - * used. 
- */ - __m512i v_cache_rules = _mm512_loadu_si512(&rules[i]); - __m512i v_merged_rules = - _mm512_mask_expandloadu_epi64(v_cache_rules, - ~hwol_emc_smc_hitmask, - &dpcls_rules[dpcls_idx]); - _mm512_storeu_si512(&rules[i], v_merged_rules); - - /* Update DPCLS load index and bitmask for HWOL/EMC/SMC hits. - * There are NUM_U64_IN_ZMM_REG output pointers per register, - * subtract the HWOL/EMC/SMC lanes equals the number of DPCLS rules - * consumed. - */ - uint32_t hitmask_FF = (hwol_emc_smc_hitmask & 0xFF); - dpcls_idx += NUM_U64_IN_ZMM_REG - __builtin_popcountll(hitmask_FF); - hwol_emc_smc_hitmask = - (hwol_emc_smc_hitmask >> NUM_U64_IN_ZMM_REG); - } - } - - /* At this point we have a 1:1 pkt to rules mapping, so update EMC/SMC - * if required. - */ - /* Insert SMC and DPCLS hits into EMC. */ - if (emc_enabled) { - uint32_t emc_insert_mask = smc_hitmask | hwol_emc_smc_missmask; - emc_insert_mask &= lookup_pkts_bitmask; - emc_probabilistic_insert_batch(pmd, keys, &rules[0], emc_insert_mask); - } - /* Insert DPCLS hits into SMC. */ - if (smc_enabled) { - uint32_t smc_insert_mask = hwol_emc_smc_missmask; - smc_insert_mask &= lookup_pkts_bitmask; - smc_insert_batch(pmd, keys, &rules[0], smc_insert_mask); - } - - /* At this point we don't return error anymore, so commit stats here. */ - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_PHWOL_HIT, phwol_hits); - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, emc_hits); - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, smc_hits); - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT, - dpcls_key_idx); - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP, - dpcls_key_idx); -action_stage: - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV, batch_size); - pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SIMPLE_HIT, - n_simple_hit); - - /* Initialize the "Action Batch" for each flow handled below. 
*/ - struct dp_packet_batch action_batch; - action_batch.trunc = 0; - - while (lookup_pkts_bitmask) { - uint32_t rule_pkt_idx = raw_ctz(lookup_pkts_bitmask); - uint64_t needle = (uintptr_t) rules[rule_pkt_idx]; - - /* Parallel compare NUM_U64_IN_ZMM_REG flow* 's to the needle, create a - * bitmask. - */ - uint32_t batch_bitmask = 0; - for (uint32_t j = 0; j < NETDEV_MAX_BURST; j += NUM_U64_IN_ZMM_REG) { - /* Pre-calculate store addr. */ - uint32_t num_pkts_in_batch = __builtin_popcountll(batch_bitmask); - void *store_addr = &action_batch.packets[num_pkts_in_batch]; - - /* Search for identical flow* in burst, update bitmask. */ - __m512i v_needle = _mm512_set1_epi64(needle); - __m512i v_hay = _mm512_loadu_si512(&rules[j]); - __mmask8 k_cmp_bits = _mm512_cmpeq_epi64_mask(v_needle, v_hay); - uint32_t cmp_bits = k_cmp_bits; - batch_bitmask |= cmp_bits << j; - - /* Compress and store the batched packets. */ - struct dp_packet **packets_ptrs = &packets->packets[j]; - __m512i v_pkt_ptrs = _mm512_loadu_si512(packets_ptrs); - _mm512_mask_compressstoreu_epi64(store_addr, cmp_bits, v_pkt_ptrs); - } - - /* Strip all packets in this batch from the lookup_pkts_bitmask. */ - lookup_pkts_bitmask &= (~batch_bitmask); - action_batch.count = __builtin_popcountll(batch_bitmask); - - /* Loop over all packets in this batch, to gather the byte and tcp_flag - * values, and pass them to the execute function. It would be nice to - * optimize this away, however it is not easy to refactor in dpif. 
- */ - uint32_t bytes = 0; - uint16_t tcp_flags = 0; - uint32_t bitmask_iter = batch_bitmask; - for (int i = 0; i < action_batch.count; i++) { - uint32_t idx = raw_ctz(bitmask_iter); - bitmask_iter = _blsr_u64(bitmask_iter); - - bytes += pkt_meta[idx].bytes; - tcp_flags |= pkt_meta[idx].tcp_flags; - } - - dp_netdev_batch_execute(pmd, &action_batch, rules[rule_pkt_idx], - bytes, tcp_flags); - } - - return 0; -} - -#endif -#endif diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c deleted file mode 100644 index ef4cee2ba..000000000 --- a/lib/dpif-netdev-private-dpif.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2021 Intel Corporation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <config.h> - -#include "dpif-netdev-private-dpif.h" -#include "dpif-netdev-private-thread.h" - -#include <errno.h> -#include <string.h> - -#include "cpu.h" -#include "openvswitch/dynamic-string.h" -#include "openvswitch/vlog.h" -#include "util.h" - -VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl); -#define DPIF_NETDEV_IMPL_AVX512_CHECK (__x86_64__ && HAVE_AVX512F \ - && HAVE_LD_AVX512_GOOD && __SSE4_2__) - -enum dpif_netdev_impl_info_idx { - DPIF_NETDEV_IMPL_SCALAR, - DPIF_NETDEV_IMPL_AVX512 -}; - -#if DPIF_NETDEV_IMPL_AVX512_CHECK -static int32_t -dp_netdev_input_outer_avx512_probe(void) -{ - if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512F) - || !cpu_has_isa(OVS_CPU_ISA_X86_BMI2)) { - return -ENOTSUP; - } - - return 0; -} -#endif - -/* Actual list of implementations goes here. */ -static struct dpif_netdev_impl_info_t dpif_impls[] = { - /* The default scalar C code implementation. */ - [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input, - .probe = NULL, - .name = "dpif_scalar", }, - -#if DPIF_NETDEV_IMPL_AVX512_CHECK - /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */ - [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512, - .probe = dp_netdev_input_outer_avx512_probe, - .name = "dpif_avx512", }, -#endif -}; - -static dp_netdev_input_func default_dpif_func; - -dp_netdev_input_func -dp_netdev_impl_get_default(void) -{ - /* For the first call, this will be NULL. Compute the compile time default. - */ - if (!default_dpif_func) { - int dpif_idx = DPIF_NETDEV_IMPL_SCALAR; - -/* Configure-time overriding to run test suite on all implementations. */ -#if DPIF_NETDEV_IMPL_AVX512_CHECK -#ifdef DPIF_AVX512_DEFAULT - dp_netdev_input_func_probe probe; - - /* Check if the compiled default is compatible. 
*/ - probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe; - if (!probe || !probe()) { - dpif_idx = DPIF_NETDEV_IMPL_AVX512; - } -#endif -#endif - - VLOG_INFO("Default DPIF implementation is %s.\n", - dpif_impls[dpif_idx].name); - default_dpif_func = dpif_impls[dpif_idx].input_func; - } - - return default_dpif_func; -} - -void -dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list, - size_t n) -{ - /* Add all dpif functions to reply string. */ - ds_put_cstr(reply, "Available DPIF implementations:\n"); - - for (uint32_t i = 0; i < ARRAY_SIZE(dpif_impls); i++) { - ds_put_format(reply, " %s (pmds: ", dpif_impls[i].name); - - for (size_t j = 0; j < n; j++) { - struct dp_netdev_pmd_thread *pmd = pmd_list[j]; - if (pmd->core_id == NON_PMD_CORE_ID) { - continue; - } - - if (pmd->netdev_input_func == dpif_impls[i].input_func) { - ds_put_format(reply, "%u,", pmd->core_id); - } - } - - ds_chomp(reply, ','); - - if (ds_last(reply) == ' ') { - ds_put_cstr(reply, "none"); - } - - ds_put_cstr(reply, ")\n"); - } -} - -/* This function checks all available DPIF implementations, and selects the - * returns the function pointer to the one requested by "name". - */ -static int32_t -dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func) -{ - ovs_assert(name); - ovs_assert(out_func); - - uint32_t i; - - for (i = 0; i < ARRAY_SIZE(dpif_impls); i++) { - if (strcmp(dpif_impls[i].name, name) == 0) { - /* Probe function is optional - so check it is set before exec. 
*/ - if (dpif_impls[i].probe) { - int probe_err = dpif_impls[i].probe(); - if (probe_err) { - *out_func = NULL; - return probe_err; - } - } - *out_func = dpif_impls[i].input_func; - return 0; - } - } - - return -EINVAL; -} - -int32_t -dp_netdev_impl_set_default_by_name(const char *name) -{ - dp_netdev_input_func new_default; - - int32_t err = dp_netdev_impl_get_by_name(name, &new_default); - - if (!err) { - default_dpif_func = new_default; - } - - return err; - -} diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h deleted file mode 100644 index cf331cec7..000000000 --- a/lib/dpif-netdev-private-dpif.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2021 Intel Corporation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef DPIF_NETDEV_PRIVATE_DPIF_H -#define DPIF_NETDEV_PRIVATE_DPIF_H 1 - -#include "openvswitch/types.h" - -/* Forward declarations to avoid including files. */ -struct dp_netdev_pmd_thread; -struct dp_packet_batch; -struct ds; - -/* Typedef for DPIF functions. - * Returns whether all packets were processed successfully. - */ -typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd, - struct dp_packet_batch *packets, - odp_port_t port_no); - -/* Probe a DPIF implementation. This allows the implementation to validate CPU - * ISA availability. Returns -ENOTSUP if not available, returns 0 if valid to - * use. 
- */ -typedef int32_t (*dp_netdev_input_func_probe)(void); - -/* Structure describing each available DPIF implementation. */ -struct dpif_netdev_impl_info_t { - /* Function pointer to execute to have this DPIF implementation run. */ - dp_netdev_input_func input_func; - /* Function pointer to execute to check the CPU ISA is available to run. If - * not necessary, it must be set to NULL which implies that it is always - * valid to use. */ - dp_netdev_input_func_probe probe; - /* Name used to select this DPIF implementation. */ - const char *name; -}; - -/* This function returns all available implementations to the caller. */ -void -dp_netdev_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list, - size_t n); - -/* Returns the default DPIF which is first ./configure selected, but can be - * overridden at runtime. */ -dp_netdev_input_func dp_netdev_impl_get_default(void); - -/* Overrides the default DPIF with the user set DPIF. */ -int32_t dp_netdev_impl_set_default_by_name(const char *name); - -bool -dp_netdev_simple_match_enabled(const struct dp_netdev_pmd_thread *pmd, - odp_port_t in_port); - -uint64_t -dp_netdev_simple_match_mark(odp_port_t in_port, ovs_be16 dl_type, - uint8_t nw_frag, ovs_be16 vlan_tci); -struct dp_netdev_flow * -dp_netdev_simple_match_lookup(const struct dp_netdev_pmd_thread *pmd, - odp_port_t in_port, ovs_be16 dl_type, - uint8_t nw_frag, ovs_be16 vlan_tci); - -/* Available DPIF implementations below. */ -int32_t -dp_netdev_input(struct dp_netdev_pmd_thread *pmd, - struct dp_packet_batch *packets, - odp_port_t in_port); - -/* AVX512 enabled DPIF implementation function. 
*/ -int32_t -dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, - struct dp_packet_batch *packets, - odp_port_t in_port); - -#endif /* netdev-private.h */ diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h index 1ab3e586a..bc76c86d2 100644 --- a/lib/dpif-netdev-private-thread.h +++ b/lib/dpif-netdev-private-thread.h @@ -21,7 +21,6 @@ #include "dpif.h" #include "dpif-netdev-perf.h" #include "dpif-netdev-private-dfc.h" -#include "dpif-netdev-private-dpif.h" #include <stdbool.h> #include <stdint.h> @@ -30,7 +29,6 @@ #include "cmap.h" #include "dpif-netdev-private-dfc.h" -#include "dpif-netdev-private-dpif.h" #include "dpif-netdev-perf.h" #include "openvswitch/thread.h" @@ -120,12 +118,6 @@ struct dp_netdev_pmd_thread { /* Current context of the PMD thread. */ struct dp_netdev_pmd_thread_ctx ctx; - /* Function pointer to call for dp_netdev_input() functionality. */ - ATOMIC(dp_netdev_input_func) netdev_input_func; - - /* Pointer for per-DPIF implementation scratch space. */ - void *netdev_input_func_userdata; - struct seq *reload_seq; uint64_t last_reload_seq; diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h deleted file mode 100644 index 029b23a22..000000000 --- a/lib/dpif-netdev-private.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. - * Copyright (c) 2019 Intel Corporation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef DPIF_NETDEV_PRIVATE_H -#define DPIF_NETDEV_PRIVATE_H 1 - -/* This header includes the various dpif-netdev components' header - * files in the appropriate order. Unfortunately there is a strict - * requirement in the include order due to dependences between components. - * E.g: - * DFC/EMC/SMC requires the netdev_flow_key struct - * PMD thread requires DFC_flow struct - * - */ -#include "dpif-netdev-private-flow.h" -#include "dpif-netdev-private-dpcls.h" -#include "dpif-netdev-private-dfc.h" -#include "dpif-netdev-private-thread.h" - -/* Allow other implementations to lookup the DPCLS instances. */ -struct dpcls * -dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, - odp_port_t in_port); - -/* Allow other implementations to execute actions on a batch. */ -void -dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd, - struct dp_packet_batch *packets, - struct dpcls_rule *rule, - uint32_t bytes, - uint16_t tcp_flags); - -int -dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd, - struct dp_packet *packet, - struct dp_netdev_flow **flow); - -#endif /* dpif-netdev-private.h */ diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man index 69b3b1a4a..2b2450884 100644 --- a/lib/dpif-netdev-unixctl.man +++ b/lib/dpif-netdev-unixctl.man @@ -241,9 +241,3 @@ priority, \fIprio\fR, which should be a positive integer value. The highest priority lookup function is used for classification. The number of affected dpcls ports and subtables is returned. -. -.IP "\fBdpif-netdev/dpif-impl-get\fR -Lists the DPIF implementations that are available. -. -.IP "\fBdpif-netdev/dpif-impl-set\fR \fIdpif_impl\fR" -Sets the DPIF to be used to \fIdpif_impl\fR. By default "dpif_scalar" is used. 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2a39bf4a1..55507f797 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -16,9 +16,6 @@ #include <config.h> #include "dpif-netdev.h" -#include "dpif-netdev-private.h" -#include "dpif-netdev-private-dfc.h" -#include "dpif-offload.h" #include <ctype.h> #include <errno.h> @@ -47,6 +44,11 @@ #include "dpif.h" #include "dpif-netdev-lookup.h" #include "dpif-netdev-perf.h" +#include "dpif-netdev-private-dfc.h" +#include "dpif-netdev-private-dpcls.h" +#include "dpif-netdev-private-flow.h" +#include "dpif-netdev-private-thread.h" +#include "dpif-offload.h" #include "dpif-provider.h" #include "dummy.h" #include "fat-rwlock.h" @@ -486,6 +488,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, const struct flow *flow, const struct nlattr *actions, size_t actions_len); +static void dp_netdev_input(struct dp_netdev_pmd_thread *, + struct dp_packet_batch *, odp_port_t port_no); static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, struct dp_packet_batch *); @@ -559,9 +563,8 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd, bool purge); static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd, struct tx_port *tx); -inline struct dpcls * -dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, - odp_port_t in_port); +static inline struct dpcls *dp_netdev_pmd_lookup_dpcls( + struct dp_netdev_pmd_thread *pmd, odp_port_t in_port); static void dp_netdev_request_reconfigure(struct dp_netdev *dp); static inline bool @@ -1021,91 +1024,6 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_destroy(&reply); } -static void -dpif_netdev_impl_get(struct unixctl_conn *conn, int argc OVS_UNUSED, - const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) -{ - struct ds reply = DS_EMPTY_INITIALIZER; - struct shash_node *node; - - ovs_mutex_lock(&dp_netdev_mutex); - SHASH_FOR_EACH (node, &dp_netdevs) { - struct 
dp_netdev_pmd_thread **pmd_list; - struct dp_netdev *dp = node->data; - size_t n; - - /* Get PMD threads list, required to get the DPIF impl used by each PMD - * thread. */ - sorted_poll_thread_list(dp, &pmd_list, &n); - dp_netdev_impl_get(&reply, pmd_list, n); - free(pmd_list); - } - ovs_mutex_unlock(&dp_netdev_mutex); - unixctl_command_reply(conn, ds_cstr(&reply)); - ds_destroy(&reply); -} - -static void -dpif_netdev_impl_set(struct unixctl_conn *conn, int argc OVS_UNUSED, - const char *argv[], void *aux OVS_UNUSED) -{ - /* This function requires just one parameter, the DPIF name. */ - const char *dpif_name = argv[1]; - struct shash_node *node; - - static const char *error_description[2] = { - "Unknown DPIF implementation", - "CPU doesn't support the required instruction for", - }; - - ovs_mutex_lock(&dp_netdev_mutex); - int32_t err = dp_netdev_impl_set_default_by_name(dpif_name); - - if (err) { - struct ds reply = DS_EMPTY_INITIALIZER; - ds_put_format(&reply, "DPIF implementation not available: %s %s.\n", - error_description[ (err == -ENOTSUP) ], dpif_name); - const char *reply_str = ds_cstr(&reply); - unixctl_command_reply_error(conn, reply_str); - VLOG_ERR("%s", reply_str); - ds_destroy(&reply); - ovs_mutex_unlock(&dp_netdev_mutex); - return; - } - - SHASH_FOR_EACH (node, &dp_netdevs) { - struct dp_netdev *dp = node->data; - - /* Get PMD threads list, required to get DPCLS instances. */ - size_t n; - struct dp_netdev_pmd_thread **pmd_list; - sorted_poll_thread_list(dp, &pmd_list, &n); - - for (size_t i = 0; i < n; i++) { - struct dp_netdev_pmd_thread *pmd = pmd_list[i]; - if (pmd->core_id == NON_PMD_CORE_ID) { - continue; - } - - /* Initialize DPIF function pointer to the newly configured - * default. */ - atomic_store_relaxed(&pmd->netdev_input_func, - dp_netdev_impl_get_default()); - }; - - free(pmd_list); - } - ovs_mutex_unlock(&dp_netdev_mutex); - - /* Reply with success to command. 
*/ - struct ds reply = DS_EMPTY_INITIALIZER; - ds_put_format(&reply, "DPIF implementation set to %s.\n", dpif_name); - const char *reply_str = ds_cstr(&reply); - unixctl_command_reply(conn, reply_str); - VLOG_INFO("%s", reply_str); - ds_destroy(&reply); -} - static void dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) @@ -1382,13 +1300,6 @@ dpif_netdev_init(void) unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", NULL, 0, 0, dpif_netdev_subtable_lookup_get, NULL); - unixctl_command_register("dpif-netdev/dpif-impl-set", - "dpif_implementation_name", - 1, 1, dpif_netdev_impl_set, - NULL); - unixctl_command_register("dpif-netdev/dpif-impl-get", "", - 0, 0, dpif_netdev_impl_get, - NULL); return 0; } @@ -2139,7 +2050,7 @@ void dp_netdev_flow_unref(struct dp_netdev_flow *flow) } } -inline struct dpcls * +static inline struct dpcls * dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, odp_port_t in_port) { @@ -3077,7 +2988,7 @@ dp_netdev_get_mega_ufid(const struct match *match, ovs_u128 *mega_ufid) odp_flow_key_hash(&key, sizeof key, mega_ufid); } -uint64_t +static uint64_t dp_netdev_simple_match_mark(odp_port_t in_port, ovs_be16 dl_type, uint8_t nw_frag, ovs_be16 vlan_tci) { @@ -3117,7 +3028,7 @@ dp_netdev_simple_match_mark(odp_port_t in_port, ovs_be16 dl_type, | (OVS_FORCE uint16_t) (vlan_tci & htons(VLAN_VID_MASK | VLAN_CFI)); } -struct dp_netdev_flow * +static struct dp_netdev_flow * dp_netdev_simple_match_lookup(const struct dp_netdev_pmd_thread *pmd, odp_port_t in_port, ovs_be16 dl_type, uint8_t nw_frag, ovs_be16 vlan_tci) @@ -3138,7 +3049,7 @@ dp_netdev_simple_match_lookup(const struct dp_netdev_pmd_thread *pmd, return found ? flow : NULL; } -bool +static bool dp_netdev_simple_match_enabled(const struct dp_netdev_pmd_thread *pmd, odp_port_t in_port) { @@ -4752,10 +4663,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, } /* Process packet batch. 
*/ - int ret = pmd->netdev_input_func(pmd, &batch, port_no); - if (ret) { - dp_netdev_input(pmd, &batch, port_no); - } + dp_netdev_input(pmd, &batch, port_no); /* Assign processing cycles to rx queue. */ cycles = cycle_timer_stop(&pmd->perf_stats, &timer); @@ -6963,9 +6871,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, pmd_init_max_sleep(dp, pmd); - /* Initialize DPIF function pointer to the default configured version. */ - atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default()); - /* init the 'flow_cache' since there is no * actual thread created for NON_PMD_CORE_ID. */ if (core_id == NON_PMD_CORE_ID) { @@ -7004,7 +6909,6 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) seq_destroy(pmd->reload_seq); ovs_mutex_destroy(&pmd->port_mutex); ovs_mutex_destroy(&pmd->bond_mutex); - free(pmd->netdev_input_func_userdata); free(pmd); } @@ -7368,24 +7272,6 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, actions->actions, actions->size); } -void -dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd, - struct dp_packet_batch *packets, - struct dpcls_rule *rule, - uint32_t bytes, - uint16_t tcp_flags) -{ - /* Gets action* from the rule. 
*/ - struct dp_netdev_flow *flow = dp_netdev_flow_cast(rule); - struct dp_netdev_actions *actions = dp_netdev_flow_get_actions(flow); - - dp_netdev_flow_used(flow, dp_packet_batch_size(packets), bytes, - tcp_flags, pmd->ctx.now / 1000); - const uint32_t steal = 1; - dp_netdev_execute_actions(pmd, packets, steal, &flow->flow, - actions->actions, actions->size); -} - static inline void dp_netdev_queue_batches(struct dp_packet *pkt, struct dp_netdev_flow *flow, uint16_t tcp_flags, @@ -7514,7 +7400,7 @@ smc_lookup_single(struct dp_netdev_pmd_thread *pmd, return NULL; } -inline int +static inline int dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet, struct dp_netdev_flow **flow) @@ -7993,13 +7879,12 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd, } } -int32_t +static void dp_netdev_input(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, odp_port_t port_no) { dp_netdev_input__(pmd, packets, false, port_no); - return 0; } static void diff --git a/tests/pmd.at b/tests/pmd.at index 19698f3e9..677d0feb1 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -1182,22 +1182,6 @@ AT_CHECK([ovs-appctl dpctl/del-dp dummy@dp0], [0], [dnl OVS_VSWITCHD_STOP AT_CLEANUP -AT_SETUP([PMD - dpif configuration]) -OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0]) -AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd]) - -AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl -DPIF implementation set to dpif_scalar. -]) - -AT_CHECK([ovs-vsctl show], [], [stdout]) -AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-get | grep "dpif_scalar"], [], [dnl - dpif_scalar (pmds: 0) -]) - -OVS_VSWITCHD_STOP -AT_CLEANUP - AT_SETUP([PMD - dpcls configuration]) OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0]) AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd]) -- 2.53.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
