The handler and CPU mapping in upcalls are incorrect, and this is
especially noticeable on systems with CPU isolation enabled.

Say we have a 12-core system where only every even-numbered CPU is
enabled: C0, C2, C4, C6, C8, C10.

This means we will create an array of size 6, populated with sockets
[S0, S1, S2, S3, S4, S5], that is sent to the kernel.

The problem is that when the kernel does an upcall it looks up the
socket array using the index of the CPU, effectively adding extra load
on some CPUs while leaving no work on others:

  C0 indexes to S0
  C2 indexes to S2 (should be S1)
  C4 indexes to S4 (should be S2)

Modulo of 6 (the size of the socket array) is applied, so we wrap back
to S0:

  C6 indexes to S0 (should be S3)
  C8 indexes to S2 (should be S4)
  C10 indexes to S4 (should be S5)

Effectively, sockets S0, S2, S4 get overloaded while sockets S1, S3, S5
get no work assigned to them.

This leads the kernel to emit the following message:

  "openvswitch: cpu_id mismatch with handler threads"
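For reference, here is a minimal standalone sketch of the arithmetic
described above (illustrative only, not part of this patch; the CPU
layout and socket count are taken straight from the example):

    #include <stdio.h>

    int
    main(void)
    {
        /* Every even-numbered CPU enabled, as in the example above. */
        int enabled_cpus[] = { 0, 2, 4, 6, 8, 10 };
        int n_sockets = 6;              /* Sockets S0..S5. */
        size_t i;

        for (i = 0; i < sizeof enabled_cpus / sizeof *enabled_cpus; i++) {
            int cpu = enabled_cpus[i];

            /* The kernel indexes the socket array by CPU id, with a
             * modulo wrap once the id exceeds the array size, instead
             * of by the CPU's position among the enabled CPUs. */
            printf("C%d indexes to S%d (should be S%zu)\n",
                   cpu, cpu % n_sockets, i);
        }
        return 0;
    }

Running it prints the skewed mapping listed above, with S1, S3 and S5
never selected.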
To fix this we send the kernel a corrected array of sockets, sized to
the total number of CPUs in the system. In the above example we would
create the corrected array as follows:

  [S0, S1, S1, S2, S2, S3, S3, S4, S4, S5, S5, S0]

This guarantees that, regardless of which CPU a packet comes in on, the
kernel will map it to the correct socket.
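As a sanity check, the construction can be sketched in userspace as
follows (illustrative only; the real code derives the core map from
count_total_cores(), as in the diff below):

    #include <stdio.h>

    int
    main(void)
    {
        /* -1 marks a CPU outside our affinity mask; even CPUs enabled. */
        int core_map[12] = { 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1 };
        int n_handlers = 6;             /* Handler sockets S0..S5. */
        int corrected[12];
        int i, cur = 0;

        for (i = 0; i < 12; i++) {
            /* Every CPU slot gets the current socket; only an enabled
             * CPU advances 'cur', so a disabled CPU shares a socket
             * with the next enabled one (wrapping back to S0). */
            corrected[i] = cur;
            if (core_map[i] != -1) {
                cur = (cur + 1) % n_handlers;
            }
        }

        for (i = 0; i < 12; i++) {
            printf("C%d -> S%d\n", i, corrected[i]);
        }
        return 0;
    }

This produces [S0, S1, S1, S2, S2, S3, S3, S4, S4, S5, S5, S0], matching
the corrected array above.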
Co-authored-by: Aaron Conole <[email protected]>
Signed-off-by: Michael Santana <[email protected]>
---
 lib/dpif-netlink.c | 28 +++++++++++++++++++++++-----
 lib/ovs-thread.c   | 35 +++++++++++++++++++++++++++++++++++
 lib/ovs-thread.h   |  1 +
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 71e35ccdd..1196956e1 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -803,11 +803,13 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
     struct dpif_netlink_dp request, reply;
     struct ofpbuf *bufp;
-    int error;
+
+    int *kernel_cores;
+    uint32_t *corrected, n_corrected;
+    int error, i, cur;
     int n_cores;
 
-    n_cores = count_cpu_cores();
-    ovs_assert(n_cores == n_upcall_pids);
+    n_cores = count_total_cores(NULL, 0);
 
     VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
     dpif_netlink_dp_init(&request);
@@ -817,7 +819,21 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
     request.user_features = dpif->user_features |
                             OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
 
-    request.upcall_pids = upcall_pids;
+    kernel_cores = xcalloc(n_cores, sizeof(int));
+    corrected = xcalloc(n_cores, sizeof(uint32_t));
+
+    count_total_cores(kernel_cores, n_cores);
+    cur = 0;
+    for (i = 0; i < n_cores; i++) {
+        uint32_t pid;
+        pid = upcall_pids[cur];
+        corrected[i] = pid;
+
+        if (kernel_cores[i] != -1) {
+            cur = (cur + 1) % n_upcall_pids;
+        }
+    }
+    request.upcall_pids = corrected;
     request.n_upcall_pids = n_cores;
 
     error = dpif_netlink_dp_transact(&request, &reply, &bufp);
@@ -825,9 +841,11 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
         dpif->user_features = reply.user_features;
         ofpbuf_delete(bufp);
         if (!dpif_netlink_upcall_per_cpu(dpif)) {
-            return -EOPNOTSUPP;
+            error = -EOPNOTSUPP;
         }
     }
 
+    free(corrected);
+    free(kernel_cores);
     return error;
 }
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index 805cba622..7feb63f37 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -663,6 +663,41 @@ count_cpu_cores(void)
     return n_cores > 0 ? n_cores : 0;
 }
 
+int
+count_total_cores(int *core_map, size_t n_core_map) {
+    long int n_cores;
+
+#ifndef _WIN32
+    n_cores = sysconf(_SC_NPROCESSORS_CONF);
+#ifdef __linux__
+    if (core_map && n_cores > 0) {
+        cpu_set_t *set = CPU_ALLOC(n_cores);
+
+        memset(core_map, -1, n_core_map * sizeof(int));
+
+        if (set) {
+            size_t size = CPU_ALLOC_SIZE(n_cores);
+
+            if (!sched_getaffinity(0, size, set)) {
+                for (; n_core_map > 0; n_core_map -= 1) {
+                    core_map[n_core_map - 1] = -1;
+                    if (CPU_ISSET_S(n_core_map - 1, size, set)) {
+                        core_map[n_core_map - 1] = n_core_map - 1;
+                    }
+                }
+            }
+            CPU_FREE(set);
+        }
+    }
+#endif
+#else
+    n_cores = 0;
+    errno = ENOTSUP;
+#endif
+
+    return n_cores > 0 ? n_cores : 0;
+}
+
 /* Returns 'true' if current thread is PMD thread. */
 bool
 thread_is_pmd(void)
diff --git a/lib/ovs-thread.h b/lib/ovs-thread.h
index 3b444ccdc..af59abd72 100644
--- a/lib/ovs-thread.h
+++ b/lib/ovs-thread.h
@@ -522,6 +522,7 @@ bool may_fork(void);
 /* Useful functions related to threading. */
 
 int count_cpu_cores(void);
+int count_total_cores(int *, size_t);
 bool thread_is_pmd(void);
 
 #endif /* ovs-thread.h */
-- 
2.33.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
