The handler-to-CPU mapping for upcalls is incorrect, and this is
especially noticeable on systems with CPU isolation enabled.

Say we have a 12-core system where only every even-numbered CPU is enabled:
C0, C2, C4, C6, C8, C10

This means we will create an array of size 6, populated with sockets
[S0, S1, S2, S3, S4, S5], that will be sent to the kernel.

The problem is that when the kernel makes an upcall it indexes the socket
array by the CPU id, effectively piling additional load onto some sockets
while leaving others with no work at all.

e.g.

C0  indexes to S0
C2  indexes to S2 (should be S1)
C4  indexes to S4 (should be S2)

A modulo of 6 (the size of the socket array) is applied, so we wrap back around to S0:
C6  indexes to S0 (should be S3)
C8  indexes to S2 (should be S4)
C10 indexes to S4 (should be S5)

Effectively, sockets S0, S2, S4 get overloaded while sockets S1, S3, S5
get no work assigned to them.
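
For illustration only, here is a standalone C sketch (not the actual
kernel code) of the lookup described above: the receiving CPU id, taken
modulo the socket array size, selects the socket.  Running it with the
12-CPU / 6-socket setup from the example reproduces the mismatches:

    /* Standalone sketch, not the actual kernel code: per-CPU dispatch
     * effectively picks pids[cpu % n_pids].  CPU numbers and socket
     * indices below match the example above. */
    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint32_t pids[] = { 0, 1, 2, 3, 4, 5 };      /* S0..S5 */
        size_t n_pids = sizeof pids / sizeof *pids;
        int enabled[] = { 0, 2, 4, 6, 8, 10 };       /* C0, C2, ..., C10 */

        for (size_t i = 0; i < sizeof enabled / sizeof *enabled; i++) {
            printf("C%-2d indexes to S%u (expected S%zu)\n",
                   enabled[i], (unsigned) pids[enabled[i] % n_pids], i);
        }
        return 0;
    }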

This causes the kernel to emit the following message:
"openvswitch: cpu_id mismatch with handler threads"

To fix this, we send the kernel a corrected array of sockets, sized to
the total number of CPUs in the system. In the above example we would
create the corrected array as follows:
[S0, S1, S1, S2, S2, S3, S3, S4, S4, S5, S5, S0]

This guarantees that, regardless of which CPU a packet comes in on, the
kernel will map it to the correct socket.
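
A simplified, standalone sketch of that correction (the hard-coded core
map and names here are illustrative only; the patch below discovers the
map via count_total_cores()): walk every CPU in the system, assign it
the current handler socket, and only advance to the next socket once an
enabled CPU has consumed it.

    /* Illustrative sketch of the correction: iterate over all CPUs and
     * only advance to the next handler socket when the current CPU is
     * actually enabled (core_map[i] != -1). */
    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint32_t pids[] = { 0, 1, 2, 3, 4, 5 };            /* S0..S5 */
        size_t n_pids = sizeof pids / sizeof *pids;
        /* -1 marks a CPU that is not in the affinity set. */
        int core_map[12] = { 0, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1 };
        uint32_t corrected[12];
        size_t cur = 0;

        for (size_t i = 0; i < 12; i++) {
            corrected[i] = pids[cur];
            if (core_map[i] != -1) {
                cur = (cur + 1) % n_pids;
            }
            printf("S%u ", (unsigned) corrected[i]);
        }
        putchar('\n');   /* prints: S0 S1 S1 S2 S2 S3 S3 S4 S4 S5 S5 S0 */
        return 0;
    }

With such an array, cpu_id modulo the array size lands every enabled CPU
on its own handler socket.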

Co-authored-by: Aaron Conole <[email protected]>
Signed-off-by: Aaron Conole <[email protected]>
Signed-off-by: Michael Santana <[email protected]>
---
 lib/dpif-netlink.c | 28 +++++++++++++++++++++++-----
 lib/ovs-thread.c   | 35 +++++++++++++++++++++++++++++++++++
 lib/ovs-thread.h   |  1 +
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 71e35ccdd..1dcfbf040 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -803,11 +803,13 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
     struct dpif_netlink_dp request, reply;
     struct ofpbuf *bufp;
-    int error;
+
+    int *kernel_cores;
+    uint32_t *corrected;
+    int error, i, cur;
     int n_cores;
 
-    n_cores = count_cpu_cores();
-    ovs_assert(n_cores == n_upcall_pids);
+    n_cores = count_total_cores(NULL, 0);
     VLOG_DBG("Dispatch mode(per-cpu): Number of CPUs is %d", n_cores);
 
     dpif_netlink_dp_init(&request);
@@ -817,7 +819,21 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
     request.user_features = dpif->user_features |
                             OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
 
-    request.upcall_pids = upcall_pids;
+    kernel_cores = xcalloc(n_cores, sizeof(int));
+    corrected = xcalloc(n_cores, sizeof(uint32_t));
+
+    count_total_cores(kernel_cores, n_cores);
+    cur = 0;
+    for (i = 0; i < n_cores; i++) {
+        uint32_t pid;
+        pid = upcall_pids[cur];
+        corrected[i] = pid;
+
+        if (kernel_cores[i] != -1) {
+            cur = (cur + 1) % n_upcall_pids;
+        }
+    }
+    request.upcall_pids = corrected;
     request.n_upcall_pids = n_cores;
 
     error = dpif_netlink_dp_transact(&request, &reply, &bufp);
@@ -825,9 +841,11 @@ dpif_netlink_set_handler_pids(struct dpif *dpif_, const uint32_t *upcall_pids,
         dpif->user_features = reply.user_features;
         ofpbuf_delete(bufp);
         if (!dpif_netlink_upcall_per_cpu(dpif)) {
-            return -EOPNOTSUPP;
+            error = -EOPNOTSUPP;
         }
     }
+    free(corrected);
+    free(kernel_cores);
     return error;
 }
 
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index 805cba622..7feb63f37 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -663,6 +663,41 @@ count_cpu_cores(void)
     return n_cores > 0 ? n_cores : 0;
 }
 
+int
+count_total_cores(int *core_map, size_t n_core_map) {
+    long int n_cores;
+
+#ifndef _WIN32
+    n_cores = sysconf(_SC_NPROCESSORS_CONF);
+#ifdef __linux__
+    if (core_map && n_cores > 0) {
+        cpu_set_t *set = CPU_ALLOC(n_cores);
+
+        memset(core_map, -1, n_core_map * sizeof(int));
+
+        if (set) {
+            size_t size = CPU_ALLOC_SIZE(n_cores);
+
+            if (!sched_getaffinity(0, size, set)) {
+                for (; n_core_map > 0; n_core_map -= 1) {
+                    core_map[n_core_map - 1] = -1;
+                    if (CPU_ISSET_S(n_core_map - 1, size, set)) {
+                        core_map[n_core_map - 1] = n_core_map - 1;
+                    }
+                }
+            }
+            CPU_FREE(set);
+        }
+    }
+#endif
+#else
+    n_cores = 0;
+    errno = ENOTSUP;
+#endif
+
+    return n_cores > 0 ? n_cores : 0;
+}
+
 /* Returns 'true' if current thread is PMD thread. */
 bool
 thread_is_pmd(void)
diff --git a/lib/ovs-thread.h b/lib/ovs-thread.h
index 3b444ccdc..af59abd72 100644
--- a/lib/ovs-thread.h
+++ b/lib/ovs-thread.h
@@ -522,6 +522,7 @@ bool may_fork(void);
 /* Useful functions related to threading. */
 
 int count_cpu_cores(void);
+int count_total_cores(int *, size_t);
 bool thread_is_pmd(void);
 
 #endif /* ovs-thread.h */
-- 
2.33.1
