From: David Marchand <[email protected]>

By default, the Linux timer slack groups timer expiries into
50 us intervals.

With some traffic patterns this can mean that returning to process
packets after a sleep takes too long and packets are dropped.

Add a helper to util.c and use it to reduce the timer slack
for PMD threads, so that sleeps with a finer resolution are possible
and PMD threads do not sleep for too long.

Fixes: de3bbdc479a9 ("dpif-netdev: Add PMD load based sleeping.")
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2023-January/401121.html
Reported-by: Ilya Maximets <[email protected]>
Signed-off-by: David Marchand <[email protected]>
Co-authored-by: Kevin Traynor <[email protected]>
Signed-off-by: Kevin Traynor <[email protected]>
---
 Documentation/topics/dpdk/pmd.rst |  5 -----
 lib/dpif-netdev.c                 |  4 ++++
 lib/util.c                        | 16 ++++++++++++++++
 lib/util.h                        |  1 +
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index 604ac3f6b..0c3bb717f 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -374,9 +374,4 @@ system configuration (e.g. enabling processor C-states) and workloads.
     rate.
 
-.. note::
-
-    By default Linux kernel groups timer expirations and this can add an
-    overhead of up to 50 microseconds to a requested timer expiration.
-
 .. _ovs-vswitchd(8):
     http://openvswitch.org/support/dist-docs/ovs-vswitchd.8.html
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a47d54c6f..4f06e3f4e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -172,4 +172,7 @@ static struct odp_support dp_netdev_support = {
 #define PMD_RCU_QUIESCE_INTERVAL 10000LL
 
+/* Timer resolution for PMD threads in nanoseconds. */
+#define PMD_TIMER_RES_NS 1000
+
 /* Number of pkts Rx on an interface that will stop pmd thread sleeping. */
 #define PMD_SLEEP_THRESH (NETDEV_MAX_BURST / 2)
@@ -6963,4 +6966,5 @@ pmd_thread_main(void *f_)
     dfc_cache_init(&pmd->flow_cache);
     pmd_alloc_static_tx_qid(pmd);
+    set_timer_resolution(PMD_TIMER_RES_NS);
 
 reload:
diff --git a/lib/util.c b/lib/util.c
index 7576eb06e..96a71550d 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -26,4 +26,7 @@
 #include <stdlib.h>
 #include <string.h>
+#ifdef __linux__
+#include <sys/prctl.h>
+#endif
 #include <sys/stat.h>
 #include <unistd.h>
@@ -2420,4 +2423,17 @@ xnanosleep_no_quiesce(uint64_t nanoseconds)
 }
 
+#ifdef __linux__ /* Timer slack is set with the Linux-only prctl(2). */
+void
+set_timer_resolution(unsigned long nanoseconds)
+{
+    prctl(PR_SET_TIMERSLACK, nanoseconds); /* Result is not checked. */
+}
+#else /* !__linux__: nothing to adjust, so this is a no-op. */
+void
+set_timer_resolution(unsigned long nanoseconds OVS_UNUSED)
+{
+}
+#endif /* __linux__ */
+
 /* Determine whether standard output is a tty or not. This is useful to decide
  * whether to use color output or not when --color option for utilities is set
diff --git a/lib/util.h b/lib/util.h
index f35f33021..62801e85f 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -595,4 +595,5 @@ void xsleep(unsigned int seconds);
 void xnanosleep(uint64_t nanoseconds);
 void xnanosleep_no_quiesce(uint64_t nanoseconds);
+void set_timer_resolution(unsigned long nanoseconds);
 
 bool is_stdout_a_tty(void);
-- 
2.39.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to