Re: [ovs-dev] [PATCH v4 1/2] Optimize the poll loop for poll_immediate_wake()

Anton Ivanov Tue, 03 Aug 2021 02:34:05 -0700


On 03/08/2021 00:29, Gaëtan Rivet wrote:

On Tue, Jul 20, 2021, at 17:06, [email protected] wrote:

From: Anton Ivanov <[email protected]>


If we are not obtaining any useful information out of the poll(),
such as is a fd busy or not, we do not need to do a poll() if
an immediate_wake() has been requested.

This cuts out all the pollfd hash additions, forming the poll
arguments and the actual poll() after a call to
poll_immediate_wake()

Signed-off-by: Anton Ivanov <[email protected]>

Hi Anton,

Patch generally looks good to me. A few nits below.

---
  lib/poll-loop.c | 80 ++++++++++++++++++++++++++++++++-----------------
  lib/timeval.c   | 13 +++++++-
  2 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/lib/poll-loop.c b/lib/poll-loop.c
index 4e751ff2c..9452f6e22 100644
--- a/lib/poll-loop.c
+++ b/lib/poll-loop.c
@@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int
events, const char *where)

COVERAGE_INC(poll_create_node);+ if (loop->timeout_when == LLONG_MIN) {

+        /* There was a previous request to bail out of this poll loop.
+         * There is no point to engage in yack shaving a poll hmap.
+         */
+        return;
+    }
+
      /* Both 'fd' and 'wevent' cannot be set. */
      ovs_assert(!fd != !wevent);

@@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where)

  void
  poll_timer_wait_at(long long int msec, const char *where)
  {
-    long long int now = time_msec();
+    long long int now;
      long long int when;
+    struct poll_loop *loop = poll_loop();
+
+    /* We have an outstanding request for an immediate wake -
+     * either explicit (from poll_immediate_wake()) or as a
+     * result of a previous timeout calculation which gave
+     * a negative timeout.
+     * No point trying to recalculate the timeout.
+     */
+    if (loop->timeout_when == LLONG_MIN) {
+        return;
+    }
+
+    now = time_msec();

if (msec <= 0) {

          /* Wake up immediately. */
@@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const
char *where)
  void
  poll_immediate_wake_at(const char *where)
  {
-    poll_timer_wait_at(0, where);
+    poll_timer_wait_at(LLONG_MIN, where);
  }

/* Logs, if appropriate, that the poll loop was awakened by an event

@@ -320,49 +340,55 @@ poll_block(void)
  {
      struct poll_loop *loop = poll_loop();
      struct poll_node *node;
-    struct pollfd *pollfds;
+    struct pollfd *pollfds = NULL;
      HANDLE *wevents = NULL;
      int elapsed;
-    int retval;
+    int retval = 0;
      int i;

- /* Register fatal signal events before actually doing any real work for

-     * poll_block. */
-    fatal_signal_wait();

if (loop->timeout_when == LLONG_MIN) {

          COVERAGE_INC(poll_zero_timeout);
+    } else {
+        /* Register fatal signal events only if we intend to do real work for
+         * poll_block.
+         */
+        fatal_signal_wait();

When should we avoid registering fatal signal events?
Even for immediate wake, time_poll() will execute for general
upkeep (coverage + RCU), should a fatal signal handler not
be installed in that case?


The fatal signal events are handled in ALL cases in the fatal_signal_run() 
which is towards the end of the poll loop.

fatal_singal_wait only adds the pipe to the fds. If we are not polling on that 
fd anyway, no point adding it either.

timewarp_run();

-    pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
+    if (loop->timeout_when != LLONG_MIN) {
+        pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);

#ifdef _WIN32

-    wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
+        wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
  #endif

- /* Populate with all the fds and events. */

-    i = 0;
-    HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
-        pollfds[i] = node->pollfd;
+        /* Populate with all the fds and events. */
+        i = 0;
+        HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
+            pollfds[i] = node->pollfd;
  #ifdef _WIN32
-        wevents[i] = node->wevent;
-        if (node->pollfd.fd && node->wevent) {
-            short int wsa_events = 0;
-            if (node->pollfd.events & POLLIN) {
-                wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
+            wevents[i] = node->wevent;
+            if (node->pollfd.fd && node->wevent) {
+                short int wsa_events = 0;
+                if (node->pollfd.events & POLLIN) {
+                    wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
+                }
+                if (node->pollfd.events & POLLOUT) {
+                    wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
+                }
+                WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
              }
-            if (node->pollfd.events & POLLOUT) {
-                wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
-            }
-            WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
-        }
  #endif
-        i++;
-    }
+            i++;
+        }

- retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,

-                       loop->timeout_when, &elapsed);
+        retval = time_poll(pollfds, hmap_count(&loop->poll_nodes),
wevents,
+                           loop->timeout_when, &elapsed);
+    } else {
+        retval = time_poll(NULL, 0, NULL, loop->timeout_when,
&elapsed);
+    }
      if (retval < 0) {
          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
          VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
diff --git a/lib/timeval.c b/lib/timeval.c
index 193c7bab1..ef09a15e0 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
HANDLE *handles OVS_UNUSED,
      for (;;) {
          long long int now = time_msec();
          int time_left;
+        retval = 0;

if (now >= timeout_when) {

              time_left = 0;
@@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
HANDLE *handles OVS_UNUSED,
          }

#ifndef _WIN32

-        retval = poll(pollfds, n_pollfds, time_left);
+        /* If timeout_when is LLONG_MIN, we should skip the poll().
+         * We do not skip on time_left == 0, because we may have
+         * ended up with time_left = 0 after a poll with valid
+         * pollfds was interrupted and restarted.
+         */
+        if (timeout_when != LLONG_MIN) {

The _WIN32 case checks 'n_pollfds != 0' before calling.
Should both be aligned? Maybe the _WIN32 should avoid the call if
'n_pollfds == 0 || timeout_when == LLONG_MIN'?

Sure. I try to avoid touching the WIN32 code as I do not have an environment to 
test it.

+            retval = poll(pollfds, n_pollfds, time_left);
+        }
          if (retval < 0) {
              retval = -errno;
          }
@@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds,
HANDLE *handles OVS_UNUSED,
          if (n_pollfds > MAXIMUM_WAIT_OBJECTS) {
              VLOG_ERR("Cannot handle more than maximum wait objects\n");
          } else if (n_pollfds != 0) {
+            /* If we are doing an immediate_wake shortcut n_pollfds is
+             * zero, so we skip the actual call.
+             */

I find this sentence hard to read.
I would suggest instead:

   /* n_pollfds can be zero on immediate_wake.
    * In that case the call is not necessary. */

              retval = WaitForMultipleObjects(n_pollfds, handles, FALSE,
                                              time_left);
          }
--
2.20.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [PATCH v4 1/2] Optimize the poll loop for poll_immediate_wake()

Reply via email to