Re: [ovs-dev] [PATCH v4 1/2] Optimize the poll loop for poll_immediate_wake()

Gaëtan Rivet Mon, 02 Aug 2021 16:30:36 -0700

On Tue, Jul 20, 2021, at 17:06, [email protected] wrote:
> From: Anton Ivanov <[email protected]>
> 
> If we are not obtaining any useful information out of the poll(),
> such as is a fd busy or not, we do not need to do a poll() if
> an immediate_wake() has been requested.
> 
> This cuts out all the pollfd hash additions, forming the poll
> arguments and the actual poll() after a call to
> poll_immediate_wake()
> 
> Signed-off-by: Anton Ivanov <[email protected]>


Hi Anton,

Patch generally looks good to me. A few nits below.

> ---
>  lib/poll-loop.c | 80 ++++++++++++++++++++++++++++++++-----------------
>  lib/timeval.c   | 13 +++++++-
>  2 files changed, 65 insertions(+), 28 deletions(-)
> 
> diff --git a/lib/poll-loop.c b/lib/poll-loop.c
> index 4e751ff2c..9452f6e22 100644
> --- a/lib/poll-loop.c
> +++ b/lib/poll-loop.c
> @@ -107,6 +107,13 @@ poll_create_node(int fd, HANDLE wevent, short int 
> events, const char *where)
>  
>      COVERAGE_INC(poll_create_node);
>  
> +    if (loop->timeout_when == LLONG_MIN) {
> +        /* There was a previous request to bail out of this poll loop.
> +         * There is no point to engage in yack shaving a poll hmap.
> +         */
> +        return;
> +    }
> +
>      /* Both 'fd' and 'wevent' cannot be set. */
>      ovs_assert(!fd != !wevent);
>  
> @@ -181,8 +188,21 @@ poll_wevent_wait_at(HANDLE wevent, const char *where)
>  void
>  poll_timer_wait_at(long long int msec, const char *where)
>  {
> -    long long int now = time_msec();
> +    long long int now;
>      long long int when;
> +    struct poll_loop *loop = poll_loop();
> +
> +    /* We have an outstanding request for an immediate wake -
> +     * either explicit (from poll_immediate_wake()) or as a
> +     * result of a previous timeout calculation which gave
> +     * a negative timeout.
> +     * No point trying to recalculate the timeout.
> +     */
> +    if (loop->timeout_when == LLONG_MIN) {
> +        return;
> +    }
> +
> +    now = time_msec();
>  
>      if (msec <= 0) {
>          /* Wake up immediately. */
> @@ -229,7 +249,7 @@ poll_timer_wait_until_at(long long int when, const 
> char *where)
>  void
>  poll_immediate_wake_at(const char *where)
>  {
> -    poll_timer_wait_at(0, where);
> +    poll_timer_wait_at(LLONG_MIN, where);
>  }
>  
>  /* Logs, if appropriate, that the poll loop was awakened by an event
> @@ -320,49 +340,55 @@ poll_block(void)
>  {
>      struct poll_loop *loop = poll_loop();
>      struct poll_node *node;
> -    struct pollfd *pollfds;
> +    struct pollfd *pollfds = NULL;
>      HANDLE *wevents = NULL;
>      int elapsed;
> -    int retval;
> +    int retval = 0;
>      int i;
>  
> -    /* Register fatal signal events before actually doing any real work for
> -     * poll_block. */
> -    fatal_signal_wait();
>  
>      if (loop->timeout_when == LLONG_MIN) {
>          COVERAGE_INC(poll_zero_timeout);
> +    } else {
> +        /* Register fatal signal events only if we intend to do real work for
> +         * poll_block.
> +         */
> +        fatal_signal_wait();

When should we avoid registering fatal signal events?
Even for immediate wake, time_poll() will execute for general
upkeep (coverage + RCU), should a fatal signal handler not
be installed in that case?

>      }
>  
>      timewarp_run();
> -    pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
> +    if (loop->timeout_when != LLONG_MIN) {
> +        pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
>  
>  #ifdef _WIN32
> -    wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
> +        wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
>  #endif
>  
> -    /* Populate with all the fds and events. */
> -    i = 0;
> -    HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
> -        pollfds[i] = node->pollfd;
> +        /* Populate with all the fds and events. */
> +        i = 0;
> +        HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
> +            pollfds[i] = node->pollfd;
>  #ifdef _WIN32
> -        wevents[i] = node->wevent;
> -        if (node->pollfd.fd && node->wevent) {
> -            short int wsa_events = 0;
> -            if (node->pollfd.events & POLLIN) {
> -                wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
> +            wevents[i] = node->wevent;
> +            if (node->pollfd.fd && node->wevent) {
> +                short int wsa_events = 0;
> +                if (node->pollfd.events & POLLIN) {
> +                    wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
> +                }
> +                if (node->pollfd.events & POLLOUT) {
> +                    wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
> +                }
> +                WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
>              }
> -            if (node->pollfd.events & POLLOUT) {
> -                wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
> -            }
> -            WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
> -        }
>  #endif
> -        i++;
> -    }
> +            i++;
> +        }
>  
> -    retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
> -                       loop->timeout_when, &elapsed);
> +        retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), 
> wevents,
> +                           loop->timeout_when, &elapsed);
> +    } else {
> +        retval = time_poll(NULL, 0, NULL, loop->timeout_when, 
> &elapsed);
> +    }
>      if (retval < 0) {
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>          VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
> diff --git a/lib/timeval.c b/lib/timeval.c
> index 193c7bab1..ef09a15e0 100644
> --- a/lib/timeval.c
> +++ b/lib/timeval.c
> @@ -305,6 +305,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>      for (;;) {
>          long long int now = time_msec();
>          int time_left;
> +        retval = 0;
>  
>          if (now >= timeout_when) {
>              time_left = 0;
> @@ -323,7 +324,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>          }
>  
>  #ifndef _WIN32
> -        retval = poll(pollfds, n_pollfds, time_left);
> +        /* If timeout_when is LLONG_MIN, we should skip the poll().
> +         * We do not skip on time_left == 0, because we may have
> +         * ended up with time_left = 0 after a poll with valid
> +         * pollfds was interrupted and restarted.
> +         */
> +        if (timeout_when != LLONG_MIN) {

The _WIN32 case checks 'n_pollfds != 0' before calling.
Should both be aligned? Maybe the _WIN32 should avoid the call if
'n_pollfds == 0 || timeout_when == LLONG_MIN'?

> +            retval = poll(pollfds, n_pollfds, time_left);
> +        }
>          if (retval < 0) {
>              retval = -errno;
>          }
> @@ -331,6 +339,9 @@ time_poll(struct pollfd *pollfds, int n_pollfds, 
> HANDLE *handles OVS_UNUSED,
>          if (n_pollfds > MAXIMUM_WAIT_OBJECTS) {
>              VLOG_ERR("Cannot handle more than maximum wait objects\n");
>          } else if (n_pollfds != 0) {
> +            /* If we are doing an immediate_wake shortcut n_pollfds is
> +             * zero, so we skip the actual call.
> +             */

I find this sentence hard to read.
I would suggest instead:

  /* n_pollfds can be zero on immediate_wake.
   * In that case the call is not necessary. */

>              retval = WaitForMultipleObjects(n_pollfds, handles, FALSE,
>                                              time_left);
>          }
> -- 
> 2.20.1
> 
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 


-- 
Gaetan Rivet
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [PATCH v4 1/2] Optimize the poll loop for poll_immediate_wake()

Reply via email to