On Wed, May 29, 2019 at 03:13:45PM +0200, Phil Sutter wrote:
> Use of select() after the first call to mnl_socket_recvfrom() was
> incorrect, FD_SET() was called after the call to select() returned. This
> effectively turned the FD_ISSET() check into a noop (always true
> condition).

Good catch.

> Rewrite the receive loop using mnl_nft_event_listener() as an example:
> 
> * Combine the two calls to FD_ZERO(), FD_SET() and select() into one at
>   loop start.
> * Check ENOBUFS condition and warn the user, also upon other errors.
> * Continue on ENOBUFS, it is not a permanent error.
> 
> Signed-off-by: Phil Sutter <[email protected]>
> ---
>  src/mnl.c | 39 ++++++++++++++++++++++-----------------
>  1 file changed, 22 insertions(+), 17 deletions(-)
> 
> diff --git a/src/mnl.c b/src/mnl.c
> index 06280aa2cb50a..4fbfd059c0228 100644
> --- a/src/mnl.c
> +++ b/src/mnl.c
> @@ -299,34 +299,39 @@ int mnl_batch_talk(struct netlink_ctx *ctx, struct 
> list_head *err_list)
>       if (ret == -1)
>               return -1;
>  
> -     FD_ZERO(&readfds);
> -     FD_SET(fd, &readfds);
> +     while (true) {
> +             FD_ZERO(&readfds);
> +             FD_SET(fd, &readfds);
>  
> -     /* receive and digest all the acknowledgments from the kernel. */
> -     ret = select(fd+1, &readfds, NULL, NULL, &tv);
> -     if (ret == -1)
> -             return -1;
> +             /* receive and digest all the acknowledgments from the kernel. 
> */
> +             ret = select(fd + 1, &readfds, NULL, NULL, &tv);
> +             if (ret < 0)
> +                     return -1;
>  
> -     while (ret > 0 && FD_ISSET(fd, &readfds)) {
> -             struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buf;
> +             if (!FD_ISSET(fd, &readfds))
> +                     break;
>  
>               ret = mnl_socket_recvfrom(nl, rcv_buf, sizeof(rcv_buf));
> -             if (ret == -1)
> -                     return -1;
> +             if (ret < 0) {
> +                     if (errno == ENOBUFS) {
> +                             nft_print(&ctx->nft->output,
> +                                       "# ERROR: We lost some netlink 
> events!\n");

Probably better handling this from nft_netlink().

Could you just fix the problem you report above? Then, we make another
pass on this ENOBUFS error.

> +                             continue;
> +                     }
> +                     nft_print(&ctx->nft->output, "# ERROR: %s\n",
> +                               strerror(errno));
> +                     err = ret;
> +                     break;
> +             }
>  
>               ret = mnl_cb_run(rcv_buf, ret, 0, portid, 
> &netlink_echo_callback, ctx);
>               /* Continue on error, make sure we get all acknowledgments */
>               if (ret == -1) {
> +                     struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buf;
> +
>                       mnl_err_list_node_add(err_list, errno, nlh->nlmsg_seq);
>                       err = -1;
>               }
> -
> -             ret = select(fd+1, &readfds, NULL, NULL, &tv);
> -             if (ret == -1)
> -                     return -1;
> -
> -             FD_ZERO(&readfds);
> -             FD_SET(fd, &readfds);
>       }
>       return err;
>  }
> -- 
> 2.21.0
> 

Reply via email to