On Fri, Feb 13, 2026 at 3:11 PM Cindy Lu <[email protected]> wrote:
>
> Implement the initialization logic for AF_PACKET based netdev
> endpoints in filter-redirector:
>
> 1. filter_redirector_netdev_setup(): Creates and binds AF_PACKET
>    sockets to the specified TAP interface. For in_netdev, the socket
>    is used to receive packets; for out_netdev, it is used to send.
>
> 2. filter_redirector_netdev_read(): Async handler for reading packets
>    from the in_netdev AF_PACKET socket. Packets are forwarded through
>    the redirector chain.
>
> 3. Updated cleanup to properly close AF_PACKET sockets and free
>    associated buffers.
>
> 4. Modified allow_send_when_stopped logic to consider both chardev
>    and netdev output endpoints, and to only enable when the
>    redirector is active (status=on).
>
> 5. VM state change handler now manages the AF_PACKET read handler
>    activation based on VM running state and enable_when_stopped.
>
> Signed-off-by: Cindy Lu <[email protected]>
> ---
>  net/filter-mirror.c | 241 ++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 224 insertions(+), 17 deletions(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index 37035f3892..f8001612ec 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -26,6 +26,13 @@
>  #include "qemu/sockets.h"
>  #include "block/aio-wait.h"
>  #include "system/runstate.h"
> +#include "net/tap.h"
> +#include "net/tap_int.h"
> +
> +#include <sys/socket.h>
> +#include <net/if.h>
> +#include <linux/if_packet.h>
> +#include <netinet/if_ether.h>
>
>  typedef struct MirrorState MirrorState;
>  DECLARE_INSTANCE_CHECKER(MirrorState, FILTER_MIRROR,
> @@ -42,6 +49,10 @@ struct MirrorState {
>      char *outdev;
>      char *in_netdev;
>      char *out_netdev;
> +    NetClientState *out_net;
> +    int in_netfd;
> +    uint8_t *in_netbuf;
> +    int out_netfd;
>      CharFrontend chr_in;
>      CharFrontend chr_out;
>      SocketReadState rs;
> @@ -172,6 +183,17 @@ static int redirector_chr_can_read(void *opaque)
>      return REDIRECTOR_MAX_LEN;
>  }
>
> +static bool filter_redirector_input_active(NetFilterState *nf, bool enable)
> +{
> +    MirrorState *s = FILTER_REDIRECTOR(nf);
> +
> +    if (!enable) {
> +        return false;
> +    }
> +
> +    return runstate_is_running() || s->enable_when_stopped;
> +}
> +
>  static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
>  {
>      NetFilterState *nf = opaque;
> @@ -208,6 +230,40 @@ static void redirector_chr_event(void *opaque, QEMUChrEvent event)
>      }
>  }
>
> +static void filter_redirector_netdev_read(void *opaque)
> +{
> +    NetFilterState *nf = opaque;
> +    MirrorState *s = FILTER_REDIRECTOR(nf);
> +    struct sockaddr_ll sll;
> +    socklen_t sll_len;
> +    ssize_t len;
> +
> +    if (!s->in_netbuf || s->in_netfd < 0) {
> +        return;
> +    }
> +
> +    for (;;) {
> +        sll_len = sizeof(sll);
> +        len = recvfrom(s->in_netfd, s->in_netbuf, REDIRECTOR_MAX_LEN, 0,
> +                       (struct sockaddr *)&sll, &sll_len);
> +        if (len <= 0) {
> +            break;
> +        }
> +
> +        if (sll.sll_pkttype != PACKET_OUTGOING) {
> +            continue;
> +        }
> +
> +        redirector_to_filter(nf, s->in_netbuf, len);
> +    }
> +
> +    if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK &&
> +        errno != EINTR) {
> +        error_report("filter redirector read in_netdev failed(%s)",
> +                     strerror(errno));
> +    }
> +}
> +
>  static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
>                                           NetClientState *sender,
>                                           unsigned flags,
> @@ -268,7 +324,19 @@ static void filter_redirector_cleanup(NetFilterState *nf)
>
>      qemu_chr_fe_deinit(&s->chr_in, false);
>      qemu_chr_fe_deinit(&s->chr_out, false);
> -    qemu_del_vm_change_state_handler(s->vmsentry);
> +    if (s->vmsentry) {
> +        qemu_del_vm_change_state_handler(s->vmsentry);
> +        s->vmsentry = NULL;
> +    }
> +    if (s->in_netfd >= 0) {
> +        qemu_set_fd_handler(s->in_netfd, NULL, NULL, NULL);
> +        close(s->in_netfd);
> +        s->in_netfd = -1;
> +    }
> +    if (s->out_netfd >= 0) {
> +        close(s->out_netfd);
> +        s->out_netfd = -1;
> +    }
>
>      if (nf->netdev) {
>          nf->netdev->allow_send_when_stopped = 0;
> @@ -320,13 +388,13 @@ filter_redirector_refresh_allow_send_when_stopped(NetFilterState *nf)
>
>      /*
>       * Allow sending when stopped if enable_when_stopped is set and we have
> -     * an outdev. This must be independent of nf->on (status) so that packets
> -     * can still flow through the filter chain to other filters even when this
> -     * redirector is disabled. Otherwise, tap_send() will disable read_poll
> -     * when qemu_can_send_packet() returns false, preventing further packet
> -     * processing.
> +     * a redirector output endpoint and the redirector is enabled.
> +     * Keeping this active while redirector status=off can unexpectedly
> +     * drain packets in migration stop windows and perturb vhost ring state.
>       */
> -    nc->allow_send_when_stopped = (s->enable_when_stopped && s->outdev);
> +    nc->allow_send_when_stopped = (nf->on &&
> +                                   s->enable_when_stopped &&
> +                                   (s->outdev || s->out_netdev));
>  }
>
>  static void filter_redirector_vm_state_change(void *opaque, bool running,
> @@ -335,8 +403,16 @@ static void filter_redirector_vm_state_change(void *opaque, bool running,
>      NetFilterState *nf = opaque;
>      MirrorState *s = FILTER_REDIRECTOR(nf);
>      NetClientState *nc = nf->netdev;
> +    bool active = filter_redirector_input_active(nf, nf->on);
> +
> +    if (s->in_netfd >= 0) {
> +        qemu_set_fd_handler(s->in_netfd,
> +                            active ? filter_redirector_netdev_read : NULL,
> +                            NULL,
> +                            active ? nf : NULL);
> +    }
>
> -    if (!running && s->enable_when_stopped && nc->info->read_poll) {
> +    if (!running && nc && s->enable_when_stopped && nc->info->read_poll) {
>          nc->info->read_poll(nc, true);
>      }
>  }
> @@ -362,21 +438,127 @@ static void filter_redirector_maybe_enable_read_poll(NetFilterState *nf)
>      }
>  }
>
> +static bool filter_redirector_netdev_setup(MirrorState *s, Error **errp)
> +{
> +    struct sockaddr_ll sll = { 0 };
> +    char ifname[IFNAMSIZ] = { 0 };
> +    int ifindex;
> +    int fd;
> +    NetClientState *nc;
> +
> +    if (s->in_netdev) {
> +        int tapfd;
> +        nc = qemu_find_netdev(s->in_netdev);
> +        if (!nc) {
> +            error_setg(errp, "in_netdev '%s' not found", s->in_netdev);
> +            return false;
> +        }
> +
> +        if (nc->info->type != NET_CLIENT_DRIVER_TAP) {
> +            error_setg(errp, "in_netdev '%s' must be a TAP netdev",
> +                       s->in_netdev);
> +            return false;
> +        }

This seems inelegant; a netfilter should not be coupled to a
specific netdev type. Is there a reason for doing this?

> +
> +        tapfd = tap_get_fd(nc);
> +        if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
> +            error_setg(errp, "failed to resolve TAP ifname for in_netdev 
> '%s'",
> +                       s->in_netdev);
> +            return false;
> +        }
> +    } else if (s->out_netdev) {
> +        nc = qemu_find_netdev(s->out_netdev);
> +        if (!nc) {
> +            error_setg(errp, "out_netdev '%s' not found", s->out_netdev);
> +            return false;
> +        }
> +        /*
> +         * out_netdev always uses AF_PACKET.

I don't follow this: why does out_netdev always use AF_PACKET?

> For TAP netdev we resolve the
> +         * interface name from tap fd; for non-TAP netdev we interpret
> +         * out_netdev string as host interface name.
> +         */
> +        if (nc->info->type == NET_CLIENT_DRIVER_TAP) {
> +            int tapfd = tap_get_fd(nc);
> +
> +            if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
> +                error_setg(errp,
> +                           "failed to resolve TAP ifname for out_netdev 
> '%s'",
> +                           s->out_netdev);
> +                return false;
> +            }
> +        } else {
> +            snprintf(ifname, sizeof(ifname), "%s", s->out_netdev);
> +        }
> +    }
> +
> +    ifindex = if_nametoindex(ifname);
> +    if (!ifindex) {
> +        error_setg_errno(errp, errno,
> +                         "failed to resolve ifindex for '%s'", ifname);
> +        return false;
> +    }
> +
> +    fd = qemu_socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));

There must be a misunderstanding; can't we simply reuse the -netdev
socket backend instead of opening a packet socket?

Another concern: opening a packet socket requires elevated privileges
(CAP_NET_RAW, per packet(7)).

> +    if (fd < 0) {
> +        error_setg_errno(errp, errno, "failed to create AF_PACKET socket");
> +        return false;
> +    }
> +
> +    sll.sll_family = AF_PACKET;
> +    sll.sll_ifindex = ifindex;
> +    sll.sll_protocol = htons(ETH_P_ALL);
> +    if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
> +        error_setg_errno(errp, errno,
> +                         "failed to bind AF_PACKET socket for ifname '%s'",
> +                         ifname);
> +        close(fd);
> +        return false;
> +    }
> +
> +    if (s->in_netdev) {
> +        s->in_netfd = fd;
> +        g_free(s->in_netbuf);
> +        s->in_netbuf = g_malloc(REDIRECTOR_MAX_LEN);
> +    } else {
> +        s->out_netfd = fd;
> +        s->out_net = nc;
> +    }
> +    return true;
> +}
> +

Thanks


Reply via email to