On Fri, Feb 13, 2026 at 3:11 PM Cindy Lu <[email protected]> wrote:
>
> Implement the initialization logic for AF_PACKET based netdev
> endpoints in filter-redirector:
>
> 1. filter_redirector_netdev_setup(): Creates and binds AF_PACKET
> sockets to the specified TAP interface. For in_netdev, the socket
> is used to receive packets; for out_netdev, it is used to send.
>
> 2. filter_redirector_netdev_read(): Async handler for reading packets
> from the in_netdev AF_PACKET socket. Packets are forwarded through
> the redirector chain.
>
> 3. Updated cleanup to properly close AF_PACKET sockets and free
> associated buffers.
>
> 4. Modified allow_send_when_stopped logic to consider both chardev
> and netdev output endpoints, and to only enable when the
> redirector is active (status=on).
>
> 5. VM state change handler now manages the AF_PACKET read handler
> activation based on VM running state and enable_when_stopped.
>
> Signed-off-by: Cindy Lu <[email protected]>
> ---
> net/filter-mirror.c | 241 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 224 insertions(+), 17 deletions(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index 37035f3892..f8001612ec 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -26,6 +26,13 @@
> #include "qemu/sockets.h"
> #include "block/aio-wait.h"
> #include "system/runstate.h"
> +#include "net/tap.h"
> +#include "net/tap_int.h"
> +
> +#include <sys/socket.h>
> +#include <net/if.h>
> +#include <linux/if_packet.h>
> +#include <netinet/if_ether.h>
>
> typedef struct MirrorState MirrorState;
> DECLARE_INSTANCE_CHECKER(MirrorState, FILTER_MIRROR,
> @@ -42,6 +49,10 @@ struct MirrorState {
> char *outdev;
> char *in_netdev;
> char *out_netdev;
> + NetClientState *out_net;
> + int in_netfd;
> + uint8_t *in_netbuf;
> + int out_netfd;
> CharFrontend chr_in;
> CharFrontend chr_out;
> SocketReadState rs;
> @@ -172,6 +183,17 @@ static int redirector_chr_can_read(void *opaque)
> return REDIRECTOR_MAX_LEN;
> }
>
> +static bool filter_redirector_input_active(NetFilterState *nf, bool enable)
> +{
> + MirrorState *s = FILTER_REDIRECTOR(nf);
> +
> + if (!enable) {
> + return false;
> + }
> +
> + return runstate_is_running() || s->enable_when_stopped;
> +}
> +
> static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
> {
> NetFilterState *nf = opaque;
> @@ -208,6 +230,40 @@ static void redirector_chr_event(void *opaque, QEMUChrEvent event)
> }
> }
>
> +static void filter_redirector_netdev_read(void *opaque)
> +{
> + NetFilterState *nf = opaque;
> + MirrorState *s = FILTER_REDIRECTOR(nf);
> + struct sockaddr_ll sll;
> + socklen_t sll_len;
> + ssize_t len;
> +
> + if (!s->in_netbuf || s->in_netfd < 0) {
> + return;
> + }
> +
> + for (;;) {
> + sll_len = sizeof(sll);
> + len = recvfrom(s->in_netfd, s->in_netbuf, REDIRECTOR_MAX_LEN, 0,
> + (struct sockaddr *)&sll, &sll_len);
> + if (len <= 0) {
> + break;
> + }
> +
> + if (sll.sll_pkttype != PACKET_OUTGOING) {
> + continue;
> + }
> +
> + redirector_to_filter(nf, s->in_netbuf, len);
> + }
> +
> + if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK &&
> + errno != EINTR) {
> + error_report("filter redirector read in_netdev failed(%s)",
> + strerror(errno));
> + }
> +}
> +
> static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
> NetClientState *sender,
> unsigned flags,
> @@ -268,7 +324,19 @@ static void filter_redirector_cleanup(NetFilterState *nf)
>
> qemu_chr_fe_deinit(&s->chr_in, false);
> qemu_chr_fe_deinit(&s->chr_out, false);
> - qemu_del_vm_change_state_handler(s->vmsentry);
> + if (s->vmsentry) {
> + qemu_del_vm_change_state_handler(s->vmsentry);
> + s->vmsentry = NULL;
> + }
> + if (s->in_netfd >= 0) {
> + qemu_set_fd_handler(s->in_netfd, NULL, NULL, NULL);
> + close(s->in_netfd);
> + s->in_netfd = -1;
> + }
> + if (s->out_netfd >= 0) {
> + close(s->out_netfd);
> + s->out_netfd = -1;
> + }
>
> if (nf->netdev) {
> nf->netdev->allow_send_when_stopped = 0;
> @@ -320,13 +388,13 @@ filter_redirector_refresh_allow_send_when_stopped(NetFilterState *nf)
>
> /*
> * Allow sending when stopped if enable_when_stopped is set and we have
> - * an outdev. This must be independent of nf->on (status) so that packets
> - * can still flow through the filter chain to other filters even when this
> - * redirector is disabled. Otherwise, tap_send() will disable read_poll
> - * when qemu_can_send_packet() returns false, preventing further packet
> - * processing.
> + * a redirector output endpoint and the redirector is enabled.
> + * Keeping this active while redirector status=off can unexpectedly
> + * drain packets in migration stop windows and perturb vhost ring state.
> */
> - nc->allow_send_when_stopped = (s->enable_when_stopped && s->outdev);
> + nc->allow_send_when_stopped = (nf->on &&
> + s->enable_when_stopped &&
> + (s->outdev || s->out_netdev));
> }
>
> static void filter_redirector_vm_state_change(void *opaque, bool running,
> @@ -335,8 +403,16 @@ static void filter_redirector_vm_state_change(void *opaque, bool running,
> NetFilterState *nf = opaque;
> MirrorState *s = FILTER_REDIRECTOR(nf);
> NetClientState *nc = nf->netdev;
> + bool active = filter_redirector_input_active(nf, nf->on);
> +
> + if (s->in_netfd >= 0) {
> + qemu_set_fd_handler(s->in_netfd,
> + active ? filter_redirector_netdev_read : NULL,
> + NULL,
> + active ? nf : NULL);
> + }
>
> - if (!running && s->enable_when_stopped && nc->info->read_poll) {
> + if (!running && nc && s->enable_when_stopped && nc->info->read_poll) {
> nc->info->read_poll(nc, true);
> }
> }
> @@ -362,21 +438,127 @@ static void filter_redirector_maybe_enable_read_poll(NetFilterState *nf)
> }
> }
>
> +static bool filter_redirector_netdev_setup(MirrorState *s, Error **errp)
> +{
> + struct sockaddr_ll sll = { 0 };
> + char ifname[IFNAMSIZ] = { 0 };
> + int ifindex;
> + int fd;
> + NetClientState *nc;
> +
> + if (s->in_netdev) {
> + int tapfd;
> + nc = qemu_find_netdev(s->in_netdev);
> + if (!nc) {
> + error_setg(errp, "in_netdev '%s' not found", s->in_netdev);
> + return false;
> + }
> +
> + if (nc->info->type != NET_CLIENT_DRIVER_TAP) {
> + error_setg(errp, "in_netdev '%s' must be a TAP netdev",
> + s->in_netdev);
> + return false;
> + }
This seems inelegant; the netfilter should not be coupled with a
specific netdev type. Any reason for doing this?
> +
> + tapfd = tap_get_fd(nc);
> + if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
> + error_setg(errp, "failed to resolve TAP ifname for in_netdev '%s'",
> + s->in_netdev);
> + return false;
> + }
> + } else if (s->out_netdev) {
> + nc = qemu_find_netdev(s->out_netdev);
> + if (!nc) {
> + error_setg(errp, "out_netdev '%s' not found", s->out_netdev);
> + return false;
> + }
> + /*
> + * out_netdev always uses AF_PACKET.
I don't think I understand this.
> + * For TAP netdev we resolve the
> + * interface name from tap fd; for non-TAP netdev we interpret
> + * out_netdev string as host interface name.
> + */
> + if (nc->info->type == NET_CLIENT_DRIVER_TAP) {
> + int tapfd = tap_get_fd(nc);
> +
> + if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
> + error_setg(errp,
> + "failed to resolve TAP ifname for out_netdev '%s'",
> + s->out_netdev);
> + return false;
> + }
> + } else {
> + snprintf(ifname, sizeof(ifname), "%s", s->out_netdev);
> + }
> + }
> +
> + ifindex = if_nametoindex(ifname);
> + if (!ifindex) {
> + error_setg_errno(errp, errno,
> + "failed to resolve ifindex for '%s'", ifname);
> + return false;
> + }
> +
> + fd = qemu_socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));
There must be a misunderstanding here: can't we simply reuse the existing
-netdev socket backend rather than opening a packet socket?
Another concern: creating a packet socket requires extra privileges
(CAP_NET_RAW, and possibly CAP_NET_ADMIN, etc.).
> + if (fd < 0) {
> + error_setg_errno(errp, errno, "failed to create AF_PACKET socket");
> + return false;
> + }
> +
> + sll.sll_family = AF_PACKET;
> + sll.sll_ifindex = ifindex;
> + sll.sll_protocol = htons(ETH_P_ALL);
> + if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
> + error_setg_errno(errp, errno,
> + "failed to bind AF_PACKET socket for ifname '%s'",
> + ifname);
> + close(fd);
> + return false;
> + }
> +
> + if (s->in_netdev) {
> + s->in_netfd = fd;
> + g_free(s->in_netbuf);
> + s->in_netbuf = g_malloc(REDIRECTOR_MAX_LEN);
> + } else {
> + s->out_netfd = fd;
> + s->out_net = nc;
> + }
> + return true;
> +}
> +
Thanks