[add MST]

On Tue, 29 Nov 2016 12:11:33 -0800, John Fastabend wrote:
> virtio_net XDP support expects receive buffers to be contiguous.
> If this is not the case we enable a slowpath to allow connectivity
> to continue but at a significant performance overhead associated with
> linearizing data. To make users painfully aware that XDP is
> running in a degraded mode we throw an xdp buffer error.
> 
> To linearize packets we allocate a page and copy the segments of
> the data, including the header, into it. After this the page can be
> handled by XDP code flow as normal.
> 
> Then depending on the return code the page is either freed or sent
> to the XDP xmit path. There is no attempt to optimize this path.
> 
> Signed-off-by: John Fastabend <john.r.fastab...@intel.com>
> ---
>  drivers/net/virtio_net.c |   70 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9604e55..b0ce4ef 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -449,6 +449,56 @@ static struct sk_buff *receive_big(struct net_device *dev,
>       return NULL;
>  }
>  
> +/* The conditions to enable XDP should preclude the underlying device from
> + * sending packets across multiple buffers (num_buf > 1). However per spec
> + * it does not appear to be illegal to do so but rather just against convention.
> + * So in order to avoid making a system unresponsive the packets are pushed
> + * into a page and the XDP program is run. This will be extremely slow and we
> + * push a warning to the user to fix this as soon as possible. Fixing this may
> + * require resolving the underlying hardware to determine why multiple buffers
> + * are being received or simply loading the XDP program in the ingress stack
> + * after the skb is built because there is no advantage to running it here
> + * anymore.
> + */
> +static struct page *xdp_linearize_page(struct receive_queue *rq,
> +                                    u16 num_buf,
> +                                    struct page *p,
> +                                    int offset,
> +                                    unsigned int *len)
> +{
> +     struct page *page = alloc_page(GFP_ATOMIC);
> +     unsigned int page_off = 0;
> +
> +     if (!page)
> +             return NULL;
> +
> +     memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> +     while (--num_buf) {
> +             unsigned int buflen;
> +             unsigned long ctx;
> +             void *buf;
> +             int off;
> +
> +             ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
> +             if (unlikely(!ctx))
> +                     goto err_buf;
> +
> +             buf = mergeable_ctx_to_buf_address(ctx);
> +             p = virt_to_head_page(buf);
> +             off = buf - page_address(p);
> +
> +             memcpy(page_address(page) + page_off,
> +                    page_address(p) + off, buflen);
> +             page_off += buflen;

Could a malicious user potentially submit a frame bigger than the MTU?

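AFAICS nothing bounds page_off here - it grows by each buflen with no check
against PAGE_SIZE (or the MTU), so a long enough chain of merged buffers
would run off the end of the page that was just allocated. Maybe a guard
before the copy, something roughly like this (untested sketch against the
loop above):

	/* don't let the merged buffers overflow the destination page */
	if (page_off + buflen > PAGE_SIZE)
		goto err_buf;

	memcpy(page_address(page) + page_off,
	       page_address(p) + off, buflen);
	page_off += buflen;
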
> +     }
> +
> +     *len = page_off;
> +     return page;
> +err_buf:
> +     __free_pages(page, 0);
> +     return NULL;
> +}
> +
>  static struct sk_buff *receive_mergeable(struct net_device *dev,
>                                        struct virtnet_info *vi,
>                                        struct receive_queue *rq,
> @@ -469,21 +519,37 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>       rcu_read_lock();
>       xdp_prog = rcu_dereference(rq->xdp_prog);
>       if (xdp_prog) {
> +             struct page *xdp_page;
>               u32 act;
>  
>               if (num_buf > 1) {
>                       bpf_warn_invalid_xdp_buffer();
> -                     goto err_xdp;
> +
> +                     /* linearize data for XDP */
> +                     xdp_page = xdp_linearize_page(rq, num_buf,
> +                                                   page, offset, &len);
> +                     if (!xdp_page)
> +                             goto err_xdp;
> +                     offset = len;
> +             } else {
> +                     xdp_page = page;
>               }
>  
> -             act = do_xdp_prog(vi, xdp_prog, page, offset, len);
> +             act = do_xdp_prog(vi, xdp_prog, xdp_page, offset, len);
>               switch (act) {
>               case XDP_PASS:
> +                     if (unlikely(xdp_page != page))
> +                             __free_pages(xdp_page, 0);
>                       break;
>               case XDP_TX:
> +                     if (unlikely(xdp_page != page))
> +                             goto err_xdp;
> +                     rcu_read_unlock();

Only if there is a reason for a v4 - this unlock could go into the previous
patch.
