On 08/17/2018 04:08 PM, Tushar Dave wrote:
> Like sockmap (sk_msg), socksg also deals with struct scatterlist
> therefore socksg programs can use existing bpf helper bpf_msg_pull_data
> to access packet data contained in struct scatterlist. While doing some
> preliminary testing, there are a couple of issues found with
> bpf_msg_pull_data that are fixed in this patch.
> 
> Also, there cannot be more than MAX_SKB_FRAGS entries in sg_data
> therefore any checks for sg entry more than MAX_SKB_FRAGS in
> bpf_msg_pull_data() is removed.

In sockmap the scatterlist is used as a ring so the MAX_SKB_FRAGS
check is needed to keep searching through the ring when sg_start
is non-zero.

> 
> Besides that, I also ran into issues while put_page() is invoked.
> e.g.
> [ 450.568723] BUG: Bad page state in process swapper/10 pfn:2021540
> [ 450.575632] page:ffffea0080855000 count:0 mapcount:0
> mapping:ffff88103d006840 index:0xffff882021540000 compound_mapcount: 0
> [ 450.588069] flags: 0x6fffff80008100(slab|head)
> [ 450.593033] raw: 006fffff80008100 dead000000000100 dead000000000200
> ffff88103d006840
> [ 450.601683] raw: ffff882021540000 0000000080080007 00000000ffffffff
> 0000000000000000
> [ 450.610337] page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
> [ 450.617530] bad because of flags: 0x100(slab)
> 
> To avoid above issue, currently put_page() is disabled in this patch
> temporarily. I am working on alternatives so that page allocated via
> slab (in this case) can be freed without any issue.
> 
> Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
> Acked-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
> ---
>  net/core/filter.c | 61 
> +++++++++++++++++++++++++++++--------------------------
>  1 file changed, 32 insertions(+), 29 deletions(-)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index e427c8e..cc52baa 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2316,7 +2316,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff 
> *msg)
>  BPF_CALL_4(bpf_msg_pull_data,
>          struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
>  {
> -     unsigned int len = 0, offset = 0, copy = 0;
> +     unsigned int len = 0, offset = 0, copy = 0, off = 0;
>       struct scatterlist *sg = msg->sg_data;
>       int first_sg, last_sg, i, shift;
>       unsigned char *p, *to, *from;
> @@ -2330,22 +2330,28 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff 
> *msg)
>       i = msg->sg_start;
>       do {
>               len = sg[i].length;
> -             offset += len;
>               if (start < offset + len)
>                       break;
> +             offset += len;

This looks like a generic fix unrelated to this series.
Can you send that as a bugfix?

>               i++;
> -             if (i == MAX_SKB_FRAGS)
> -                     i = 0;
> -     } while (i != msg->sg_end);
> +     } while (i <= msg->sg_end);
>  

As noted above the MAX_SKB_FRAGS check is needed because
sg_start can be non-zero and sg_end < sg_start. In these
cases we need to search the entries at the start of the
array (being used as a ring).

> +     /* return error if start is out of range */
>       if (unlikely(start >= offset + len))
>               return -EINVAL;
>  
> -     if (!msg->sg_copy[i] && bytes <= len)
> -             goto out;
> +     /* return error if i is last entry in sglist and end is out of range */
> +     if (msg->sg_copy[i] && end > offset + len)
> +             return -EINVAL;
>  
>       first_sg = i;
>  
> +     /* if i is not last entry in sg list and end (i.e start + bytes) is
> +      * within this sg[i] then goto out and calculate data and data_end
> +      */
> +     if (!msg->sg_copy[i] && end <= offset + len)
> +             goto out;
> +
>       /* At this point we need to linearize multiple scatterlist
>        * elements or a single shared page. Either way we need to
>        * copy into a linear buffer exclusively owned by BPF. Then
> @@ -2359,11 +2365,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff 
> *msg)
>       do {
>               copy += sg[i].length;
>               i++;
> -             if (i == MAX_SKB_FRAGS)
> -                     i = 0;

Same as above: the MAX_SKB_FRAGS wrap-around needs to be kept.

> -             if (bytes < copy)
> +             if (end < copy)
>                       break;
> -     } while (i != msg->sg_end);
> +     } while (i <= msg->sg_end);
> +
> +     /* return error if i is last entry in sglist and end is out of range */
> +     if (i > msg->sg_end && end > offset + copy)
> +             return -EINVAL;
> +
>       last_sg = i;
>  
>       if (unlikely(copy < end - start))
> @@ -2373,23 +2382,25 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff 
> *msg)
>       if (unlikely(!page))
>               return -ENOMEM;
>       p = page_address(page);
> -     offset = 0;
>  
>       i = first_sg;
>       do {
>               from = sg_virt(&sg[i]);
>               len = sg[i].length;
> -             to = p + offset;
> +             to = p + off;

Not really sure if the change from offset->off is needed. Looks
like it just makes a bigger diff.

>  
>               memcpy(to, from, len);
> -             offset += len;
> +             off += len;
>               sg[i].length = 0;
> -             put_page(sg_page(&sg[i]));
> +             /* if original page is allocated via slab then put_page
> +              * causes error BUG: Bad page state in process. So temporarily
> +              * disabled put_page.
> +              * Todo: fix it
> +              */
> +             //put_page(sg_page(&sg[i]));
>  
>               i++;
> -             if (i == MAX_SKB_FRAGS)
> -                     i = 0;
> -     } while (i != last_sg);
> +     } while (i < last_sg);
>  
>       sg[first_sg].length = copy;
>       sg_set_page(&sg[first_sg], page, copy, 0);
> @@ -2406,12 +2417,8 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff 
> *msg)
>       do {
>               int move_from;
>  
> -             if (i + shift >= MAX_SKB_FRAGS)
> -                     move_from = i + shift - MAX_SKB_FRAGS;
> -             else
> -                     move_from = i + shift;
> -

Same as above: this wrap-around handling needs to be kept.

> -             if (move_from == msg->sg_end)
> +             move_from = i + shift;
> +             if (move_from > msg->sg_end)
>                       break;
>  
>               sg[i] = sg[move_from];
> @@ -2420,14 +2427,10 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff 
> *msg)
>               sg[move_from].offset = 0;
>  
>               i++;
> -             if (i == MAX_SKB_FRAGS)
> -                     i = 0;
>       } while (1);
>       msg->sg_end -= shift;
> -     if (msg->sg_end < 0)
> -             msg->sg_end += MAX_SKB_FRAGS;
>  out:
> -     msg->data = sg_virt(&sg[i]) + start - offset;
> +     msg->data = sg_virt(&sg[first_sg]) + start - offset;
>       msg->data_end = msg->data + bytes;
>  
>       return 0;
> 

Thanks,
John

Reply via email to