On Thu, 15 Jan 2026 21:02:14 -0800 Bobby Eshleman wrote:
> diff --git a/Documentation/netlink/specs/netdev.yaml 
> b/Documentation/netlink/specs/netdev.yaml
> index 596c306ce52b..a5301b150663 100644
> --- a/Documentation/netlink/specs/netdev.yaml
> +++ b/Documentation/netlink/specs/netdev.yaml
> @@ -562,6 +562,17 @@ attribute-sets:
>          type: u32
>          checks:
>            min: 1
> +      -
> +        name: autorelease
> +        doc: |
> +          Token autorelease mode. If true (1), leaked tokens are automatically
> +          released when the socket closes. If false (0), leaked tokens are only
> +          released when the dmabuf is torn down. Once a binding is created with
> +          a specific mode, all subsequent bindings system-wide must use the
> +          same mode.
> +
> +          Optional. Defaults to false if not specified.
> +        type: u8

If you plan to have more values, use u32; if not, use a flag.
A u8 attribute is an 8-bit value plus 24 bits of padding, so it's only
useful for protocol fields.

>  operations:
>    list:
> @@ -769,6 +780,7 @@ operations:
>              - ifindex
>              - fd
>              - queues
> +            - autorelease
>          reply:
>            attributes:
>              - id

>  static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> +static DEFINE_MUTEX(devmem_ar_lock);
> +DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key);
> +EXPORT_SYMBOL(tcp_devmem_ar_key);

I don't think you need the export, perhaps move the helper in here in
the first place (while keeping the static inline wrapper when devmem=n)?

> +     if (autorelease)
> +             static_branch_enable(&tcp_devmem_ar_key);

This is user-controlled (non-root), right? So I think we need
the deferred versions of the static key helpers.

> -     if (direction == DMA_TO_DEVICE) {
> -             binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> -                                           sizeof(struct net_iov *),
> -                                           GFP_KERNEL);
> -             if (!binding->vec) {
> -                     err = -ENOMEM;
> -                     goto err_unmap;
> -             }
> +     binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> +                                   sizeof(struct net_iov *),
> +                                   GFP_KERNEL | __GFP_ZERO);

make it a kvcalloc() while we're touching it, pls

> +     if (!binding->vec) {
> +             err = -ENOMEM;
> +             goto err_unmap;
>       }
>  
>       /* For simplicity we expect to make PAGE_SIZE allocations, but the
> @@ -306,25 +386,41 @@ net_devmem_bind_dmabuf(struct net_device *dev,
>                       niov = &owner->area.niovs[i];
>                       niov->type = NET_IOV_DMABUF;
>                       niov->owner = &owner->area;
> +                     atomic_set(&niov->uref, 0);

Isn't it zeroed during alloc?

>                       page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
>                                                     
> net_devmem_get_dma_addr(niov));
> -                     if (direction == DMA_TO_DEVICE)
> -                             binding->vec[owner->area.base_virtual / 
> PAGE_SIZE + i] = niov;
> +                     binding->vec[owner->area.base_virtual / PAGE_SIZE + i] 
> = niov;
>               }
>  
>               virtual += len;
>       }
>  

> +     if (info->attrs[NETDEV_A_DMABUF_AUTORELEASE])
> +             autorelease =
> +                     !!nla_get_u8(info->attrs[NETDEV_A_DMABUF_AUTORELEASE]);

Use nla_get_u8_default() here.

>       priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
>       if (IS_ERR(priv))
>               return PTR_ERR(priv);

> +static noinline_for_stack int
> +sock_devmem_dontneed_manual_release(struct sock *sk,
> +                                 struct dmabuf_token *tokens,
> +                                 unsigned int num_tokens)
> +{
> +     struct net_iov *niov;
> +     unsigned int i, j;
> +     netmem_ref netmem;
> +     unsigned int token;
> +     int num_frags = 0;
> +     int ret = 0;
> +
> +     if (!sk->sk_devmem_info.binding)
> +             return -EINVAL;
> +
> +     for (i = 0; i < num_tokens; i++) {
> +             for (j = 0; j < tokens[i].token_count; j++) {
> +                     size_t size = sk->sk_devmem_info.binding->dmabuf->size;
> +
> +                     token = tokens[i].token_start + j;
> +                     if (token >= size / PAGE_SIZE)
> +                             break;
> +
> +                     if (++num_frags > MAX_DONTNEED_FRAGS)
> +                             return ret;
> +
> +                     niov = sk->sk_devmem_info.binding->vec[token];
> +                     if (atomic_dec_and_test(&niov->uref)) {

Don't you need something like "atomic dec if non-zero and test"?
refcount has refcount_dec_not_one() 🤔

> +                             netmem = net_iov_to_netmem(niov);
> +                             WARN_ON_ONCE(!napi_pp_put_page(netmem));
> +                     }
> +                     ret++;
> +             }

>  frag_limit_reached:
> -     xa_unlock_bh(&sk->sk_user_frags);
> +     xa_unlock_bh(&sk->sk_devmem_info.frags);

It may be worth separating the sk_devmem_info change out for clarity.

>       for (k = 0; k < netmem_num; k++)
>               WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));

> @@ -2503,7 +2506,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
>  
>       tcp_release_user_frags(sk);
>  
> -     xa_destroy(&sk->sk_user_frags);
> +     if (!net_devmem_autorelease_enabled() && sk->sk_devmem_info.binding) {
> +             net_devmem_dmabuf_binding_user_put(sk->sk_devmem_info.binding);
> +             net_devmem_dmabuf_binding_put(sk->sk_devmem_info.binding);
> +             sk->sk_devmem_info.binding = NULL;
> +             WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags),
> +                       "non-empty xarray discovered in autorelease off 
> mode");
> +     }
> +
> +     xa_destroy(&sk->sk_devmem_info.frags);

Let's wrap this up in a helper that'll live in devmem.c

Reply via email to