This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
---
devlink: Allow rate node parents from other devlinks

This commit makes use of the building blocks previously added to
implement cross-device rate nodes.

> diff --git a/include/net/devlink.h b/include/net/devlink.h
> index 3038af6ec017e..8d5ad5d4f1d05 100644
> --- a/include/net/devlink.h
> +++ b/include/net/devlink.h
> @@ -1585,6 +1585,11 @@ struct devlink_ops {
>                                   struct devlink_rate *parent,
>                                   void *priv_child, void *priv_parent,
>                                   struct netlink_ext_ack *extack);
> +     /* Indicates if cross-device rate nodes are supported.
> +      * This also requires a shared common ancestor object all devices that
> +      * could share rate nodes are nested in.
> +      */
> +     bool supported_cross_device_rate_nodes;

When a device acting as a cross-device rate parent is destroyed,
devl_rate_nodes_destroy() iterates the shared rate_list to detach its
children. If it encounters a child rate node belonging to a different device,
it invokes the child driver's ops->rate_node_parent_set or
ops->rate_leaf_parent_set callbacks.

Since the cleanup path only holds the parent's devlink lock and the shared
root devlink lock, does this allow the child driver's rate operations to
execute concurrently with other operations on the child device without holding
the child's devlink instance lock?

[ ... ]
> diff --git a/net/devlink/rate.c b/net/devlink/rate.c
> index 1949746fab291..f243cccc95bee 100644
> --- a/net/devlink/rate.c
> +++ b/net/devlink/rate.c
> @@ -30,19 +30,53 @@ devlink_rate_leaf_get_from_info(struct devlink *devlink, 
> struct genl_info *info)
>       return devlink_rate ?: ERR_PTR(-ENODEV);
>  }
>  
> +/* Repeatedly locks the nested-in devlink instances while cross device rate
> + * nodes are supported. Returns the devlink instance where rates should be
> + * stored.
> + */
>  static struct devlink *devl_rate_lock(struct devlink *devlink)
>  {
> -     return devlink;
> +     struct devlink *rate_devlink = devlink;
> +
> +     while (rate_devlink->ops &&
> +            rate_devlink->ops->supported_cross_device_rate_nodes) {
> +             devlink = devlink_nested_in_get_lock(rate_devlink->rel);
> +             if (!devlink)
> +                     break;
> +             rate_devlink = devlink;
> +     }
> +     return rate_devlink;
>  }

[ ... ]

> +/* Repeatedly unlocks the nested-in devlink instances of 'devlink' while 
> cross
> + * device nodes are supported.
> + */
>  static void devl_rate_unlock(struct devlink *devlink)
>  {
> +     if (!devlink || !devlink->ops ||
> +         !devlink->ops->supported_cross_device_rate_nodes)
> +             return;
> +
> +     devl_rate_unlock(devlink_nested_in_get_locked(devlink->rel));
> +     devlink_nested_in_put_unlock(devlink->rel);
>  }

If a parent devlink is not yet registered, devlink_nested_in_get_lock() can
return NULL in devl_rate_lock() and break the loop without taking a lock.

If another thread completes registering the parent devlink concurrently,
devlink_nested_in_get_locked() in devl_rate_unlock() will find it. 

Could this lead to unlocking a mutex that was never acquired and dropping a
reference that was never taken, resulting in a use-after-free or lock
corruption?

[ ... ]
> @@ -320,13 +371,14 @@ devlink_nl_rate_parent_node_set(struct devlink_rate 
> *devlink_rate,
>                               struct genl_info *info,
>                               struct nlattr *nla_parent)
>  {
> -     struct devlink *devlink = devlink_rate->devlink;
> +     struct devlink *devlink = devlink_rate->devlink, *parent_devlink;
>       const char *parent_name = nla_data(nla_parent);
>       const struct devlink_ops *ops = devlink->ops;
>       size_t len = strlen(parent_name);
>       struct devlink_rate *parent;
>       int err = -EOPNOTSUPP;
>  
> +     parent_devlink = devlink_nl_ctx(info)->parent_devlink ? : devlink;
>       parent = devlink_rate->parent;
>  
>       if (parent && !len) {
> @@ -344,7 +396,13 @@ devlink_nl_rate_parent_node_set(struct devlink_rate 
> *devlink_rate,
>               refcount_dec(&parent->refcnt);
>               devlink_rate->parent = NULL;
>       } else if (len) {
> -             parent = devlink_rate_node_get_by_name(devlink, parent_name);
> +             /* parent_devlink (when different than devlink) isn't locked,
> +              * but the rate node devlink instance is, so nobody from the
> +              * same group of devices sharing rates could change the used
> +              * fields or unregister the parent.
> +              */
> +             parent = devlink_rate_node_get_by_name(parent_devlink,
> +                                                    parent_name);

Is parent_devlink validated to ensure it shares the same common rate root
ancestor as devlink?

If an arbitrary unrelated parent_devlink is specified,
devlink_rate_node_get_by_name() could traverse an unrelated parent's
rate_list that was never locked by devl_rate_lock(), leading to a data race.

Additionally, if a rate node is found, its priv pointer is passed to the
target driver's ops->rate_node_parent_set. Could this cause the target driver
to cast a foreign priv pointer to its own private struct type, resulting in
type confusion and memory corruption?

Reply via email to