On Fri, 4 Jan 2013 00:33:34 +0000
Ben Hutchings <[email protected]> wrote:

> From: Greg Rose <[email protected]>
> 
> commit 115c9b81928360d769a76c632bae62d15206a94a upstream.
> 
> Implement a new netlink attribute type IFLA_EXT_MASK.  The mask
> is a 32 bit value that can be used to indicate to the kernel that
> certain extended ifinfo values are requested by the user application.
> At this time the only mask value defined is RTEXT_FILTER_VF to
> indicate that the user wants the ifinfo dump to send information
> about the VFs belonging to the interface.
> 
> This patch fixes a bug in which certain applications do not have
> large enough buffers to accommodate the extra information returned
> by the kernel with large numbers of SR-IOV virtual functions.
> Those applications will not send the new netlink attribute with
> the interface info dump request netlink messages so they will
> not get unexpectedly large request buffers returned by the kernel.
> 
> Modifies the rtnl_calcit function to traverse the list of net
> devices and compute the minimum buffer size that can hold the
> info dumps of all matching devices based upon the filter passed
> in via the new netlink attribute filter mask.  If no filter
> mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE.
> 
> With this change it is possible to add yet to be defined netlink
> attributes to the dump request which should make it fairly extensible
> in the future.
> 
> Signed-off-by: Greg Rose <[email protected]>
> Signed-off-by: David S. Miller <[email protected]>
> [bwh: Backported to 3.0:
>  - Adjust context
>  - Drop the change in do_setlink() that reverts commit f18da1456581
>    ('net: RTNETLINK adjusting values of min_ifinfo_dump_size'), which
>    was never applied here]
> Signed-off-by: Ben Hutchings <[email protected]>

Not sure if these need my ack or not, nevertheless...

Acked-by: Greg Rose <[email protected])

And thanks for doing this work Ben.

- Greg

> ---
>  include/linux/if_link.h   |    1 +
>  include/linux/rtnetlink.h |    3 ++
>  include/net/rtnetlink.h   |    2 +-
>  net/core/rtnetlink.c      |   77
> ++++++++++++++++++++++++++++++++++----------- 4 files changed, 63
> insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/if_link.h b/include/linux/if_link.h
> index 0ee969a..61a48b5 100644
> --- a/include/linux/if_link.h
> +++ b/include/linux/if_link.h
> @@ -137,6 +137,7 @@ enum {
>       IFLA_AF_SPEC,
>       IFLA_GROUP,             /* Group the device belongs to */
>       IFLA_NET_NS_FD,
> +     IFLA_EXT_MASK,          /* Extended info mask, VFs,
> etc */ __IFLA_MAX
>  };
>  
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index bbad657..5415dfb 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -600,6 +600,9 @@ struct tcamsg {
>  #define TCA_ACT_TAB 1 /* attr type must be >=1 */    
>  #define TCAA_MAX 1
>  
> +/* New extended info filters for IFLA_EXT_MASK */
> +#define RTEXT_FILTER_VF              (1 << 0)
> +
>  /* End of information exported to user level */
>  
>  #ifdef __KERNEL__
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index 678f1ff..3702939 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -6,7 +6,7 @@
>  
>  typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
> void *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct
> netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff
> *); +typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct
> nlmsghdr *); 
>  extern int   __rtnl_register(int protocol, int msgtype,
>                               rtnl_doit_func, rtnl_dumpit_func,
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 848de7b..e41ce2a 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -60,7 +60,6 @@ struct rtnl_link {
>  };
>  
>  static DEFINE_MUTEX(rtnl_mutex);
> -static u16 min_ifinfo_dump_size;
>  
>  void rtnl_lock(void)
>  {
> @@ -727,10 +726,11 @@ static void copy_rtnl_link_stats64(void *v,
> const struct rtnl_link_stats64 *b) }
>  
>  /* All VF info */
> -static inline int rtnl_vfinfo_size(const struct net_device *dev)
> +static inline int rtnl_vfinfo_size(const struct net_device *dev,
> +                                u32 ext_filter_mask)
>  {
> -     if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
> -
> +     if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
> +         (ext_filter_mask & RTEXT_FILTER_VF)) {
>               int num_vfs = dev_num_vf(dev->dev.parent);
>               size_t size = nla_total_size(sizeof(struct nlattr));
>               size += nla_total_size(num_vfs * sizeof(struct
> nlattr)); @@ -768,7 +768,8 @@ static size_t rtnl_port_size(const
> struct net_device *dev) return port_self_size;
>  }
>  
> -static noinline size_t if_nlmsg_size(const struct net_device *dev)
> +static noinline size_t if_nlmsg_size(const struct net_device *dev,
> +                                  u32 ext_filter_mask)
>  {
>       return NLMSG_ALIGN(sizeof(struct ifinfomsg))
>              + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
> @@ -786,8 +787,9 @@ static noinline size_t if_nlmsg_size(const struct
> net_device *dev)
>              + nla_total_size(4) /* IFLA_MASTER */
>              + nla_total_size(1) /* IFLA_OPERSTATE */
>              + nla_total_size(1) /* IFLA_LINKMODE */
> -            + nla_total_size(4) /* IFLA_NUM_VF */
> -            + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
> +            + nla_total_size(ext_filter_mask
> +                             & RTEXT_FILTER_VF ? 4 : 0) /*
> IFLA_NUM_VF */
> +            + rtnl_vfinfo_size(dev, ext_filter_mask) /*
> IFLA_VFINFO_LIST */
>              + rtnl_port_size(dev) /* IFLA_VF_PORTS +
> IFLA_PORT_SELF */
>              + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
>              + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
> @@ -870,7 +872,7 @@ static int rtnl_port_fill(struct sk_buff *skb,
> struct net_device *dev) 
>  static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device
> *dev, int type, u32 pid, u32 seq, u32 change,
> -                         unsigned int flags)
> +                         unsigned int flags, u32 ext_filter_mask)
>  {
>       struct ifinfomsg *ifm;
>       struct nlmsghdr *nlh;
> @@ -943,10 +945,11 @@ static int rtnl_fill_ifinfo(struct sk_buff
> *skb, struct net_device *dev, goto nla_put_failure;
>       copy_rtnl_link_stats64(nla_data(attr), stats);
>  
> -     if (dev->dev.parent)
> +     if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
>               NLA_PUT_U32(skb, IFLA_NUM_VF,
> dev_num_vf(dev->dev.parent)); 
> -     if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
> +     if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
> +         && (ext_filter_mask & RTEXT_FILTER_VF)) {
>               int i;
>  
>               struct nlattr *vfinfo, *vf;
> @@ -1033,11 +1036,20 @@ static int rtnl_dump_ifinfo(struct sk_buff
> *skb, struct netlink_callback *cb) struct net_device *dev;
>       struct hlist_head *head;
>       struct hlist_node *node;
> +     struct nlattr *tb[IFLA_MAX+1];
> +     u32 ext_filter_mask = 0;
>  
>       s_h = cb->args[0];
>       s_idx = cb->args[1];
>  
>       rcu_read_lock();
> +
> +     nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
> +                 ifla_policy);
> +
> +     if (tb[IFLA_EXT_MASK])
> +             ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
>       for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
>               idx = 0;
>               head = &net->dev_index_head[h];
> @@ -1047,7 +1059,8 @@ static int rtnl_dump_ifinfo(struct sk_buff
> *skb, struct netlink_callback *cb) if (rtnl_fill_ifinfo(skb, dev,
> RTM_NEWLINK, NETLINK_CB(cb->skb).pid,
>                                            cb->nlh->nlmsg_seq, 0,
> -                                          NLM_F_MULTI) <= 0)
> +                                          NLM_F_MULTI,
> +                                          ext_filter_mask) <= 0)
>                               goto out;
>  cont:
>                       idx++;
> @@ -1081,6 +1094,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1]
> = { [IFLA_VF_PORTS]           = { .type = NLA_NESTED },
>       [IFLA_PORT_SELF]        = { .type = NLA_NESTED },
>       [IFLA_AF_SPEC]          = { .type = NLA_NESTED },
> +     [IFLA_EXT_MASK]         = { .type = NLA_U32 },
>  };
>  EXPORT_SYMBOL(ifla_policy);
>  
> @@ -1813,6 +1827,7 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) struct net_device *dev = NULL;
>       struct sk_buff *nskb;
>       int err;
> +     u32 ext_filter_mask = 0;
>  
>       err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX,
> ifla_policy); if (err < 0)
> @@ -1821,6 +1836,9 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) if (tb[IFLA_IFNAME])
>               nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
>  
> +     if (tb[IFLA_EXT_MASK])
> +             ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
>       ifm = nlmsg_data(nlh);
>       if (ifm->ifi_index > 0)
>               dev = __dev_get_by_index(net, ifm->ifi_index);
> @@ -1832,12 +1850,12 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) if (dev == NULL)
>               return -ENODEV;
>  
> -     nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
> +     nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask),
> GFP_KERNEL); if (nskb == NULL)
>               return -ENOBUFS;
>  
>       err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK,
> NETLINK_CB(skb).pid,
> -                            nlh->nlmsg_seq, 0, 0);
> +                            nlh->nlmsg_seq, 0, 0,
> ext_filter_mask); if (err < 0) {
>               /* -EMSGSIZE implies BUG in if_nlmsg_size */
>               WARN_ON(err == -EMSGSIZE);
> @@ -1848,8 +1866,31 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) return err;
>  }
>  
> -static u16 rtnl_calcit(struct sk_buff *skb)
> +static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
>  {
> +     struct net *net = sock_net(skb->sk);
> +     struct net_device *dev;
> +     struct nlattr *tb[IFLA_MAX+1];
> +     u32 ext_filter_mask = 0;
> +     u16 min_ifinfo_dump_size = 0;
> +
> +     nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
> ifla_policy); +
> +     if (tb[IFLA_EXT_MASK])
> +             ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
> +     if (!ext_filter_mask)
> +             return NLMSG_GOODSIZE;
> +     /*
> +      * traverse the list of net devices and compute the minimum
> +      * buffer size based upon the filter mask.
> +      */
> +     list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> +             min_ifinfo_dump_size = max_t(u16,
> min_ifinfo_dump_size,
> +                                          if_nlmsg_size(dev,
> +
> ext_filter_mask));
> +     }
> +
>       return min_ifinfo_dump_size;
>  }
>  
> @@ -1884,13 +1925,11 @@ void rtmsg_ifinfo(int type, struct net_device
> *dev, unsigned change) int err = -ENOBUFS;
>       size_t if_info_size;
>  
> -     skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)),
> GFP_KERNEL);
> +     skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)),
> GFP_KERNEL); if (skb == NULL)
>               goto errout;
>  
> -     min_ifinfo_dump_size = max_t(u16, if_info_size,
> min_ifinfo_dump_size); -
> -     err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
> +     err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
>       if (err < 0) {
>               /* -EMSGSIZE implies BUG in if_nlmsg_size() */
>               WARN_ON(err == -EMSGSIZE);
> @@ -1948,7 +1987,7 @@ static int rtnetlink_rcv_msg(struct sk_buff
> *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP;
>               calcit = rtnl_get_calcit(family, type);
>               if (calcit)
> -                     min_dump_alloc = calcit(skb);
> +                     min_dump_alloc = calcit(skb, nlh);
>  
>               __rtnl_unlock();
>               rtnl = net->rtnl;
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to