[PATCH net-next v3 0/4] ipmr/ip6mr: add Netlink notifications on cache reports

2017-06-20 Thread Julien Gomes
Currently, all ipmr/ip6mr cache reports are sent through the
mroute/mroute6 socket only.
This forces the use of a single socket for mroute programming, cache
reports and, in the ipmr case, the reception of IGMP messages that lack
the Router Alert option.

The present patches aim to send Netlink notifications in addition to
the existing igmpmsg/mrt6msg, giving user programs a way to handle
cache reports in parallel on multiple sockets other than the
mroute/mroute6 socket.
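
As an illustration, a user program can consume these notifications on a
plain rtnetlink socket along the following lines (a minimal sketch built
against the patched uapi headers, with error handling mostly omitted):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
    /* RTNLGRP_IPV4_MROUTE_R is added by patch 2/4; joining it requires
     * CAP_NET_ADMIN. */
    unsigned int grp = RTNLGRP_IPV4_MROUTE_R;
    char buf[8192];
    int fd;

    fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (fd < 0)
        return 1;
    if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
                   &grp, sizeof(grp)) < 0)
        return 1;

    for (;;) {
        ssize_t len = recv(fd, buf, sizeof(buf), 0);
        struct nlmsghdr *nlh;

        if (len <= 0)
            break;
        for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
             nlh = NLMSG_NEXT(nlh, len))
            if (nlh->nlmsg_type == RTM_NEWCACHEREPORT)
                printf("cache report, %u bytes\n", nlh->nlmsg_len);
    }
    close(fd);
    return 0;
}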

Changes in v2:
- Changed attributes naming from {IPMRA,IP6MRA}_CACHEREPORTA_* to
  {IPMRA,IP6MRA}_CREPORT_*
- Improved packet data copy to handle non-linear packets in
  ipmr/ip6mr cache report Netlink notification creation
- Added two rtnetlink groups with restricted-binding
- Changed cache report notified groups from RTNL_{IPV4,IPV6}_MROUTE to
  the new restricted groups in ipmr/ip6mr

Changes in v3:
- Put message size calculation for {igmp,mrt6}msg_netlink_event in separate
  functions
- Increased vif id attributes size from u8 to u32

Julien Gomes (4):
  rtnetlink: add NEWCACHEREPORT message type
  rtnetlink: add restricted rtnl groups for ipv4 and ipv6 mroute
  ipmr: add netlink notifications on igmpmsg cache reports
  ip6mr: add netlink notifications on mrt6msg cache reports

 include/uapi/linux/mroute.h| 12 +++
 include/uapi/linux/mroute6.h   | 12 +++
 include/uapi/linux/rtnetlink.h |  7 +
 net/core/rtnetlink.c   | 13 
 net/ipv4/ipmr.c| 69 ++--
 net/ipv6/ip6mr.c   | 71 --
 security/selinux/nlmsgtab.c|  3 +-
 7 files changed, 182 insertions(+), 5 deletions(-)

-- 
2.13.1



[PATCH net-next v3 1/4] rtnetlink: add NEWCACHEREPORT message type

2017-06-20 Thread Julien Gomes
Add a new NEWCACHEREPORT message type to be used for cache reports sent
via Netlink, effectively allowing cache report reception to be split
from mroute programming.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
Reviewed-by: Nikolay Aleksandrov <niko...@cumulusnetworks.com>
---
 include/uapi/linux/rtnetlink.h | 3 +++
 security/selinux/nlmsgtab.c| 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 564790e854f7..cd1afb900929 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -146,6 +146,9 @@ enum {
RTM_GETSTATS = 94,
 #define RTM_GETSTATS RTM_GETSTATS
 
+   RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
__RTM_MAX,
 #define RTM_MAX(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 5aeaf30b7a13..7b7433a1a34c 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -79,6 +79,7 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_GETNSID,  NETLINK_ROUTE_SOCKET__NLMSG_READ  },
{ RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ },
{ RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+   { RTM_NEWCACHEREPORT,   NETLINK_ROUTE_SOCKET__NLMSG_READ },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -158,7 +159,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
switch (sclass) {
case SECCLASS_NETLINK_ROUTE_SOCKET:
/* RTM_MAX always point to RTM_SET, ie RTM_NEWxxx + 3 */
-   BUILD_BUG_ON(RTM_MAX != (RTM_NEWSTATS + 3));
+   BUILD_BUG_ON(RTM_MAX != (RTM_NEWCACHEREPORT + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 sizeof(nlmsg_route_perms));
break;
-- 
2.13.1



[PATCH net-next v3 3/4] ipmr: add netlink notifications on igmpmsg cache reports

2017-06-20 Thread Julien Gomes
Add Netlink notifications on cache reports in ipmr, in addition to the
existing igmpmsg sent to mroute_sk.
Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV4_MROUTE_R.

MSGTYPE, VIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
same data as their equivalent fields in the igmpmsg header.
PKT attribute is the packet sent to mroute_sk, without the added igmpmsg
header.
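
For illustration, a receiver can walk the attributes of one such
notification roughly as follows (a sketch only; the helper name is made
up and parsing is reduced to a manual struct nlattr loop):

#include <stdio.h>
#include <string.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/mroute.h>   /* IPMRA_CREPORT_* from this patch */

/* Hypothetical helper: print the main fields of one RTM_NEWCACHEREPORT. */
static void dump_cache_report(const struct nlmsghdr *nlh)
{
    const struct rtgenmsg *rtgenm = NLMSG_DATA(nlh);
    int len = nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct rtgenmsg));
    const struct nlattr *nla = (const struct nlattr *)
        ((const char *)nlh + NLMSG_SPACE(sizeof(struct rtgenmsg)));

    if (rtgenm->rtgen_family != RTNL_FAMILY_IPMR)
        return;

    while (len >= (int)sizeof(*nla) && nla->nla_len >= sizeof(*nla) &&
           nla->nla_len <= len) {
        const unsigned char *data = (const unsigned char *)nla + NLA_HDRLEN;
        unsigned int vif;

        switch (nla->nla_type) {
        case IPMRA_CREPORT_MSGTYPE:     /* u8, same as igmpmsg im_msgtype */
            printf("msgtype %u\n", (unsigned int)*data);
            break;
        case IPMRA_CREPORT_VIF_ID:      /* u32 as of v3 */
            memcpy(&vif, data, sizeof(vif));
            printf("vif %u\n", vif);
            break;
        case IPMRA_CREPORT_PKT:         /* packet without the igmpmsg header */
            printf("pkt %d bytes\n", nla->nla_len - NLA_HDRLEN);
            break;
        }
        len -= NLA_ALIGN(nla->nla_len);
        nla = (const struct nlattr *)((const char *)nla +
                                      NLA_ALIGN(nla->nla_len));
    }
}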

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/mroute.h | 12 
 net/ipv4/ipmr.c | 69 +++--
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index f904367c0cee..e8e5041dea8e 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -152,6 +152,18 @@ enum {
 };
 #define IPMRA_VIFA_MAX (__IPMRA_VIFA_MAX - 1)
 
+/* ipmr netlink cache report attributes */
+enum {
+   IPMRA_CREPORT_UNSPEC,
+   IPMRA_CREPORT_MSGTYPE,
+   IPMRA_CREPORT_VIF_ID,
+   IPMRA_CREPORT_SRC_ADDR,
+   IPMRA_CREPORT_DST_ADDR,
+   IPMRA_CREPORT_PKT,
+   __IPMRA_CREPORT_MAX
+};
+#define IPMRA_CREPORT_MAX (__IPMRA_CREPORT_MAX - 1)
+
 /* That's all usermode folks */
 
 #define MFC_ASSERT_THRESH (3*HZ)   /* Maximal freq. of asserts */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3e7454aa49e8..a1d521be612b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -109,6 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
  struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 int cmd);
+static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
@@ -995,8 +996,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
}
 }
 
-/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
- * expects the following bizarre scheme.
+/* Bounce a cache query up to mrouted and netlink.
  *
  * Called under mrt_lock.
  */
@@ -1062,6 +1062,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
return -EINVAL;
}
 
+   igmpmsg_netlink_event(mrt, skb);
+
/* Deliver to mrouted */
ret = sock_queue_rcv_skb(mroute_sk, skb);
rcu_read_unlock();
@@ -2341,6 +2343,69 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
 }
 
+static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
+{
+   size_t len =
+   NLMSG_ALIGN(sizeof(struct rtgenmsg))
+   + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */
+   + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */
+   + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */
+   + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */
+   /* IPMRA_CREPORT_PKT */
+   + nla_total_size(payloadlen)
+   ;
+
+   return len;
+}
+
+static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+{
+   struct net *net = read_pnet(&mrt->net);
+   struct nlmsghdr *nlh;
+   struct rtgenmsg *rtgenm;
+   struct igmpmsg *msg;
+   struct sk_buff *skb;
+   struct nlattr *nla;
+   int payloadlen;
+
+   payloadlen = pkt->len - sizeof(struct igmpmsg);
+   msg = (struct igmpmsg *)skb_network_header(pkt);
+
+   skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
+   if (!skb)
+   goto errout;
+
+   nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
+   sizeof(struct rtgenmsg), 0);
+   if (!nlh)
+   goto errout;
+   rtgenm = nlmsg_data(nlh);
+   rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
+   if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
+   nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) ||
+   nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
+   msg->im_src.s_addr) ||
+   nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
+   msg->im_dst.s_addr))
+   goto nla_put_failure;
+
+   nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
+   if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
+ nla_data(nla), payloadlen))
+   goto nla_put_failure;
+
+   nlmsg_end(skb, nlh);
+
+   rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
+   return;
+
+nla_put_failure:
+   nlmsg_cancel(skb, nlh);
+errout:
+   kfree_skb(skb);
+   rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
+}

[PATCH net-next v3 4/4] ip6mr: add netlink notifications on mrt6msg cache reports

2017-06-20 Thread Julien Gomes
Add Netlink notifications on cache reports in ip6mr, in addition to the
existing mrt6msg sent to mroute6_sk.
Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV6_MROUTE_R.

MSGTYPE, MIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
same data as their equivalent fields in the mrt6msg header.
PKT attribute is the packet sent to mroute6_sk, without the added
mrt6msg header.
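
Since both the ipmr and ip6mr notifications use RTM_NEWCACHEREPORT, a
listener joined to both restricted groups can tell them apart by the
rtgenmsg family, for example (sketch, handler bodies omitted):

#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/* Sketch of the demux step for a listener joined to both restricted groups. */
static void handle_cache_report(const struct nlmsghdr *nlh)
{
    const struct rtgenmsg *rtgenm = NLMSG_DATA(nlh);

    switch (rtgenm->rtgen_family) {
    case RTNL_FAMILY_IPMR:      /* IPv4 report, IPMRA_CREPORT_* attributes */
        /* ... parse as in the ipmr patch ... */
        break;
    case RTNL_FAMILY_IP6MR:     /* IPv6 report, IP6MRA_CREPORT_* attributes */
        /* ... parse as described above ... */
        break;
    }
}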

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/mroute6.h | 12 
 net/ipv6/ip6mr.c | 71 ++--
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index ed5721148768..e4746816c855 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -133,4 +133,16 @@ struct mrt6msg {
struct in6_addr im6_src, im6_dst;
 };
 
+/* ip6mr netlink cache report attributes */
+enum {
+   IP6MRA_CREPORT_UNSPEC,
+   IP6MRA_CREPORT_MSGTYPE,
+   IP6MRA_CREPORT_MIF_ID,
+   IP6MRA_CREPORT_SRC_ADDR,
+   IP6MRA_CREPORT_DST_ADDR,
+   IP6MRA_CREPORT_PKT,
+   __IP6MRA_CREPORT_MAX
+};
+#define IP6MRA_CREPORT_MAX (__IP6MRA_CREPORT_MAX - 1)
+
 #endif /* _UAPI__LINUX_MROUTE6_H */
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index b0e2bf1f4212..7454850f2098 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -116,6 +116,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
   struct mfc6_cache *c, struct rtmsg *rtm);
 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
  int cmd);
+static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
   struct netlink_callback *cb);
 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
@@ -1125,8 +1126,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 }
 
 /*
- * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
- * expects the following bizarre scheme.
+ * Bounce a cache query up to pim6sd and netlink.
  *
  * Called under mrt_lock.
  */
@@ -1208,6 +1208,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return -EINVAL;
}
 
+   mrt6msg_netlink_event(mrt, skb);
+
/*
 *  Deliver to user space multicast routing algorithms
 */
@@ -2457,6 +2459,71 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
 }
 
+static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
+{
+   size_t len =
+   NLMSG_ALIGN(sizeof(struct rtgenmsg))
+   + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */
+   + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */
+   /* IP6MRA_CREPORT_SRC_ADDR */
+   + nla_total_size(sizeof(struct in6_addr))
+   /* IP6MRA_CREPORT_DST_ADDR */
+   + nla_total_size(sizeof(struct in6_addr))
+   /* IP6MRA_CREPORT_PKT */
+   + nla_total_size(payloadlen)
+   ;
+
+   return len;
+}
+
+static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+{
+   struct net *net = read_pnet(&mrt->net);
+   struct nlmsghdr *nlh;
+   struct rtgenmsg *rtgenm;
+   struct mrt6msg *msg;
+   struct sk_buff *skb;
+   struct nlattr *nla;
+   int payloadlen;
+
+   payloadlen = pkt->len - sizeof(struct mrt6msg);
+   msg = (struct mrt6msg *)skb_transport_header(pkt);
+
+   skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
+   if (!skb)
+   goto errout;
+
+   nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
+   sizeof(struct rtgenmsg), 0);
+   if (!nlh)
+   goto errout;
+   rtgenm = nlmsg_data(nlh);
+   rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
+   if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
+   nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
+   nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
+    &msg->im6_src) ||
+   nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
+    &msg->im6_dst))
+   goto nla_put_failure;
+
+   nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
+   if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
+ nla_data(nla), payloadlen))
+   goto nla_put_failure;
+
+   nlmsg_end(skb, nlh);
+
+   rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
+   return;
+
+nla_put_failure:
+   nlmsg_cancel(skb, nlh);
+errout:
+   kfree_skb(skb);
+   rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
+}

[PATCH net-next v3 2/4] rtnetlink: add restricted rtnl groups for ipv4 and ipv6 mroute

2017-06-20 Thread Julien Gomes
Add RTNLGRP_{IPV4,IPV6}_MROUTE_R as two new restricted groups for the
NETLINK_ROUTE family.
Binding to these groups specifically requires CAP_NET_ADMIN to allow
multicast of sensitive messages (e.g. mroute cache reports).
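
From user space the restriction shows up at subscription time: joining
one of these groups from an unprivileged process fails, for example
(sketch against the patched header):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
    unsigned int grp = RTNLGRP_IPV4_MROUTE_R;   /* new restricted group */
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

    if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
                   &grp, sizeof(grp)) < 0)
        perror("RTNLGRP_IPV4_MROUTE_R");        /* EPERM without CAP_NET_ADMIN */
    return 0;
}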

Suggested-by: Nikolay Aleksandrov <niko...@cumulusnetworks.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
Signed-off-by: Nikolay Aleksandrov <niko...@cumulusnetworks.com>
---
 include/uapi/linux/rtnetlink.h |  4 
 net/core/rtnetlink.c   | 13 +
 2 files changed, 17 insertions(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cd1afb900929..d148505010a7 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -669,6 +669,10 @@ enum rtnetlink_groups {
 #define RTNLGRP_NSID   RTNLGRP_NSID
RTNLGRP_MPLS_NETCONF,
 #define RTNLGRP_MPLS_NETCONF   RTNLGRP_MPLS_NETCONF
+   RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R  RTNLGRP_IPV4_MROUTE_R
+   RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R  RTNLGRP_IPV6_MROUTE_R
__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX(__RTNLGRP_MAX - 1)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3aa57848a895..4aefa5a2625f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4218,6 +4218,18 @@ static void rtnetlink_rcv(struct sk_buff *skb)
rtnl_unlock();
 }
 
+static int rtnetlink_bind(struct net *net, int group)
+{
+   switch (group) {
+   case RTNLGRP_IPV4_MROUTE_R:
+   case RTNLGRP_IPV6_MROUTE_R:
+   if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+   return -EPERM;
+   break;
+   }
+   return 0;
+}
+
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -4252,6 +4264,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
.input  = rtnetlink_rcv,
.cb_mutex   = &rtnl_mutex,
.flags  = NL_CFG_F_NONROOT_RECV,
+   .bind   = rtnetlink_bind,
};
 
sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
-- 
2.13.1



[PATCH net-next 1/2] ipmr: restrict mroute "queue full" warning to related error values

2017-06-21 Thread Julien Gomes
When sending a cache report on mroute_sk, mroute will emit a
"pending queue full" warning for every error value returned by
sock_queue_rcv_skb().
This warning can be misleading, for example on the EPERM error value
that sk_filter() can return.

Restricting this warning to only ENOMEM or ENOBUFS seems more
appropriate.

Signed-off-by: Julien Gomes <jul...@arista.com>
---
 net/ipv4/ipmr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1d521be612b..ace12cddb9de 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1068,7 +1068,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
ret = sock_queue_rcv_skb(mroute_sk, skb);
rcu_read_unlock();
if (ret < 0) {
-   net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
+   if (ret == -ENOMEM || ret == -ENOBUFS)
+           net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
kfree_skb(skb);
}
 
-- 
2.13.1



[PATCH net-next 2/2] ip6mr: restrict mroute6 "queue full" warning to related error values

2017-06-21 Thread Julien Gomes
When sending a cache report on mroute6_sk, mroute6 will emit a
"pending queue full" warning for every error value returned by
sock_queue_rcv_skb().
This warning can be misleading, for example on the EPERM error value
that sk_filter() can return.

Restricting this warning to only ENOMEM or ENOBUFS seems more
appropriate.

Signed-off-by: Julien Gomes <jul...@arista.com>
---
 net/ipv6/ip6mr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7454850f2098..62fe5fd64f22 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1215,7 +1215,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 */
ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
if (ret < 0) {
-   net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
+   if (ret == -ENOMEM || ret == -ENOBUFS)
+           net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
}
 
-- 
2.13.1



Re: [PATCH net-next 1/2] ipmr: restrict mroute "queue full" warning to related error values

2017-06-23 Thread Julien Gomes
On 06/23/2017 11:47 AM, David Miller wrote:

> From: Julien Gomes <jul...@arista.com>
> Date: Fri, 23 Jun 2017 10:52:26 -0700
>
>> On 06/23/2017 10:39 AM, David Miller wrote:
>>
>>> From: Julien Gomes <jul...@arista.com>
>>> Date: Wed, 21 Jun 2017 10:58:10 -0700
>>>
>>>> When sending a cache report on mroute_sk, mroute will emit a
>>>> "pending queue full" warning for every error value returned by
>>>> sock_queue_rcv_skb().
>>>> This warning can be misleading, for example on the EPERM error value
>>>> that sk_filter() can return.
>>>>
>>>> Restricting this warning to only ENOMEM or ENOBUFS seems more
>>>> appropriate.
>>>>
>>>> Signed-off-by: Julien Gomes <jul...@arista.com>
>>> Incorrect, no other error codes are possible.
>>>
>>> We never attach a socket filter to these kernel internal sockets,
>>> therefore sk_filter() is not even applicable in this analysis.
>>>
>>> Therefore, -ENOBUFS and -ENOMEM are the only errors we can ever see
>>> returned from sock_queue_rcv_skb().
>>>
>>> This goes for your second patch as well.
>> Up to now I would agree, but now that cache reports are also sent
>> through Netlink, wouldn't it make sense to allow the user of mroute_sk
>> to attach a filter to it in order to not receive cache reports on it?
> There is not visibility of this socket outside of the kernel.

Hmm, maybe we are not talking about the same thing.

The value of this socket pointer is set by the MRT_INIT sockopt.
The socket is definitely visible outside of the kernel: it is created
by the user in the first place and is used for kernel-user
communication, both via the sockopts and via the messages the kernel
sends to the user through it.


The typical user setup up to now was to create this socket,
MRT_INIT to register it in ipmr, and handle the incoming packets,
including the cache reports.
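
For reference, that setup looks roughly like this (sketch, error
handling omitted):

#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
    int one = 1;
    char buf[2048];

    /* Register this socket as mroute_sk: from now on ipmr delivers both
     * IGMP packets and igmpmsg cache reports on it. */
    setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));

    for (;;) {
        ssize_t len = read(fd, buf, sizeof(buf));
        struct igmpmsg *msg = (struct igmpmsg *)buf;

        if (len <= 0)
            break;
        if ((size_t)len < sizeof(*msg))
            continue;
        if (msg->im_mbz == 0) {
            /* cache report: IGMPMSG_NOCACHE / WRONGVIF / WHOLEPKT */
        } else {
            /* genuine IGMP packet (im_mbz overlays the IP protocol byte) */
        }
    }
    return 0;
}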

Now that the cache reports are also sent through another medium,
the user should be able to decide whether it also wants the
reports on this socket, which, once again, it is already using.
And if the user wants to get the reports only through Netlink,
the kernel will currently emit those unrelated warnings.

Once again, we are not in the case of a purely internal kernel socket,
as this socket is used for internal kernel-user communications.

-- 
Julien Gomes



Re: [PATCH net-next 1/2] ipmr: restrict mroute "queue full" warning to related error values

2017-06-23 Thread Julien Gomes
On 06/23/2017 10:39 AM, David Miller wrote:

> From: Julien Gomes <jul...@arista.com>
> Date: Wed, 21 Jun 2017 10:58:10 -0700
>
>> When sending a cache report on mroute_sk, mroute will emit a
>> "pending queue full" warning for every error value returned by
>> sock_queue_rcv_skb().
>> This warning can be misleading, for example on the EPERM error value
>> that sk_filter() can return.
>>
>> Restricting this warning to only ENOMEM or ENOBUFS seems more
>> appropriate.
>>
>> Signed-off-by: Julien Gomes <jul...@arista.com>
> Incorrect, no other error codes are possible.
>
> We never attach a socket filter to these kernel internal sockets,
> therefore sk_filter() is not even applicable in this analysis.
>
> Therefore, -ENOBUFS and -ENOMEM are the only errors we can ever see
> returned from sock_queue_rcv_skb().
>
> This goes for your second patch as well.

Up to now I would agree, but now that cache reports are also sent
through Netlink, wouldn't it make sense to allow the user of mroute_sk
to attach a filter to it in order to not receive cache reports on it?

I agree this is not crucial in any way, but this could be a way to let
the user program choose whether it receives the reports through one
of the mediums, and not inevitably both.
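
For illustration, such a filter could be as small as a classic BPF
program attached with SO_ATTACH_FILTER, dropping the kernel's cache
reports while keeping genuine IGMP packets (a sketch, assuming reports
are recognized by the zero im_mbz byte that overlays the IP protocol
field):

#include <sys/socket.h>
#include <linux/filter.h>

/* Sketch: attach to the mroute socket so that cache reports (where the
 * zero im_mbz byte sits at the IP protocol offset) are dropped and only
 * real IGMP packets are queued. */
static int drop_cache_reports(int mroute_fd)
{
    struct sock_filter code[] = {
        BPF_STMT(BPF_LD  | BPF_B   | BPF_ABS, 9),     /* IP protocol / im_mbz */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, 0), /* == 0 ? drop : accept */
        BPF_STMT(BPF_RET | BPF_K, 0xffff),            /* accept */
        BPF_STMT(BPF_RET | BPF_K, 0),                 /* drop: sock_queue_rcv_skb() returns -EPERM */
    };
    struct sock_fprog prog = {
        .len = sizeof(code) / sizeof(code[0]),
        .filter = code,
    };

    return setsockopt(mroute_fd, SOL_SOCKET, SO_ATTACH_FILTER,
                      &prog, sizeof(prog));
}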

-- 
Julien Gomes



[PATCH net-next v2 4/4] ip6mr: add netlink notifications on mrt6msg cache reports

2017-06-19 Thread Julien Gomes
Add Netlink notifications on cache reports in ip6mr, in addition to the
existing mrt6msg sent to mroute6_sk.
Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV6_MROUTE_R.

MSGTYPE, MIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
same data as their equivalent fields in the mrt6msg header.
PKT attribute is the packet sent to mroute6_sk, without the added
mrt6msg header.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/mroute6.h | 12 
 net/ipv6/ip6mr.c | 67 ++--
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index ed5721148768..e4746816c855 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -133,4 +133,16 @@ struct mrt6msg {
struct in6_addr im6_src, im6_dst;
 };
 
+/* ip6mr netlink cache report attributes */
+enum {
+   IP6MRA_CREPORT_UNSPEC,
+   IP6MRA_CREPORT_MSGTYPE,
+   IP6MRA_CREPORT_MIF_ID,
+   IP6MRA_CREPORT_SRC_ADDR,
+   IP6MRA_CREPORT_DST_ADDR,
+   IP6MRA_CREPORT_PKT,
+   __IP6MRA_CREPORT_MAX
+};
+#define IP6MRA_CREPORT_MAX (__IP6MRA_CREPORT_MAX - 1)
+
 #endif /* _UAPI__LINUX_MROUTE6_H */
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index b0e2bf1f4212..28a1fb49f12e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -116,6 +116,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
   struct mfc6_cache *c, struct rtmsg *rtm);
 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
  int cmd);
+static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
   struct netlink_callback *cb);
 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
@@ -1125,8 +1126,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 }
 
 /*
- * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
- * expects the following bizarre scheme.
+ * Bounce a cache query up to pim6sd and netlink.
  *
  * Called under mrt_lock.
  */
@@ -1208,6 +1208,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return -EINVAL;
}
 
+   mrt6msg_netlink_event(mrt, skb);
+
/*
 *  Deliver to user space multicast routing algorithms
 */
@@ -2457,6 +2459,67 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
 }
 
+static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+{
+   struct net *net = read_pnet(&mrt->net);
+   struct nlmsghdr *nlh;
+   struct rtgenmsg *rtgenm;
+   struct mrt6msg *msg;
+   struct sk_buff *skb;
+   struct nlattr *nla;
+   int payloadlen;
+   int msgsize;
+
+   payloadlen = pkt->len - sizeof(struct mrt6msg);
+   msg = (struct mrt6msg *)skb_transport_header(pkt);
+   msgsize = NLMSG_ALIGN(sizeof(struct rtgenmsg))
+   + nla_total_size(1)
+   /* IP6MRA_CREPORT_MSGTYPE */
+   + nla_total_size(2)
+   /* IP6MRA_CREPORT_MIF_ID */
+   + nla_total_size(sizeof(struct in6_addr))
+   /* IP6MRA_CREPORT_SRC_ADDR */
+   + nla_total_size(sizeof(struct in6_addr))
+   /* IP6MRA_CREPORT_DST_ADDR */
+   + nla_total_size(payloadlen)
+   /* IP6MRA_CREPORT_PKT */
+   ;
+
+   skb = nlmsg_new(msgsize, GFP_ATOMIC);
+   if (!skb)
+   goto errout;
+
+   nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
+   sizeof(struct rtgenmsg), 0);
+   if (!nlh)
+   goto errout;
+   rtgenm = nlmsg_data(nlh);
+   rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
+   if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
+   nla_put_u16(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
+   nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
+    &msg->im6_src) ||
+   nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
+    &msg->im6_dst))
+   goto nla_put_failure;
+
+   nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
+   if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
+ nla_data(nla), payloadlen))
+   goto nla_put_failure;
+
+   nlmsg_end(skb, nlh);
+
+   rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
+

[PATCH net-next v2 2/4] rtnetlink: add restricted rtnl groups for ipv4 and ipv6 mroute

2017-06-19 Thread Julien Gomes
Add RTNLGRP_{IPV4,IPV6}_MROUTE_R as two new restricted groups for the
NETLINK_ROUTE family.
Binding to these groups specifically requires CAP_NET_ADMIN to allow
multicast of sensitive messages (e.g. mroute cache reports).

Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/rtnetlink.h |  4 
 net/core/rtnetlink.c   | 13 +
 2 files changed, 17 insertions(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cd1afb900929..d148505010a7 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -669,6 +669,10 @@ enum rtnetlink_groups {
 #define RTNLGRP_NSID   RTNLGRP_NSID
RTNLGRP_MPLS_NETCONF,
 #define RTNLGRP_MPLS_NETCONF   RTNLGRP_MPLS_NETCONF
+   RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R  RTNLGRP_IPV4_MROUTE_R
+   RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R  RTNLGRP_IPV6_MROUTE_R
__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX(__RTNLGRP_MAX - 1)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3aa57848a895..4aefa5a2625f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4218,6 +4218,18 @@ static void rtnetlink_rcv(struct sk_buff *skb)
rtnl_unlock();
 }
 
+static int rtnetlink_bind(struct net *net, int group)
+{
+   switch (group) {
+   case RTNLGRP_IPV4_MROUTE_R:
+   case RTNLGRP_IPV6_MROUTE_R:
+   if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+   return -EPERM;
+   break;
+   }
+   return 0;
+}
+
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -4252,6 +4264,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
.input  = rtnetlink_rcv,
.cb_mutex   = &rtnl_mutex,
.flags  = NL_CFG_F_NONROOT_RECV,
+   .bind   = rtnetlink_bind,
};
 
sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
-- 
2.13.1



[PATCH net-next v2 3/4] ipmr: add netlink notifications on igmpmsg cache reports

2017-06-19 Thread Julien Gomes
Add Netlink notifications on cache reports in ipmr, in addition to the
existing igmpmsg sent to mroute_sk.
Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV4_MROUTE_R.

MSGTYPE, VIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
same data as their equivalent fields in the igmpmsg header.
PKT attribute is the packet sent to mroute_sk, without the added igmpmsg
header.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/mroute.h | 12 
 net/ipv4/ipmr.c | 67 +++--
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index f904367c0cee..e8e5041dea8e 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -152,6 +152,18 @@ enum {
 };
 #define IPMRA_VIFA_MAX (__IPMRA_VIFA_MAX - 1)
 
+/* ipmr netlink cache report attributes */
+enum {
+   IPMRA_CREPORT_UNSPEC,
+   IPMRA_CREPORT_MSGTYPE,
+   IPMRA_CREPORT_VIF_ID,
+   IPMRA_CREPORT_SRC_ADDR,
+   IPMRA_CREPORT_DST_ADDR,
+   IPMRA_CREPORT_PKT,
+   __IPMRA_CREPORT_MAX
+};
+#define IPMRA_CREPORT_MAX (__IPMRA_CREPORT_MAX - 1)
+
 /* That's all usermode folks */
 
 #define MFC_ASSERT_THRESH (3*HZ)   /* Maximal freq. of asserts */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3e7454aa49e8..1e591bcaad6d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -109,6 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
  struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 int cmd);
+static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
@@ -995,8 +996,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
}
 }
 
-/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
- * expects the following bizarre scheme.
+/* Bounce a cache query up to mrouted and netlink.
  *
  * Called under mrt_lock.
  */
@@ -1062,6 +1062,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
return -EINVAL;
}
 
+   igmpmsg_netlink_event(mrt, skb);
+
/* Deliver to mrouted */
ret = sock_queue_rcv_skb(mroute_sk, skb);
rcu_read_unlock();
@@ -2341,6 +2343,67 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
 }
 
+static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+{
+   struct net *net = read_pnet(&mrt->net);
+   struct nlmsghdr *nlh;
+   struct rtgenmsg *rtgenm;
+   struct igmpmsg *msg;
+   struct sk_buff *skb;
+   struct nlattr *nla;
+   int payloadlen;
+   int msgsize;
+
+   payloadlen = pkt->len - sizeof(struct igmpmsg);
+   msg = (struct igmpmsg *)skb_network_header(pkt);
+   msgsize = NLMSG_ALIGN(sizeof(struct rtgenmsg))
+   + nla_total_size(1)
+   /* IPMRA_CREPORT_MSGTYPE */
+   + nla_total_size(1)
+   /* IPMRA_CREPORT_VIF_ID */
+   + nla_total_size(4)
+   /* IPMRA_CREPORT_SRC_ADDR */
+   + nla_total_size(4)
+   /* IPMRA_CREPORT_DST_ADDR */
+   + nla_total_size(payloadlen)
+   /* IPMRA_CREPORT_PKT */
+   ;
+
+   skb = nlmsg_new(msgsize, GFP_ATOMIC);
+   if (!skb)
+   goto errout;
+
+   nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
+   sizeof(struct rtgenmsg), 0);
+   if (!nlh)
+   goto errout;
+   rtgenm = nlmsg_data(nlh);
+   rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
+   if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
+   nla_put_u8(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) ||
+   nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
+   msg->im_src.s_addr) ||
+   nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
+   msg->im_dst.s_addr))
+   goto nla_put_failure;
+
+   nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
+   if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
+ nla_data(nla), payloadlen))
+   goto nla_put_failure;
+
+   nlmsg_end(skb, nlh);
+
+   rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
+   return;
+
+nla_put_failure:
+   nlmsg_cancel(skb, nlh);
+errout:
+   kfree_skb(skb);
+   rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
+}

[PATCH net-next v2 0/4] ipmr/ip6mr: add Netlink notifications on cache reports

2017-06-19 Thread Julien Gomes
Currently, all ipmr/ip6mr cache reports are sent through the
mroute/mroute6 socket only.
This forces the use of a single socket for mroute programming, cache
reports and, in the ipmr case, the reception of IGMP messages that lack
the Router Alert option.

The present patches aim to send Netlink notifications in addition to
the existing igmpmsg/mrt6msg, giving user programs a way to handle
cache reports in parallel on multiple sockets other than the
mroute/mroute6 socket.

Changes in v2:
- Changed attributes naming from {IPMRA,IP6MRA}_CACHEREPORTA_* to
  {IPMRA,IP6MRA}_CREPORT_*
- Improved packet data copy to handle non-linear packets in
  ipmr/ip6mr cache report Netlink notification creation
- Added two rtnetlink groups with restricted-binding
- Changed cache report notified groups from RTNL_{IPV4,IPV6}_MROUTE to
  the new restricted groups in ipmr/ip6mr

Julien Gomes (4):
  rtnetlink: add NEWCACHEREPORT message type
  rtnetlink: add restricted rtnl groups for ipv4 and ipv6 mroute
  ipmr: add netlink notifications on igmpmsg cache reports
  ip6mr: add netlink notifications on mrt6msg cache reports

 include/uapi/linux/mroute.h| 12 
 include/uapi/linux/mroute6.h   | 12 
 include/uapi/linux/rtnetlink.h |  7 +
 net/core/rtnetlink.c   | 13 
 net/ipv4/ipmr.c| 67 --
 net/ipv6/ip6mr.c   | 67 --
 security/selinux/nlmsgtab.c|  3 +-
 7 files changed, 176 insertions(+), 5 deletions(-)

-- 
2.13.1



[PATCH net-next v2 1/4] rtnetlink: add NEWCACHEREPORT message type

2017-06-19 Thread Julien Gomes
Add a new NEWCACHEREPORT message type to be used for cache reports sent
via Netlink, effectively allowing cache report reception to be split
from mroute programming.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/rtnetlink.h | 3 +++
 security/selinux/nlmsgtab.c| 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 564790e854f7..cd1afb900929 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -146,6 +146,9 @@ enum {
RTM_GETSTATS = 94,
 #define RTM_GETSTATS RTM_GETSTATS
 
+   RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
__RTM_MAX,
 #define RTM_MAX(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 5aeaf30b7a13..7b7433a1a34c 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -79,6 +79,7 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_GETNSID,  NETLINK_ROUTE_SOCKET__NLMSG_READ  },
{ RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ },
{ RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+   { RTM_NEWCACHEREPORT,   NETLINK_ROUTE_SOCKET__NLMSG_READ },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -158,7 +159,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
switch (sclass) {
case SECCLASS_NETLINK_ROUTE_SOCKET:
/* RTM_MAX always point to RTM_SET, ie RTM_NEWxxx + 3 */
-   BUILD_BUG_ON(RTM_MAX != (RTM_NEWSTATS + 3));
+   BUILD_BUG_ON(RTM_MAX != (RTM_NEWCACHEREPORT + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 sizeof(nlmsg_route_perms));
break;
-- 
2.13.1



Re: [PATCH net-next 0/3] ipmr/ip6mr: add Netlink notifications on cache reports

2017-06-16 Thread Julien Gomes
On 06/15/2017 06:00 AM, Nikolay Aleksandrov wrote:
> On 15/06/17 14:44, Nikolay Aleksandrov wrote:
>> On 15/06/17 14:33, Nikolay Aleksandrov wrote:
>>> On 15/06/17 00:51, Julien Gomes wrote:
>>>> Hi Nikolay,
>>>>
>>>> On 06/14/2017 05:04 AM, Nikolay Aleksandrov wrote:
>>>>
>>>>> This has been on our todo list and I'm definitely interested in the 
>>>>> implementation.
>>>>> A few things that need careful consideration from my POV. First are the 
>>>>> security
>>>>> implications - this sends rtnl multicast messages but the rtnl socket has
>>>>> the NL_CFG_F_NONROOT_RECV flag thus allowing any user on the system to 
>>>>> listen in.
>>>>> This would allow them to see the full packets and all reports (granted 
>>>>> they can see
>>>>> the notifications even now), but the full packet is like giving them the 
>>>>> opportunity
>>>>> to tcpdump the PIM traffic.
>>>> I definitely see how this can be an issue.
>>>> From what I see, this means that either the packet should be
>>>> transmitted another way, or another Netlink family should be used.
>>>>
>>>> NETLINK_ROUTE looks to be the logical family to choose though,
>>>> but then I do not see a proper other way to handle this.
>>> Right, currently me neither, unless it provides a bind callback when 
>>> registering
>>> the kernel socket.
>>>
>>>> However I may just not be looking into the right direction,
>>>> maybe you currently have another approach in mind?
>>> I haven't gotten around to make (or even try) them but I was thinking about 
>>> 2 options
>>> ending up with a similar result:
>>>
>>> 1) genetlink
>>>  It also has the NONROOT_RECV flag, but it also allows for a callback - 
>>> mcast_bind()
>>>  which can be used to filter.
>>>
>>> or
>>>
>>> 2) Providing a bind callback to the NETLINK_ROUTE socket.
>>>
>> Ah nevermind, these cannot be used for filtering currently, so it seems
>> the netlink interface would need to be extended too if going down this road.
>>
> Sorry for the multiple emails, just to be thorough - again if going down this
> road all of these would obviously require a different group to bind to in 
> order
> to be able to filter on it, because users must keep receiving their 
> notifications
> for the ipmr one.

Actually, using a bind callback for NETLINK_ROUTE with a new group,
without netlink interface extension, could work.

I quickly tested something like this:
> static int rtnetlink_bind(struct net *net, int group)
> {
>         switch (group) {
>         case RTNLGRP_IPV4_MROUTE_R:
>         case RTNLGRP_IPV6_MROUTE_R:
>                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
>                         return -EPERM;
>                 break;
>         }
>         return 0;
> }

With the addition of one/two groups this does restrict the reports'
potential listeners.
The group names here are just placeholders, I am not especially fixed
on these ones.

It is not perfect as this would introduce groups with specific
requirements in NETLINK_ROUTE, but I think it can be decent.

What do you think about this?

-- 
Julien Gomes



Re: [PATCH net-next 2/3] ipmr: add netlink notifications on igmpmsg cache reports

2017-06-14 Thread Julien Gomes
On 06/14/2017 12:56 AM, Nicolas Dichtel wrote:
> On 13/06/2017 19:08, Julien Gomes wrote:
>> Add Netlink notifications on cache reports in ipmr, in addition to the
>> existing igmpmsg sent to mroute_sk.
>> Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV4_MROUTE.
>>
>> MSGTYPE, VIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
>> same data as their equivalent fields in the igmpmsg header.
>> PKT attribute is the packet sent to mroute_sk, without the added igmpmsg
>> header.
>>
>> Suggested-by: Ryan Halbrook <halbr...@arista.com>
>> Signed-off-by: Julien Gomes <jul...@arista.com>
>> ---
>>  include/uapi/linux/mroute.h | 11 
>>  net/ipv4/ipmr.c | 63 
>> +++--
>>  2 files changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
>> index f904367c0cee..f6f9e01ee734 100644
>> --- a/include/uapi/linux/mroute.h
>> +++ b/include/uapi/linux/mroute.h
>> @@ -152,6 +152,17 @@ enum {
>>  };
>>  #define IPMRA_VIFA_MAX (__IPMRA_VIFA_MAX - 1)
>>  
>> +/* ipmr netlink cache report attributes */
>> +enum {
> IPMRA_CACHEREPORTA_UNSPEC is missing.

Indeed, I will add it.

> By the way, maybe something shorter than IPMRA_CACHEREPORTA_ would be better.
> What about IPMR_CREPORTA_? IPMR_CACHEA_? IPMR_IGMPA_? or whatever.

I see absolutely no issue in shortening them.
IPMRA_CREPORT_ sounds good to me.

> What is the signification of the two 'A'? One for 'attribute', but the other?

Now that you mention it, the second 'A' is simply redundant.
I will remove it and go with IPMRA_CREPORT_.

-- 
Julien Gomes



Re: [PATCH net-next 0/3] ipmr/ip6mr: add Netlink notifications on cache reports

2017-06-14 Thread Julien Gomes
Hi Nikolay,

On 06/14/2017 05:04 AM, Nikolay Aleksandrov wrote:

> This has been on our todo list and I'm definitely interested in the 
> implementation.
> A few things that need careful consideration from my POV. First are the 
> security
> implications - this sends rtnl multicast messages but the rtnl socket has
> the NL_CFG_F_NONROOT_RECV flag thus allowing any user on the system to listen 
> in.
> This would allow them to see the full packets and all reports (granted they 
> can see
> the notifications even now), but the full packet is like giving them the 
> opportunity
> to tcpdump the PIM traffic.

I definitely see how this can be an issue.
From what I see, this means that either the packet should be
transmitted another way, or another Netlink family should be used.

NETLINK_ROUTE looks to be the logical family to choose though,
but then I do not see a proper other way to handle this.

However I may just not be looking into the right direction,
maybe you currently have another approach in mind?

> My second (more fixable and minor) concern is about the packet itself, how do 
> you
> know that the packet is all linear so you can directly copy it ?

Indeed, I overlooked this possibility in this version.
I will improve that.

-- 
Julien Gomes



[PATCH net-next 3/3] ip6mr: add netlink notifications on mrt6msg cache reports

2017-06-13 Thread Julien Gomes
Add Netlink notifications on cache reports in ip6mr, in addition to the
existing mrt6msg sent to mroute6_sk.
Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV6_MROUTE.

MSGTYPE, MIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
same data as their equivalent fields in the mrt6msg header.
PKT attribute is the packet sent to mroute6_sk, without the added
mrt6msg header.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/mroute6.h | 11 
 net/ipv6/ip6mr.c | 63 ++--
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index ed5721148768..8e3c2d2c170e 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -133,4 +133,15 @@ struct mrt6msg {
struct in6_addr im6_src, im6_dst;
 };
 
+/* ip6mr netlink cache report attributes */
+enum {
+   IP6MRA_CACHEREPORTA_MSGTYPE,
+   IP6MRA_CACHEREPORTA_MIF_ID,
+   IP6MRA_CACHEREPORTA_SRC_ADDR,
+   IP6MRA_CACHEREPORTA_DST_ADDR,
+   IP6MRA_CACHEREPORTA_PKT,
+   __IP6MRA_CACHEREPORTA_MAX
+};
+#define IP6MRA_CACHEREPORTA_MAX (__IP6MRA_CACHEREPORTA_MAX - 1)
+
 #endif /* _UAPI__LINUX_MROUTE6_H */
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 374997d26488..8667256b4343 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -116,6 +116,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
   struct mfc6_cache *c, struct rtmsg *rtm);
 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
  int cmd);
+static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
   struct netlink_callback *cb);
 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
@@ -1123,8 +1124,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 }
 
 /*
- * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
- * expects the following bizarre scheme.
+ * Bounce a cache query up to pim6sd and netlink.
  *
  * Called under mrt_lock.
  */
@@ -1206,6 +1206,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return -EINVAL;
}
 
+   mrt6msg_netlink_event(mrt, skb);
+
/*
 *  Deliver to user space multicast routing algorithms
 */
@@ -2455,6 +2457,63 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
 }
 
+static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+{
+   struct net *net = read_pnet(&mrt->net);
+   struct nlmsghdr *nlh;
+   struct rtgenmsg *rtgenm;
+   struct mrt6msg *msg;
+   struct sk_buff *skb;
+   int payloadlen;
+   int msgsize;
+
+   payloadlen = pkt->len - sizeof(struct mrt6msg);
+   msg = (struct mrt6msg *)skb_transport_header(pkt);
+   msgsize = NLMSG_ALIGN(sizeof(struct rtgenmsg))
+   + nla_total_size(1)
+   /* IP6MRA_CACHEREPORTA_MSGTYPE */
+   + nla_total_size(2)
+   /* IP6MRA_CACHEREPORTA_MIF_ID */
+   + nla_total_size(sizeof(struct in6_addr))
+   /* IP6MRA_CACHEREPORTA_SRC_ADDR */
+   + nla_total_size(sizeof(struct in6_addr))
+   /* IP6MRA_CACHEREPORTA_DST_ADDR */
+   + nla_total_size(payloadlen)
+   /* IP6MRA_CACHEREPORTA_PKT */
+   ;
+
+   skb = nlmsg_new(msgsize, GFP_ATOMIC);
+   if (!skb)
+   goto errout;
+
+   nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
+   sizeof(struct rtgenmsg), 0);
+   if (!nlh)
+   goto errout;
+   rtgenm = nlmsg_data(nlh);
+   rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
+   if (nla_put_u8(skb, IP6MRA_CACHEREPORTA_MSGTYPE, msg->im6_msgtype) ||
+   nla_put_u16(skb, IP6MRA_CACHEREPORTA_MIF_ID, msg->im6_mif) ||
+   nla_put_in6_addr(skb, IP6MRA_CACHEREPORTA_SRC_ADDR,
+    &msg->im6_src) ||
+   nla_put_in6_addr(skb, IP6MRA_CACHEREPORTA_DST_ADDR,
+    &msg->im6_dst) ||
+   nla_put(skb, IP6MRA_CACHEREPORTA_PKT, payloadlen,
+   pkt->data + sizeof(struct mrt6msg)))
+   goto nla_put_failure;
+
+   nlmsg_end(skb, nlh);
+
+   rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
+   return;
+
+nla_put_failure:
+   nlmsg_cancel(skb, nlh);
+errout:
+   kfree_skb(skb);
+   rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, -ENOBUFS);
+}

[PATCH net-next 1/3] rtnetlink: add NEWCACHEREPORT message type

2017-06-13 Thread Julien Gomes
Add a new NEWCACHEREPORT message type to be used for cache reports sent
via Netlink, effectively allowing cache report reception to be split
from mroute programming.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/rtnetlink.h | 3 +++
 security/selinux/nlmsgtab.c| 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 564790e854f7..cd1afb900929 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -146,6 +146,9 @@ enum {
RTM_GETSTATS = 94,
 #define RTM_GETSTATS RTM_GETSTATS
 
+   RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
__RTM_MAX,
 #define RTM_MAX(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 5aeaf30b7a13..7b7433a1a34c 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -79,6 +79,7 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_GETNSID,  NETLINK_ROUTE_SOCKET__NLMSG_READ  },
{ RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ },
{ RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+   { RTM_NEWCACHEREPORT,   NETLINK_ROUTE_SOCKET__NLMSG_READ },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -158,7 +159,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
switch (sclass) {
case SECCLASS_NETLINK_ROUTE_SOCKET:
/* RTM_MAX always point to RTM_SET, ie RTM_NEWxxx + 3 */
-   BUILD_BUG_ON(RTM_MAX != (RTM_NEWSTATS + 3));
+   BUILD_BUG_ON(RTM_MAX != (RTM_NEWCACHEREPORT + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 sizeof(nlmsg_route_perms));
break;
-- 
2.13.1



[PATCH net-next 0/3] ipmr/ip6mr: add Netlink notifications on cache reports

2017-06-13 Thread Julien Gomes
Currently, all ipmr/ip6mr cache reports are sent through the
mroute/mroute6 socket only.
This forces the use of a single socket for mroute programming, cache
reports and, in the ipmr case, the reception of IGMP messages that lack
the Router Alert option.

The present patches aim to send Netlink notifications in addition to
the existing igmpmsg/mrt6msg, giving user programs a way to handle
cache reports in parallel on multiple sockets other than the
mroute/mroute6 socket.

Julien Gomes (3):
  rtnetlink: add NEWCACHEREPORT message type
  ipmr: add netlink notifications on igmpmsg cache reports
  ip6mr: add netlink notifications on mrt6msg cache reports

 include/uapi/linux/mroute.h| 11 
 include/uapi/linux/mroute6.h   | 11 
 include/uapi/linux/rtnetlink.h |  3 ++
 net/ipv4/ipmr.c| 63 --
 net/ipv6/ip6mr.c   | 63 --
 security/selinux/nlmsgtab.c|  3 +-
 6 files changed, 149 insertions(+), 5 deletions(-)

-- 
2.13.1



[PATCH net-next 2/3] ipmr: add netlink notifications on igmpmsg cache reports

2017-06-13 Thread Julien Gomes
Add Netlink notifications on cache reports in ipmr, in addition to the
existing igmpmsg sent to mroute_sk.
Send RTM_NEWCACHEREPORT notifications to RTNLGRP_IPV4_MROUTE.

MSGTYPE, VIF_ID, SRC_ADDR and DST_ADDR Netlink attributes contain the
same data as their equivalent fields in the igmpmsg header.
PKT attribute is the packet sent to mroute_sk, without the added igmpmsg
header.

Suggested-by: Ryan Halbrook <halbr...@arista.com>
Signed-off-by: Julien Gomes <jul...@arista.com>
---
 include/uapi/linux/mroute.h | 11 
 net/ipv4/ipmr.c | 63 +++--
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index f904367c0cee..f6f9e01ee734 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -152,6 +152,17 @@ enum {
 };
 #define IPMRA_VIFA_MAX (__IPMRA_VIFA_MAX - 1)
 
+/* ipmr netlink cache report attributes */
+enum {
+   IPMRA_CACHEREPORTA_MSGTYPE,
+   IPMRA_CACHEREPORTA_VIF_ID,
+   IPMRA_CACHEREPORTA_SRC_ADDR,
+   IPMRA_CACHEREPORTA_DST_ADDR,
+   IPMRA_CACHEREPORTA_PKT,
+   __IPMRA_CACHEREPORTA_MAX
+};
+#define IPMRA_CACHEREPORTA_MAX (__IPMRA_CACHEREPORTA_MAX - 1)
+
 /* That's all usermode folks */
 
 #define MFC_ASSERT_THRESH (3*HZ)   /* Maximal freq. of asserts */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9374b99c7c17..6d1f4fae3749 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -109,6 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
  struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 int cmd);
+static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
@@ -993,8 +994,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
}
 }
 
-/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
- * expects the following bizarre scheme.
+/* Bounce a cache query up to mrouted and netlink.
  *
  * Called under mrt_lock.
  */
@@ -1060,6 +1060,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
return -EINVAL;
}
 
+   igmpmsg_netlink_event(mrt, skb);
+
/* Deliver to mrouted */
ret = sock_queue_rcv_skb(mroute_sk, skb);
rcu_read_unlock();
@@ -2341,6 +2343,63 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
 }
 
+static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
+{
+   struct net *net = read_pnet(&mrt->net);
+   struct nlmsghdr *nlh;
+   struct rtgenmsg *rtgenm;
+   struct igmpmsg *msg;
+   struct sk_buff *skb;
+   int payloadlen;
+   int msgsize;
+
+   payloadlen = pkt->len - sizeof(struct igmpmsg);
+   msg = (struct igmpmsg *)skb_network_header(pkt);
+   msgsize = NLMSG_ALIGN(sizeof(struct rtgenmsg))
+   + nla_total_size(1)
+   /* IPMRA_CACHEREPORTA_MSGTYPE */
+   + nla_total_size(1)
+   /* IPMRA_CACHEREPORTA_VIF_ID */
+   + nla_total_size(4)
+   /* IPMRA_CACHEREPORTA_SRC_ADDR */
+   + nla_total_size(4)
+   /* IPMRA_CACHEREPORTA_DST_ADDR */
+   + nla_total_size(payloadlen)
+   /* IPMRA_CACHEREPORTA_PKT */
+   ;
+
+   skb = nlmsg_new(msgsize, GFP_ATOMIC);
+   if (!skb)
+   goto errout;
+
+   nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
+   sizeof(struct rtgenmsg), 0);
+   if (!nlh)
+   goto errout;
+   rtgenm = nlmsg_data(nlh);
+   rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
+   if (nla_put_u8(skb, IPMRA_CACHEREPORTA_MSGTYPE, msg->im_msgtype) ||
+   nla_put_u8(skb, IPMRA_CACHEREPORTA_VIF_ID, msg->im_vif) ||
+   nla_put_in_addr(skb, IPMRA_CACHEREPORTA_SRC_ADDR,
+   msg->im_src.s_addr) ||
+   nla_put_in_addr(skb, IPMRA_CACHEREPORTA_DST_ADDR,
+   msg->im_dst.s_addr) ||
+   nla_put(skb, IPMRA_CACHEREPORTA_PKT, payloadlen,
+   pkt->data + sizeof(struct igmpmsg)))
+   goto nla_put_failure;
+
+   nlmsg_end(skb, nlh);
+
+   rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
+   return;
+
+nla_put_failure:
+   nlmsg_cancel(skb, nlh);
+errout:
+   kfree_skb(skb);
+   rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, -ENOBUFS);
+}
+
 

[PATCH net] tun: allow positive return values on dev_get_valid_name() call

2017-10-25 Thread Julien Gomes
If the name argument of dev_get_valid_name() contains "%d", it will try
to assign it a unit number in __dev_alloc_name() and return either the
unit number (>= 0) or an error code (< 0).
Treating positive return values as errors prevents tun device creation
from relying on this mechanism, therefore we should only consider
negative values as errors here.
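
For context, the template name reaches dev_get_valid_name() through a
TUNSETIFF request such as the following (sketch):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

int main(void)
{
    struct ifreq ifr;
    int fd = open("/dev/net/tun", O_RDWR);

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
    /* "%d" asks the kernel to pick the unit number, so
     * dev_get_valid_name() returns that (positive) unit instead of 0. */
    strncpy(ifr.ifr_name, "tun%d", IFNAMSIZ);
    if (fd < 0 || ioctl(fd, TUNSETIFF, &ifr) < 0)
        return 1;
    /* ifr.ifr_name now holds the allocated name, e.g. "tun0". */
    return 0;
}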

Signed-off-by: Julien Gomes <jul...@arista.com>
---
 drivers/net/tun.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ea29da91ea5a..4d2400c253ba 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2253,7 +2253,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
if (!dev)
return -ENOMEM;
err = dev_get_valid_name(net, dev, name);
-   if (err)
+   if (err < 0)
goto err_free_dev;
 
dev_net_set(dev, net);
-- 
2.14.2