The branch main has been updated by melifaro:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=04f75b980293d517558990a7fda6900445edcac6

commit 04f75b980293d517558990a7fda6900445edcac6
Author:     Alexander V. Chernikov <[email protected]>
AuthorDate: 2023-03-26 08:42:51 +0000
Commit:     Alexander V. Chernikov <[email protected]>
CommitDate: 2023-03-26 08:44:09 +0000

    netlink: allow netlink sockets in non-vnet jails.
    
    This change allows opening Netlink sockets in non-vnet jails, even for
     unprivileged processes.
    The security model largely follows the existing one. To be more specific:
    * by default, every `NETLINK_ROUTE` command is **NOT** allowed in non-VNET
     jail UNLESS `RTNL_F_ALLOW_NONVNET_JAIL` flag is specified in the command
     handler.
    * All notifications are **disabled** for non-vnet jails (requests to
     subscribe to notifications are ignored). This will change to a more
     fine-grained model once the first netlink provider requiring this gets
     committed.
    * Listing interfaces (RTM_GETLINK) is **allowed** w/o limits (**including**
     interfaces w/o any addresses attached to the jail). The value of this is
     questionable, but it follows the existing approach.
    * Listing ARP/NDP neighbours is **forbidden**. This is a **change** from the
     current approach - currently we list static ARP/ND entries belonging to the
     addresses attached to the jail.
    * Listing interface addresses is **allowed**, but the addresses are filtered
     to match only ones attached to the jail.
    * Listing routes is **allowed**, but the routes are filtered to provide only
     host routes matching the addresses attached to the jail.
    * By default, every `NETLINK_GENERIC` command is **allowed** in non-VNET
     jail (as sub-families may be unrelated to networking at all).
     It is the responsibility of the family author to implement the
     restriction if necessary.
    
    Differential Revision: https://reviews.freebsd.org/D39206
    MFC after:      1 month
---
 sys/kern/kern_jail.c          |  1 +
 sys/netlink/netlink_ctl.h     |  1 +
 sys/netlink/netlink_domain.c  | 12 ++++++++++++
 sys/netlink/netlink_generic.c |  1 +
 sys/netlink/netlink_route.c   |  5 +++++
 sys/netlink/netlink_var.h     |  1 +
 sys/netlink/route/iface.c     |  8 +++++++-
 sys/netlink/route/route_var.h |  3 ++-
 sys/netlink/route/rt.c        |  6 ++++++
 9 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 7b57e5bb9d61..0558c7d9b7fe 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -3440,6 +3440,7 @@ prison_check_af(struct ucred *cred, int af)
 #endif
        case AF_LOCAL:
        case AF_ROUTE:
+       case AF_NETLINK:
                break;
        default:
                if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF))
diff --git a/sys/netlink/netlink_ctl.h b/sys/netlink/netlink_ctl.h
index 8cd29cf56d10..9369194151af 100644
--- a/sys/netlink/netlink_ctl.h
+++ b/sys/netlink/netlink_ctl.h
@@ -81,6 +81,7 @@ bool netlink_unregister_proto(int proto);
 bool nl_has_listeners(int netlink_family, uint32_t groups_mask);
 bool nlp_has_priv(struct nlpcb *nlp, int priv);
 struct ucred *nlp_get_cred(struct nlpcb *nlp);
+bool nlp_unconstrained_vnet(const struct nlpcb *nlp);
 
 /* netlink_generic.c */
 struct genl_cmd {
diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
index 2704974173b4..24ca9de877f0 100644
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -36,6 +36,7 @@
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/domain.h>
+#include <sys/jail.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/proc.h>
@@ -111,6 +112,10 @@ nl_add_group_locked(struct nlpcb *nlp, unsigned int 
group_id)
        MPASS(group_id <= NLP_MAX_GROUPS);
        --group_id;
 
+       /* TODO: add family handler callback */
+       if (!nlp_unconstrained_vnet(nlp))
+               return;
+
        nlp->nl_groups[group_id / 64] |= (uint64_t)1 << (group_id % 64);
 }
 
@@ -212,6 +217,12 @@ nlp_has_priv(struct nlpcb *nlp, int priv)
        return (priv_check_cred(nlp->nl_cred, priv) == 0);
 }
 
+bool
+nlp_unconstrained_vnet(const struct nlpcb *nlp)
+{
+       return (nlp->nl_unconstrained_vnet);
+}
+
 struct ucred *
 nlp_get_cred(struct nlpcb *nlp)
 {
@@ -308,6 +319,7 @@ nl_pru_attach(struct socket *so, int proto, struct thread 
*td)
        nlp->nl_process_id = curproc->p_pid;
        nlp->nl_linux = is_linux;
        nlp->nl_active = true;
+       nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred);
        NLP_LOCK_INIT(nlp);
        refcount_init(&nlp->nl_refcount, 1);
        nl_init_io(nlp);
diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c
index d4022c2c2a75..a2bd624f99d9 100644
--- a/sys/netlink/netlink_generic.c
+++ b/sys/netlink/netlink_generic.c
@@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/ck.h>
 #include <sys/epoch.h>
 #include <sys/kernel.h>
+#include <sys/jail.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
diff --git a/sys/netlink/netlink_route.c b/sys/netlink/netlink_route.c
index 037fd2170c66..ce0c0eb36dbc 100644
--- a/sys/netlink/netlink_route.c
+++ b/sys/netlink/netlink_route.c
@@ -93,6 +93,11 @@ rtnl_handle_message(struct nlmsghdr *hdr, struct nl_pstate 
*npt)
        } else if (cmd->priv != 0)
                NLP_LOG(LOG_DEBUG3, nlp, "priv %d check passed for msg %s", 
cmd->priv, cmd->name);
 
+       if (!nlp_unconstrained_vnet(nlp) && (cmd->flags & 
RTNL_F_ALLOW_NONVNET_JAIL) == 0) {
+               NLP_LOG(LOG_DEBUG2, nlp, "jail check failed for msg %s", 
cmd->name);
+               return (EPERM);
+       }
+
        bool need_epoch = !(cmd->flags & RTNL_F_NOEPOCH);
 
        if (need_epoch)
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
index ed19008248e9..0114306885cf 100644
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -61,6 +61,7 @@ struct nlpcb {
         bool                   nl_task_pending;
        bool                    nl_tx_blocked; /* No new requests accepted */
        bool                    nl_linux; /* true if running under compat */
+       bool                    nl_unconstrained_vnet; /* true if running under 
VNET jail (or without jail) */
        struct nl_io_queue      rx_queue;
        struct nl_io_queue      tx_queue;
        struct taskqueue        *nl_taskqueue;
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c
index 6704acd1624f..18eab05576b7 100644
--- a/sys/netlink/route/iface.c
+++ b/sys/netlink/route/iface.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/types.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
@@ -61,6 +62,7 @@ struct netlink_walkargs {
        struct nl_writer *nw;
        struct nlmsghdr hdr;
        struct nlpcb *so;
+       struct ucred *cred;
        uint32_t fibnum;
        int family;
        int error;
@@ -833,6 +835,8 @@ dump_iface_addrs(struct netlink_walkargs *wa, struct ifnet 
*ifp)
                        continue;
                if (ifa->ifa_addr->sa_family == AF_LINK)
                        continue;
+               if (prison_if(wa->cred, ifa->ifa_addr) != 0)
+                       continue;
                wa->count++;
                if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr))
                        return (ENOMEM);
@@ -856,6 +860,7 @@ rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb 
*nlp, struct nl_pstate *n
        struct netlink_walkargs wa = {
                .so = nlp,
                .nw = npt->nw,
+               .cred = nlp_get_cred(nlp),
                .family = attrs.ifa_family,
                .hdr.nlmsg_pid = hdr->nlmsg_pid,
                .hdr.nlmsg_seq = hdr->nlmsg_seq,
@@ -977,7 +982,7 @@ static const struct rtnl_cmd_handler cmd_handlers[] = {
                .cmd = NL_RTM_GETLINK,
                .name = "RTM_GETLINK",
                .cb = &rtnl_handle_getlink,
-               .flags = RTNL_F_NOEPOCH,
+               .flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
        },
        {
                .cmd = NL_RTM_DELLINK,
@@ -997,6 +1002,7 @@ static const struct rtnl_cmd_handler cmd_handlers[] = {
                .cmd = NL_RTM_GETADDR,
                .name = "RTM_GETADDR",
                .cb = &rtnl_handle_getaddr,
+               .flags = RTNL_F_ALLOW_NONVNET_JAIL,
        },
        {
                .cmd = NL_RTM_NEWADDR,
diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h
index f3b1d7d929a5..a11857b14a1f 100644
--- a/sys/netlink/route/route_var.h
+++ b/sys/netlink/route/route_var.h
@@ -48,7 +48,8 @@ struct rtnl_cmd_handler {
        int             flags;
 };
 
-#define        RTNL_F_NOEPOCH  0x01
+#define        RTNL_F_NOEPOCH                  0x01    /* Do not enter epoch 
when handling command */
+#define        RTNL_F_ALLOW_NONVNET_JAIL       0x02    /* Allow command 
execution inside non-VNET jail */
 
 bool rtnl_register_messages(const struct rtnl_cmd_handler *handlers, int 
count);
 
diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c
index badd8d937be2..ef52dbf4edd6 100644
--- a/sys/netlink/route/rt.c
+++ b/sys/netlink/route/rt.c
@@ -513,6 +513,8 @@ dump_rtentry(struct rtentry *rt, void *_arg)
        wa->count++;
        if (wa->error != 0)
                return (0);
+       if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp)))
+               return (0);
        wa->dumped++;
 
        rt_get_rnd(rt, &wa->rnd);
@@ -606,6 +608,9 @@ handle_rtm_getroute(struct nlpcb *nlp, struct 
nl_parsed_route *attrs,
 
        RIB_RUNLOCK(rnh);
 
+       if (!rt_is_exportable(rt, nlp_get_cred(nlp)))
+               return (ESRCH);
+
        IF_DEBUG_LEVEL(LOG_DEBUG2) {
                char rtbuf[NHOP_PRINT_BUFSIZE] __unused, 
nhbuf[NHOP_PRINT_BUFSIZE] __unused;
                FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s 
for %s",
@@ -1026,6 +1031,7 @@ static const struct rtnl_cmd_handler cmd_handlers[] = {
                .cmd = NL_RTM_GETROUTE,
                .name = "RTM_GETROUTE",
                .cb = &rtnl_handle_getroute,
+               .flags = RTNL_F_ALLOW_NONVNET_JAIL,
        },
        {
                .cmd = NL_RTM_DELROUTE,

Reply via email to