The bpf(cmd, attr, size) syscall copies up to 'size' bytes on input, but several commands write outputs back to userspace unconditionally. Because copy_to_user() does not fault on adjacent mapped memory, a short userspace buffer results in out-of-bounds writes, potentially overwriting adjacent userspace memory.
Address this by introducing two policies based on field type: 1) Mandatory fields (original ABI): Return -EINVAL in __sys_bpf() if the buffer size does not cover them. This hardens the syscall front-gate for the following commands: - BPF_PROG_QUERY (min size: query.prog_cnt) - BPF_PROG_TEST_RUN (min size: test.duration) - BPF_*_GET_NEXT_ID (min size: next_id) - BPF_OBJ_GET_INFO_BY_FD (min size: info.info_len) - BPF_TASK_FD_QUERY (minimum size: task_fd_query.probe_addr) - BPF_MAP_*_BATCH (min size: batch.flags) 2) Optional fields (later revisions): Skip writeback if the buffer size does not cover the field. This is applied to BPF_PROG_QUERY's 'query.revision'. Older userspace passing a smaller size (e.g., 40 bytes) will have the write safely skipped. This size-gating pattern mirrors the existing precedent used for 'log_true_size' (verifier.c) and 'btf_log_true_size' (btf.c). To support this, the user-declared 'size' is plumbed from __sys_bpf() through the query dispatchers (cgroup, tcx, netkit) to the underlying writeback helpers in cgroup.c and mprog.c. Cc: Maciej Żenczykowski <[email protected]> Cc: Lorenzo Colitti <[email protected]> Signed-off-by: Yuyang Huang <[email protected]> Link: https://lore.kernel.org/r/CANP3RGfZTXM_u=e_atoompzxutoqj02nomkccr-ybzbom2s...@mail.gmail.com --- drivers/net/netkit.c | 5 +++-- include/linux/bpf-cgroup.h | 5 +++-- include/linux/bpf_mprog.h | 4 ++-- include/net/netkit.h | 6 ++++-- include/net/tcx.h | 5 +++-- kernel/bpf/cgroup.c | 13 +++++++------ kernel/bpf/mprog.c | 5 +++-- kernel/bpf/syscall.c | 34 +++++++++++++++++++++++++++++----- kernel/bpf/tcx.c | 5 +++-- 9 files changed, 57 insertions(+), 25 deletions(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 5e2eecc3165d..680607d6e039 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -813,7 +813,8 @@ int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog) return ret; } -int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) +int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, + u32 uattr_size) { struct net_device *dev; int ret; @@ -826,7 +827,7 @@ int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) ret = PTR_ERR(dev); goto out; } - ret = bpf_mprog_query(attr, uattr, netkit_entry_fetch(dev, false)); + ret = bpf_mprog_query(attr, uattr, uattr_size, netkit_entry_fetch(dev, false)); out: rtnl_unlock(); return ret; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index b2e79c2b41d5..4d0cc65976a1 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -421,7 +421,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int cgroup_bpf_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr); + union bpf_attr __user *uattr, u32 uattr_size); const struct bpf_func_proto * cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -452,7 +452,8 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, } static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, + u32 uattr_size) { return -EINVAL; } diff --git a/include/linux/bpf_mprog.h b/include/linux/bpf_mprog.h index 0b9f4caeeb0a..fa479ace854a 100644 --- a/include/linux/bpf_mprog.h +++ b/include/linux/bpf_mprog.h @@ -72,7 +72,7 @@ * // bpf_mprog user-side lock * // fetch active @entry from attach location * [...] - * ret = bpf_mprog_query(attr, uattr, entry); + * ret = bpf_mprog_query(attr, uattr, uattr_size, entry); * // bpf_mprog user-side unlock * * Data/fast path: @@ -329,7 +329,7 @@ int bpf_mprog_detach(struct bpf_mprog_entry *entry, u32 flags, u32 id_or_fd, u64 revision); int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, - struct bpf_mprog_entry *entry); + u32 uattr_size, struct bpf_mprog_entry *entry); static inline bool bpf_mprog_supported(enum bpf_prog_type type) { diff --git a/include/net/netkit.h b/include/net/netkit.h index 9ec0163739f4..fe209d1f9a64 100644 --- a/include/net/netkit.h +++ b/include/net/netkit.h @@ -9,7 +9,8 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); -int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, + u32 uattr_size); INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev)); #else static inline int netkit_prog_attach(const union bpf_attr *attr, @@ -31,7 +32,8 @@ static inline int netkit_prog_detach(const union bpf_attr *attr, } static inline int netkit_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, + u32 uattr_size) { return -EINVAL; } diff --git a/include/net/tcx.h b/include/net/tcx.h index 23a61af13547..610626b39676 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -166,7 +166,7 @@ int tcx_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); void tcx_uninstall(struct net_device *dev, bool ingress); int tcx_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr); + union bpf_attr __user *uattr, u32 uattr_size); static inline void dev_tcx_uninstall(struct net_device *dev) { @@ -194,7 +194,8 @@ static inline int tcx_prog_detach(const union bpf_attr *attr, } static inline int tcx_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, + u32 uattr_size) { return -EINVAL; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 876f6a81a9b6..2c2bdaa86aa7 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1208,7 +1208,7 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, /* Must be called with cgroup_mutex held to avoid races. */ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, u32 uattr_size) { __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags); bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE; @@ -1259,7 +1259,8 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, return -EFAULT; if (!effective_query && from_atype == to_atype) revision = cgrp->bpf.revisions[from_atype]; - if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) + if (uattr_size >= offsetofend(union bpf_attr, query.revision) && + copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) return -EFAULT; if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt) /* return early if user requested only program count + flags */ @@ -1312,12 +1313,12 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, } static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, u32 uattr_size) { int ret; cgroup_lock(); - ret = __cgroup_bpf_query(cgrp, attr, uattr); + ret = __cgroup_bpf_query(cgrp, attr, uattr, uattr_size); cgroup_unlock(); return ret; } @@ -1520,7 +1521,7 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) } int cgroup_bpf_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, u32 uattr_size) { struct cgroup *cgrp; int ret; @@ -1529,7 +1530,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - ret = cgroup_bpf_query(cgrp, attr, uattr); + ret = cgroup_bpf_query(cgrp, attr, uattr, uattr_size); cgroup_put(cgrp); return ret; diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c index 1394168062e8..822d9c4c0db4 100644 --- a/kernel/bpf/mprog.c +++ b/kernel/bpf/mprog.c @@ -393,7 +393,7 @@ int bpf_mprog_detach(struct bpf_mprog_entry *entry, } int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, - struct bpf_mprog_entry *entry) + u32 uattr_size, struct bpf_mprog_entry *entry) { u32 __user *uprog_flags, *ulink_flags; u32 __user *uprog_id, *ulink_id; @@ -413,7 +413,8 @@ int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, } if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) return -EFAULT; - if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) + if (uattr_size >= offsetofend(union bpf_attr, query.revision) && + copy_to_user(&uattr->query.revision, &revision, sizeof(revision))) return -EFAULT; if (copy_to_user(&uattr->query.count, &count, sizeof(count))) return -EFAULT; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a3c0214ca934..a46b0510d9e2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4654,7 +4654,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) #define BPF_PROG_QUERY_LAST_FIELD query.revision static int bpf_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr) + union bpf_attr __user *uattr, u32 uattr_size) { if (!bpf_net_capable()) return -EPERM; @@ -4693,7 +4693,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: case BPF_LSM_CGROUP: - return cgroup_bpf_prog_query(attr, uattr); + return cgroup_bpf_prog_query(attr, uattr, uattr_size); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); case BPF_FLOW_DISSECTOR: @@ -4706,10 +4706,10 @@ static int bpf_prog_query(const union bpf_attr *attr, return sock_map_bpf_prog_query(attr, uattr); case BPF_TCX_INGRESS: case BPF_TCX_EGRESS: - return tcx_prog_query(attr, uattr); + return tcx_prog_query(attr, uattr, uattr_size); case BPF_NETKIT_PRIMARY: case BPF_NETKIT_PEER: - return netkit_prog_query(attr, uattr); + return netkit_prog_query(attr, uattr, uattr_size); default: return -EINVAL; } @@ -6260,20 +6260,30 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) err = bpf_prog_detach(&attr); break; case BPF_PROG_QUERY: - err = bpf_prog_query(&attr, uattr.user); + if (size < offsetofend(union bpf_attr, query.prog_cnt)) + return -EINVAL; + err = bpf_prog_query(&attr, uattr.user, size); break; case BPF_PROG_TEST_RUN: + if (size < offsetofend(union bpf_attr, test.duration)) + return -EINVAL; err = bpf_prog_test_run(&attr, uattr.user); break; case BPF_PROG_GET_NEXT_ID: + if (size < offsetofend(union bpf_attr, next_id)) + return -EINVAL; err = bpf_obj_get_next_id(&attr, uattr.user, &prog_idr, &prog_idr_lock); break; case BPF_MAP_GET_NEXT_ID: + if (size < offsetofend(union bpf_attr, next_id)) + return -EINVAL; err = bpf_obj_get_next_id(&attr, uattr.user, &map_idr, &map_idr_lock); break; case BPF_BTF_GET_NEXT_ID: + if (size < offsetofend(union bpf_attr, next_id)) + return -EINVAL; err = bpf_obj_get_next_id(&attr, uattr.user, &btf_idr, &btf_idr_lock); break; @@ -6284,6 +6294,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) err = bpf_map_get_fd_by_id(&attr); break; case BPF_OBJ_GET_INFO_BY_FD: + if (size < offsetofend(union bpf_attr, info.info_len)) + return -EINVAL; err = bpf_obj_get_info_by_fd(&attr, uattr.user); break; case BPF_RAW_TRACEPOINT_OPEN: @@ -6296,22 +6308,32 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) err = bpf_btf_get_fd_by_id(&attr); break; case BPF_TASK_FD_QUERY: + if (size < offsetofend(union bpf_attr, task_fd_query.probe_addr)) + return -EINVAL; err = bpf_task_fd_query(&attr, uattr.user); break; case BPF_MAP_LOOKUP_AND_DELETE_ELEM: err = map_lookup_and_delete_elem(&attr); break; case BPF_MAP_LOOKUP_BATCH: + if (size < offsetofend(union bpf_attr, batch.flags)) + return -EINVAL; err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH); break; case BPF_MAP_LOOKUP_AND_DELETE_BATCH: + if (size < offsetofend(union bpf_attr, batch.flags)) + return -EINVAL; err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_AND_DELETE_BATCH); break; case BPF_MAP_UPDATE_BATCH: + if (size < offsetofend(union bpf_attr, batch.flags)) + return -EINVAL; err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH); break; case BPF_MAP_DELETE_BATCH: + if (size < offsetofend(union bpf_attr, batch.flags)) + return -EINVAL; err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH); break; case BPF_LINK_CREATE: @@ -6324,6 +6346,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) err = bpf_link_get_fd_by_id(&attr); break; case BPF_LINK_GET_NEXT_ID: + if (size < offsetofend(union bpf_attr, next_id)) + return -EINVAL; err = bpf_obj_get_next_id(&attr, uattr.user, &link_idr, &link_idr_lock); break; diff --git a/kernel/bpf/tcx.c b/kernel/bpf/tcx.c index 02db0113b8e7..2a91f6075511 100644 --- a/kernel/bpf/tcx.c +++ b/kernel/bpf/tcx.c @@ -119,7 +119,8 @@ void tcx_uninstall(struct net_device *dev, bool ingress) tcx_entry_free(entry); } -int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) +int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr, + u32 uattr_size) { bool ingress = attr->query.attach_type == BPF_TCX_INGRESS; struct net *net = current->nsproxy->net_ns; @@ -132,7 +133,7 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) ret = -ENODEV; goto out; } - ret = bpf_mprog_query(attr, uattr, tcx_entry_fetch(dev, ingress)); + ret = bpf_mprog_query(attr, uattr, uattr_size, tcx_entry_fetch(dev, ingress)); out: rtnl_unlock(); return ret; -- 2.54.0.563.g4f69b47b94-goog

