On Thu, 21 Nov 2024 at 19:57, Linus Torvalds
<torva...@linux-foundation.org> wrote:
>
> Anyway, that was a long way of saying: I really think we should just
> special-case the (few) important cases that get reported. Because any
> *big* improvements will come not from just inlining.

Looking around at the futex code some more, I note:

 - the cmpxchg case and futex ops use an explicit barrier too, which is bad

 - we'd actually be better off inlining not just the user access, but
the whole futex_get_value_locked(), because then the compiler will be
able to do CSE on the user address masking and only do it once
(several code paths make multiple futex_get_value_locked() calls on
the same uaddr; see the sketch below).
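
To make the CSE point concrete, here's a hypothetical caller shape
(not code from the tree, just an illustration) that reads the same
futex word twice, e.g. before and after taking the hash bucket lock:

	u32 uval, uval2;

	if (futex_get_value_locked(&uval, uaddr))
		return -EFAULT;

	/* ... take the hash bucket lock, then re-check ... */

	if (futex_get_value_locked(&uval2, uaddr))
		return -EFAULT;

With the out-of-line version each call re-derives the masked form of
'uaddr' behind the call boundary; once the helper is inlined, the
compiler sees both masking computations and can fold them into one.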

iow, I think the fix for the futex case ends up being a patch
something like the attached.

[ Annoyingly, we need "can_do_masked_user_access()" even on x86,
because the 32-bit case doesn't do the address masking trick ]
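
For anyone who hasn't seen the trick: on x86-64 it amounts to roughly
the following (a hand-waved sketch, not the actual implementation; the
real masking is done with a cmp+sbb asm sequence so it stays branchless
even with a dumb compiler):

	/*
	 * Sketch only: clamp a would-be kernel address to an all-ones
	 * (non-canonical) pointer.  A speculative dereference of the
	 * result can only fault, never reach kernel memory, so no
	 * speculation barrier is needed on the user access path.
	 */
	static inline u32 __user *mask_uaddr_sketch(u32 __user *uaddr)
	{
		unsigned long addr = (unsigned long)uaddr;
		/* all ones if addr is above the user-space limit, else zero */
		unsigned long mask = 0UL - (addr > TASK_SIZE_MAX);

		return (u32 __user *)(addr | mask);
	}

32-bit x86 has no non-canonical hole to clamp into, hence the fallback
to the plain user_access_begin() path there.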

I've only compiled it so far, about to actually boot into it. Pray for me,

               Linus
 arch/x86/include/asm/futex.h |  8 ++++--
 kernel/futex/core.c          | 22 -----------------
 kernel/futex/futex.h         | 59 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 99d345b686fa..6e2458088800 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do {								\
 static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 		u32 __user *uaddr)
 {
-	if (!user_access_begin(uaddr, sizeof(u32)))
+	if (can_do_masked_user_access())
+		uaddr = masked_user_access_begin(uaddr);
+	else if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 {
 	int ret = 0;
 
-	if (!user_access_begin(uaddr, sizeof(u32)))
+	if (can_do_masked_user_access())
+		uaddr = masked_user_access_begin(uaddr);
+	else if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 	asm volatile("\n"
 		"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 326bfe6549d7..9833fdb63e39 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *
 	return NULL;
 }
 
-int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-{
-	int ret;
-
-	pagefault_disable();
-	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-	pagefault_enable();
-
-	return ret;
-}
-
-int futex_get_value_locked(u32 *dest, u32 __user *from)
-{
-	int ret;
-
-	pagefault_disable();
-	ret = __get_user(*dest, from);
-	pagefault_enable();
-
-	return ret ? -EFAULT : 0;
-}
-
 /**
  * wait_for_owner_exiting - Block until the owner has exited
  * @ret: owner's current futex lock status
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 8b195d06f4e8..323572014b32 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -6,6 +6,7 @@
 #include <linux/rtmutex.h>
 #include <linux/sched/wake_q.h>
 #include <linux/compat.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_PREEMPT_RT
 #include <linux/rcuwait.h>
@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct futex_q *q);
 extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
 
 extern int fault_in_user_writeable(u32 __user *uaddr);
-extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-extern int futex_get_value_locked(u32 *dest, u32 __user *from);
 extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
 
+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
+{
+	int ret;
+
+	pagefault_disable();
+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+	pagefault_enable();
+
+	return ret;
+}
+
+/*
+ * This does a plain atomic user space read, and the user pointer has
+ * already been verified earlier by get_futex_key() to be both aligned
+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
+ *
+ * We still want to avoid any speculation, and while __get_user() is
+ * the traditional model for this, it's actually slower than doing
+ * this manually these days.
+ *
+ * We could just have a per-architecture special function for it,
+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
+ * than force everybody to do that, write it out long-hand using
+ * the low-level user-access infrastructure.
+ *
+ * This looks a bit overkill, but generally just results in a couple
+ * of instructions.
+ */
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+	u32 val;
+
+	if (can_do_masked_user_access())
+		from = masked_user_access_begin(from);
+	else if (!user_read_access_begin(from, sizeof(*from)))
+		return -EFAULT;
+	unsafe_get_user(val, from, Efault);
+	user_access_end();
+	*dest = val;
+	return 0;
+Efault:
+	user_access_end();
+	return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+	int ret;
+
+	pagefault_disable();
+	ret = futex_read_inatomic(dest, from);
+	pagefault_enable();
+
+	return ret;
+}
+
 extern void __futex_unqueue(struct futex_q *q);
 extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
 extern int futex_unqueue(struct futex_q *q);
