On Wed, 23 Oct 2024 at 02:45, Borislav Petkov <b...@alien8.de> wrote:
>
> So I was able to get some info: CLAC/STAC in everything Zen4 and older is
> serializing so there's no issue there.
>
> Zen5 is a different story and AC is being renamed there and thus,
> non-serializing. So we'd need to do something there, perhaps something
> like Josh's patch...
>
> But the good thing is we can alternative it out only for those machines, apart
> from the rest.

Hmm. The problem with alternatives is that they get increasingly ugly
once there are more than two of them.

And even doing a single alternative with some "don't do this unless X"
condition then means either extra no-ops or some horrid static branch.

Now, if we ignore LAM, we only have two cases (LA48 vs LA57), but we
know that even if we disable LAM until we do the LASS support, that
third case *will* happen.

Finally - the main reason to use the sign bit is that you don't need
some big constant - and the code is small, so the "shift right by 63
and or" is just two instructions and a couple of bytes.

But once you have an alternative *with* a big constant, that advantage
just goes away, because the space the constant takes up just gets
replaced with no-ops in the other variant instead.

So then you might as well just _always_ use a constant.

Which makes me wonder if we can't just use the runtime-const
infrastructure instead. That has the advantage that you can have any
number of different constants and it won't complicate the usage sites,
and you can easily just update the constant to an arbitrary value at
init time with runtime_const_init().
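
(To spell out the pattern: the usage site reads the constant through
runtime_const_ptr(), the linker script reserves the fixup section, and
boot code patches in the real value - exactly what the attached patch
does for USER_PTR_MAX. The little helper below is made up purely for
illustration:)

/* The "virtual" variable: no backing store, just a name for the constant */
extern unsigned long USER_PTR_MAX;

/* Usage site: compiles to a move of an immediate that gets patched at boot */
static inline bool addr_in_user_range(unsigned long addr)	/* illustrative helper */
{
	return addr <= runtime_const_ptr(USER_PTR_MAX);
}

/*
 * vmlinux.lds.S reserves the fixup section with
 *	RUNTIME_CONST(ptr, USER_PTR_MAX)
 *
 * and arch_cpu_finalize_init() patches the real value in once:
 *	runtime_const_init(ptr, USER_PTR_MAX);
 */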

So then LAM support becomes just updating the value for
runtime_const_init() - none of the code changes, and no different
alternatives cases.

Something like the attached, in other words. Note that this actually does that

+               if (cpu_feature_enabled(X86_FEATURE_LAM))
+                       USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;

partly to show it as docs, and partly because we should just clear
that X86_FEATURE_LAM bit if LASS isn't set, so I think it's right
anyway.

NOTE! This is obviously untested and I didn't check that it does the
cmp/sbb/or the right way around.

Also, it's worth noting that the cmp/sbb/or sequence (if I *did* get
it the right way around) will set all bits only if the original
address is *larger* than USER_PTR_MAX, but we already subtract
PAGE_SIZE from the actual hardware maximum due to other errata, so
that's fine.

IOW, it will turn USER_PTR_MAX and anything less into itself, but
USER_PTR_MAX+1 (and above) becomes all ones.
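
(In other words, what the sequence is supposed to compute is the
branch-free version of something like this - written out with a branch
purely for clarity:)

/* Illustrative only: the intended semantics of the cmp/sbb/or sequence */
static inline unsigned long mask_to_user_max(unsigned long addr, unsigned long max)
{
	if (addr > max)
		return ~0ul;	/* all ones - never a valid user address */
	return addr;		/* max itself and anything below: unchanged */
}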

                   Linus
 arch/x86/include/asm/uaccess_64.h | 17 ++++++++++++++++-
 arch/x86/kernel/cpu/common.c      |  6 ++++++
 arch/x86/kernel/vmlinux.lds.S     |  1 +
 arch/x86/lib/getuser.S            |  9 +++++++--
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index afce8ee5d7b7..df1468ada3ed 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,6 +12,13 @@
 #include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/percpu.h>
+#include <asm/runtime-const.h>
+
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
 
 #ifdef CONFIG_ADDRESS_MASKING
 /*
@@ -58,7 +65,15 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
  * user_access_begin that can avoid the fencing. This only works
  * for dense accesses starting at the address.
  */
-#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63)))
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+	void __user *ret;
+	asm("cmp %1,%0; sbb %0,%0; or %1,%0"
+		:"=r" (ret)
+		:"r" (ptr),
+		 "0" (runtime_const_ptr(USER_PTR_MAX)));
+	return ret;
+}
 #define masked_user_access_begin(x) ({				\
 	__auto_type __masked_ptr = (x);				\
 	__masked_ptr = mask_user_address(__masked_ptr);		\
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1040cb64841..9d419dc1df12 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -69,6 +69,7 @@
 #include <asm/sev.h>
 #include <asm/tdx.h>
 #include <asm/posted_intr.h>
+#include <asm/runtime-const.h>
 
 #include "cpu.h"
 
@@ -2389,6 +2390,11 @@ void __init arch_cpu_finalize_init(void)
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
+		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
+
+		if (cpu_feature_enabled(X86_FEATURE_LAM))
+			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
+		runtime_const_init(ptr, USER_PTR_MAX);
 		/*
 		 * Make sure the first 2MB area is not mapped by huge pages
 		 * There are typically fixed size MTRRs in there and overlapping
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 6726be89b7a6..b8c5741d2fb4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -358,6 +358,7 @@ SECTIONS
 #endif
 
 	RUNTIME_CONST_VARIABLES
+	RUNTIME_CONST(ptr, USER_PTR_MAX)
 
 	. = ALIGN(PAGE_SIZE);
 
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index d066aecf8aeb..202d68e3fea0 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -39,8 +39,13 @@
 
 .macro check_range size:req
 .if IS_ENABLED(CONFIG_X86_64)
-	mov %rax, %rdx
-	sar $63, %rdx
+	movq $0x0123456789abcdef,%rdx
+  1:
+  .pushsection runtime_ptr_USER_PTR_MAX,"a"
+	.long 1b - 8 - .
+  .popsection
+	cmp %rax, %rdx
+	sbb %rdx, %rdx
 	or %rdx, %rax
 .else
 	cmp $TASK_SIZE_MAX-\size+1, %eax
