This revision was automatically updated to reflect the committed changes.
Closed by commit rG03d8cd1d722d: [lldb][AArch64] Add support for SME's SVE 
streaming mode registers (authored by DavidSpickett).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154926/new/

https://reviews.llvm.org/D154926

Files:
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
  
lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
  
lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
  lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c

Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/main.c
@@ -0,0 +1,108 @@
+#include <stdint.h>
+#include <sys/prctl.h>
+
+void write_simd_regs() {
+#define WRITE_SIMD(NUM)                                                        \
+  asm volatile("MOV v" #NUM ".d[0], %0\n\t"                                    \
+               "MOV v" #NUM ".d[1], %0\n\t" ::"r"(NUM))
+
+  WRITE_SIMD(0);
+  WRITE_SIMD(1);
+  WRITE_SIMD(2);
+  WRITE_SIMD(3);
+  WRITE_SIMD(4);
+  WRITE_SIMD(5);
+  WRITE_SIMD(6);
+  WRITE_SIMD(7);
+  WRITE_SIMD(8);
+  WRITE_SIMD(9);
+  WRITE_SIMD(10);
+  WRITE_SIMD(11);
+  WRITE_SIMD(12);
+  WRITE_SIMD(13);
+  WRITE_SIMD(14);
+  WRITE_SIMD(15);
+  WRITE_SIMD(16);
+  WRITE_SIMD(17);
+  WRITE_SIMD(18);
+  WRITE_SIMD(19);
+  WRITE_SIMD(20);
+  WRITE_SIMD(21);
+  WRITE_SIMD(22);
+  WRITE_SIMD(23);
+  WRITE_SIMD(24);
+  WRITE_SIMD(25);
+  WRITE_SIMD(26);
+  WRITE_SIMD(27);
+  WRITE_SIMD(28);
+  WRITE_SIMD(29);
+  WRITE_SIMD(30);
+  WRITE_SIMD(31);
+}
+
+unsigned verify_simd_regs() {
+  uint64_t got_low = 0;
+  uint64_t got_high = 0;
+  uint64_t target = 0;
+
+#define VERIFY_SIMD(NUM)                                                       \
+  do {                                                                         \
+    got_low = 0;                                                               \
+    got_high = 0;                                                              \
+    asm volatile("MOV %0, v" #NUM ".d[0]\n\t"                                  \
+                 "MOV %1, v" #NUM ".d[1]\n\t"                                  \
+                 : "=r"(got_low), "=r"(got_high));                             \
+    target = NUM + 1;                                                          \
+    if ((got_low != target) || (got_high != target))                           \
+      return 1;                                                                \
+  } while (0)
+
+  VERIFY_SIMD(0);
+  VERIFY_SIMD(1);
+  VERIFY_SIMD(2);
+  VERIFY_SIMD(3);
+  VERIFY_SIMD(4);
+  VERIFY_SIMD(5);
+  VERIFY_SIMD(6);
+  VERIFY_SIMD(7);
+  VERIFY_SIMD(8);
+  VERIFY_SIMD(9);
+  VERIFY_SIMD(10);
+  VERIFY_SIMD(11);
+  VERIFY_SIMD(12);
+  VERIFY_SIMD(13);
+  VERIFY_SIMD(14);
+  VERIFY_SIMD(15);
+  VERIFY_SIMD(16);
+  VERIFY_SIMD(17);
+  VERIFY_SIMD(18);
+  VERIFY_SIMD(19);
+  VERIFY_SIMD(20);
+  VERIFY_SIMD(21);
+  VERIFY_SIMD(22);
+  VERIFY_SIMD(23);
+  VERIFY_SIMD(24);
+  VERIFY_SIMD(25);
+  VERIFY_SIMD(26);
+  VERIFY_SIMD(27);
+  VERIFY_SIMD(28);
+  VERIFY_SIMD(29);
+  VERIFY_SIMD(30);
+  VERIFY_SIMD(31);
+
+  return 0;
+}
+
+int main() {
+#ifdef SSVE
+  asm volatile("msr  s0_3_c4_c7_3, xzr" /*smstart*/);
+#elif defined SVE
+  // Make the non-streaming SVE registers active.
+  asm volatile("cpy  z0.b, p0/z, #1\n\t");
+#endif
+  // else test plain SIMD access.
+
+  write_simd_regs();
+
+  return verify_simd_regs(); // Set a break point here.
+}
Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/TestSVESIMDRegisters.py
@@ -0,0 +1,108 @@
+"""
+Test that LLDB correctly reads and writes AArch64 SIMD registers in SVE,
+streaming SVE and normal SIMD modes.
+
+There are a few operating modes and we use different strategies for each:
+* Without SVE, in SIMD mode - read the SIMD regset.
+* With SVE, but SVE is inactive - read the SVE regset, but get SIMD data from it.
+* With SVE, SVE is active - read the SVE regset, use the bottom 128 bits of the
+  Z registers.
+* With streaming SVE active - read the SSVE regset, use the bottom 128 bits of
+  the Z registers.
+
+This test exercises most of those.
+"""
+
+from enum import Enum
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class Mode(Enum):
+    SIMD = 0
+    SVE = 1
+    SSVE = 2
+
+class SVESIMDRegistersTestCase(TestBase):
+    def get_build_flags(self, mode):
+        cflags = "-march=armv8-a+sve"
+        if mode == Mode.SSVE:
+            cflags += " -DSSVE"
+        elif mode == Mode.SVE:
+            cflags += " -DSVE"
+        # else we want SIMD mode, which processes start up in already.
+
+        return {"CFLAGS_EXTRAS": cflags}
+
+    def skip_if_needed(self, mode):
+        if (mode == Mode.SVE) and not self.isAArch64SVE():
+            self.skipTest("SVE registers must be supported.")
+
+        if (mode == Mode.SSVE) and not self.isAArch64SME():
+            self.skipTest("SSVE registers must be supported.")
+
+    def make_simd_value(self, n):
+        pad = " ".join(["0x00"] * 7)
+        return "{{0x{:02x} {} 0x{:02x} {}}}".format(n, pad, n, pad)
+
+    def sve_simd_registers_impl(self, mode):
+        self.skip_if_needed(mode)
+
+        self.build(dictionary=self.get_build_flags(mode))
+        self.line = line_number("main.c", "// Set a break point here.")
+
+        exe = self.getBuildArtifact("a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+        lldbutil.run_break_set_by_file_and_line(
+            self, "main.c", self.line, num_expected_locations=1
+        )
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        self.expect(
+            "thread backtrace",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stop reason = breakpoint 1."],
+        )
+
+        # These are 128 bit registers, so getting them from the API as unsigned
+        # values doesn't work. Check the command output instead.
+        for i in range(32):
+            self.expect("register read v{}".format(i),
+                substrs=[self.make_simd_value(i)])
+
+        # Write a new set of values. The kernel will move the program back to
+        # non-streaming mode here.
+        for i in range(32):
+            self.runCmd("register write v{} \"{}\"".format(
+                i, self.make_simd_value(i+1)))
+
+        # Should be visible within lldb.
+        for i in range(32):
+            self.expect("register read v{}".format(i),
+                substrs=[self.make_simd_value(i+1)])
+
+        # The program should agree with lldb.
+        self.expect("continue", substrs=["exited with status = 0"])
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_simd_registers_sve(self):
+        """Test read/write of SIMD registers when in SVE mode."""
+        self.sve_simd_registers_impl(Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_simd_registers_ssve(self):
+        """Test read/write of SIMD registers when in SSVE mode."""
+        self.sve_simd_registers_impl(Mode.SSVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_simd_registers_simd(self):
+        """Test read/write of SIMD registers when in SIMD mode."""
+        self.sve_simd_registers_impl(Mode.SIMD)
Index: lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
+++ lldb/test/API/commands/register/register/aarch64_sve_simd_registers/Makefile
@@ -1,5 +1,3 @@
 C_SOURCES := main.c
 
-CFLAGS_EXTRAS := -march=armv8-a+sve
-
 include Makefile.rules
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/main.c
@@ -1,6 +1,15 @@
+#include <stdbool.h>
 #include <sys/prctl.h>
 
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#define SMSTART() asm volatile("msr  s0_3_c4_c7_3, xzr" /*smstart*/)
+
 void write_sve_regs() {
+  // We assume the smefa64 feature is present, which allows ffr access
+  // in streaming mode.
   asm volatile("setffr\n\t");
   asm volatile("ptrue p0.b\n\t");
   asm volatile("ptrue p1.h\n\t");
@@ -53,18 +62,84 @@
   asm volatile("cpy  z31.b, p15/z, #32\n\t");
 }
 
+// Set some different values so we can tell if lldb correctly returns to the set
+// above after the expression is finished.
+void write_sve_regs_expr() {
+  asm volatile("pfalse p0.b\n\t");
+  asm volatile("wrffr p0.b\n\t");
+  asm volatile("pfalse p1.b\n\t");
+  asm volatile("pfalse p2.b\n\t");
+  asm volatile("pfalse p3.b\n\t");
+  asm volatile("ptrue p4.b\n\t");
+  asm volatile("pfalse p5.b\n\t");
+  asm volatile("pfalse p6.b\n\t");
+  asm volatile("pfalse p7.b\n\t");
+  asm volatile("pfalse p8.b\n\t");
+  asm volatile("ptrue p9.b\n\t");
+  asm volatile("pfalse p10.b\n\t");
+  asm volatile("pfalse p11.b\n\t");
+  asm volatile("pfalse p12.b\n\t");
+  asm volatile("pfalse p13.b\n\t");
+  asm volatile("ptrue p14.b\n\t");
+  asm volatile("pfalse p15.b\n\t");
+
+  asm volatile("cpy  z0.b, p0/z, #2\n\t");
+  asm volatile("cpy  z1.b, p5/z, #3\n\t");
+  asm volatile("cpy  z2.b, p10/z, #4\n\t");
+  asm volatile("cpy  z3.b, p15/z, #5\n\t");
+  asm volatile("cpy  z4.b, p0/z, #6\n\t");
+  asm volatile("cpy  z5.b, p5/z, #7\n\t");
+  asm volatile("cpy  z6.b, p10/z, #8\n\t");
+  asm volatile("cpy  z7.b, p15/z, #9\n\t");
+  asm volatile("cpy  z8.b, p0/z, #10\n\t");
+  asm volatile("cpy  z9.b, p5/z, #11\n\t");
+  asm volatile("cpy  z10.b, p10/z, #12\n\t");
+  asm volatile("cpy  z11.b, p15/z, #13\n\t");
+  asm volatile("cpy  z12.b, p0/z, #14\n\t");
+  asm volatile("cpy  z13.b, p5/z, #15\n\t");
+  asm volatile("cpy  z14.b, p10/z, #16\n\t");
+  asm volatile("cpy  z15.b, p15/z, #17\n\t");
+  asm volatile("cpy  z16.b, p0/z, #18\n\t");
+  asm volatile("cpy  z17.b, p5/z, #19\n\t");
+  asm volatile("cpy  z18.b, p10/z, #20\n\t");
+  asm volatile("cpy  z19.b, p15/z, #21\n\t");
+  asm volatile("cpy  z20.b, p0/z, #22\n\t");
+  asm volatile("cpy  z21.b, p5/z, #23\n\t");
+  asm volatile("cpy  z22.b, p10/z, #24\n\t");
+  asm volatile("cpy  z23.b, p15/z, #25\n\t");
+  asm volatile("cpy  z24.b, p0/z, #26\n\t");
+  asm volatile("cpy  z25.b, p5/z, #27\n\t");
+  asm volatile("cpy  z26.b, p10/z, #28\n\t");
+  asm volatile("cpy  z27.b, p15/z, #29\n\t");
+  asm volatile("cpy  z28.b, p0/z, #30\n\t");
+  asm volatile("cpy  z29.b, p5/z, #31\n\t");
+  asm volatile("cpy  z30.b, p10/z, #32\n\t");
+  asm volatile("cpy  z31.b, p15/z, #33\n\t");
+}
+
 // This function will be called using jitted expression call. We change vector
 // length and write SVE registers. Our program context should restore to
 // orignal vector length and register values after expression evaluation.
-int expr_eval_func() {
-  prctl(PR_SVE_SET_VL, 8 * 2);
-  write_sve_regs();
-  prctl(PR_SVE_SET_VL, 8 * 4);
-  write_sve_regs();
+int expr_eval_func(bool streaming) {
+  int SET_VL_OPT = streaming ? PR_SME_SET_VL : PR_SVE_SET_VL;
+  prctl(SET_VL_OPT, 8 * 2);
+  // Note that doing a syscall brings you back to non-streaming mode, so we
+  // don't need to SMSTOP here.
+  if (streaming)
+    SMSTART();
+  write_sve_regs_expr();
+  prctl(SET_VL_OPT, 8 * 4);
+  if (streaming)
+    SMSTART();
+  write_sve_regs_expr();
   return 1;
 }
 
 int main() {
+#ifdef START_SSVE
+  SMSTART();
+#endif
   write_sve_regs();
+
   return 0; // Set a break point here.
 }
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/TestSVERegisters.py
@@ -2,11 +2,15 @@
 Test the AArch64 SVE registers.
 """
 
+from enum import Enum
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
 
+class Mode(Enum):
+    SVE = 0
+    SSVE = 1
 
 class RegisterCommandsTestCase(TestBase):
     def check_sve_register_size(self, set, name, expected):
@@ -61,20 +65,28 @@
 
         self.expect("register read " + "ffr", substrs=[p_regs_value])
 
-    @no_debug_info_test
-    @skipIf(archs=no_match(["aarch64"]))
-    @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_configuration(self):
-        """Test AArch64 SVE registers size configuration."""
-        self.build()
+    def get_build_flags(self, mode):
+        cflags = "-march=armv8-a+sve"
+        if mode == Mode.SSVE:
+            cflags += " -DSTART_SSVE"
+        return {"CFLAGS_EXTRAS": cflags}
+
+    def skip_if_needed(self, mode):
+        if (mode == Mode.SVE) and not self.isAArch64SVE():
+            self.skipTest("SVE registers must be supported.")
+
+        if (mode == Mode.SSVE) and not self.isAArch64SME():
+            self.skipTest("SSVE registers must be supported.")
+
+    def sve_registers_configuration_impl(self, mode):
+        self.skip_if_needed(mode)
+
+        self.build(dictionary=self.get_build_flags(mode))
         self.line = line_number("main.c", "// Set a break point here.")
 
         exe = self.getBuildArtifact("a.out")
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
-        if not self.isAArch64SVE():
-            self.skipTest("SVE registers must be supported.")
-
         lldbutil.run_break_set_by_file_and_line(
             self, "main.c", self.line, num_expected_locations=1
         )
@@ -91,26 +103,17 @@
         thread = process.GetThreadAtIndex(0)
         currentFrame = thread.GetFrameAtIndex(0)
 
-        has_sve = False
-        for registerSet in currentFrame.GetRegisters():
-            if "Scalable Vector Extension Registers" in registerSet.GetName():
-                has_sve = True
-
         registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
-
-        sve_registers = registerSets.GetValueAtIndex(2)
-
-        vg_reg = sve_registers.GetChildMemberWithName("vg")
+        sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers")
+        self.assertTrue(sve_registers)
 
         vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
 
         z_reg_size = vg_reg_value * 8
-
-        p_reg_size = z_reg_size / 8
-
         for i in range(32):
             self.check_sve_register_size(sve_registers, "z%i" % (i), z_reg_size)
 
+        p_reg_size = z_reg_size / 8
         for i in range(16):
             self.check_sve_register_size(sve_registers, "p%i" % (i), p_reg_size)
 
@@ -119,17 +122,26 @@
     @no_debug_info_test
     @skipIf(archs=no_match(["aarch64"]))
     @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_read_write(self):
-        """Test AArch64 SVE registers read and write."""
-        self.build()
-        self.line = line_number("main.c", "// Set a break point here.")
+    def test_sve_registers_configuration(self):
+        """Test AArch64 SVE registers size configuration."""
+        self.sve_registers_configuration_impl(Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_ssve_registers_configuration(self):
+        """Test AArch64 SSVE registers size configuration."""
+        self.sve_registers_configuration_impl(Mode.SSVE)
+
+    def sve_registers_read_write_impl(self, start_mode, eval_mode):
+        self.skip_if_needed(start_mode)
+        self.skip_if_needed(eval_mode)
+        self.build(dictionary=self.get_build_flags(start_mode))
 
         exe = self.getBuildArtifact("a.out")
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
-        if not self.isAArch64SVE():
-            self.skipTest("SVE registers must be supported.")
-
+        self.line = line_number("main.c", "// Set a break point here.")
         lldbutil.run_break_set_by_file_and_line(
             self, "main.c", self.line, num_expected_locations=1
         )
@@ -143,34 +155,55 @@
 
         target = self.dbg.GetSelectedTarget()
         process = target.GetProcess()
-        thread = process.GetThreadAtIndex(0)
-        currentFrame = thread.GetFrameAtIndex(0)
-
-        has_sve = False
-        for registerSet in currentFrame.GetRegisters():
-            if "Scalable Vector Extension Registers" in registerSet.GetName():
-                has_sve = True
 
         registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
-
-        sve_registers = registerSets.GetValueAtIndex(2)
-
-        vg_reg = sve_registers.GetChildMemberWithName("vg")
+        sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers")
+        self.assertTrue(sve_registers)
 
         vg_reg_value = sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
-
         z_reg_size = vg_reg_value * 8
-
         self.check_sve_regs_read(z_reg_size)
 
         # Evaluate simple expression and print function expr_eval_func address.
         self.expect("expression expr_eval_func", substrs=["= 0x"])
 
         # Evaluate expression call function expr_eval_func.
-        self.expect_expr("expr_eval_func()", result_type="int", result_value="1")
+        self.expect_expr("expr_eval_func({})".format(
+            "true" if (eval_mode == Mode.SSVE) else "false"), result_type="int",
+            result_value="1")
 
         # We called a jitted function above which must not have changed SVE
         # vector length or register values.
         self.check_sve_regs_read(z_reg_size)
 
         self.check_sve_regs_read_after_write(z_reg_size)
+
+    # The following tests all set up some register values then evaluate an
+    # expression. After the expression, the mode and register values should be
+    # the same as before. Finally they read/write some values in the registers.
+    # The only difference is the mode we start the program in, and the mode
+    # the expression function uses.
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_sve_sve(self):
+        self.sve_registers_read_write_impl(Mode.SVE, Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_ssve_ssve(self):
+        self.sve_registers_read_write_impl(Mode.SSVE, Mode.SSVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_sve_ssve(self):
+        self.sve_registers_read_write_impl(Mode.SVE, Mode.SSVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_registers_expr_read_write_ssve_sve(self):
+        self.sve_registers_read_write_impl(Mode.SSVE, Mode.SVE)
\ No newline at end of file
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_static_config/Makefile
@@ -1,5 +1,3 @@
 C_SOURCES := main.c
 
-CFLAGS_EXTRAS := -march=armv8-a+sve
-
 include Makefile.rules
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/main.c
@@ -1,6 +1,12 @@
 #include <pthread.h>
 #include <sys/prctl.h>
 
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#define SMSTART() asm volatile("msr  s0_3_c4_c7_3, xzr" /*smstart*/)
+
 static inline void write_sve_registers() {
   asm volatile("setffr\n\t");
   asm volatile("ptrue p0.b\n\t");
@@ -54,26 +60,41 @@
   asm volatile("cpy  z31.b, p15/z, #32\n\t");
 }
 
+int SET_VL_OPT = PR_SVE_SET_VL;
+
 void *threadX_func(void *x_arg) {
-  prctl(PR_SVE_SET_VL, 8 * 4);
+  prctl(SET_VL_OPT, 8 * 4);
+#ifdef USE_SSVE
+  SMSTART();
+#endif
   write_sve_registers();
   write_sve_registers(); // Thread X breakpoint 1
   return NULL;           // Thread X breakpoint 2
 }
 
 void *threadY_func(void *y_arg) {
-  prctl(PR_SVE_SET_VL, 8 * 2);
+  prctl(SET_VL_OPT, 8 * 2);
+#ifdef USE_SSVE
+  SMSTART();
+#endif
   write_sve_registers();
   write_sve_registers(); // Thread Y breakpoint 1
   return NULL;           // Thread Y breakpoint 2
 }
 
 int main() {
+#ifdef USE_SSVE
+  SET_VL_OPT = PR_SME_SET_VL;
+#endif
+
   /* this variable is our reference to the second thread */
   pthread_t x_thread, y_thread;
 
   /* Set vector length to 8 and write SVE registers values */
-  prctl(PR_SVE_SET_VL, 8 * 8);
+  prctl(SET_VL_OPT, 8 * 8);
+#ifdef USE_SSVE
+  SMSTART();
+#endif
   write_sve_registers();
 
   /* create a second thread which executes with argument x */
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
@@ -1,5 +1,6 @@
 """
-Test the AArch64 SVE registers dynamic resize with multiple threads.
+Test the AArch64 SVE and Streaming SVE (SSVE) registers dynamic resize with
+multiple threads.
 
 This test assumes a minimum supported vector length (VL) of 256 bits
 and will test 512 bits if possible. We refer to "vg" which is the
@@ -7,11 +8,15 @@
 the same as a vg of 4.
 """
 
+from enum import Enum
 import lldb
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
 
+class Mode(Enum):
+    SVE = 0
+    SSVE = 1
 
 class RegisterCommandsTestCase(TestBase):
     def get_supported_vg(self):
@@ -45,6 +50,9 @@
             if not self.res.GetError():
                 supported_vg.append(vg)
 
+        self.runCmd("breakpoint delete 1")
+        self.runCmd("continue")
+
         return supported_vg
 
     def check_sve_registers(self, vg_test_value):
@@ -88,24 +96,24 @@
 
         self.expect("register read ffr", substrs=[p_regs_value])
 
-    @no_debug_info_test
-    @skipIf(archs=no_match(["aarch64"]))
-    @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_dynamic_config(self):
-        """Test AArch64 SVE registers multi-threaded dynamic resize."""
-
-        if not self.isAArch64SVE():
+    def run_sve_test(self, mode):
+        if (mode == Mode.SVE) and not self.isAArch64SVE():
             self.skipTest("SVE registers must be supported.")
 
+        if (mode == Mode.SSVE) and not self.isAArch64SME():
+            self.skipTest("Streaming SVE registers must be supported.")
+
+        cflags = "-march=armv8-a+sve -lpthread"
+        if mode == Mode.SSVE:
+            cflags += " -DUSE_SSVE"
+        self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+
         self.build()
         supported_vg = self.get_supported_vg()
 
         if not (2 in supported_vg and 4 in supported_vg):
             self.skipTest("Not all required SVE vector lengths are supported.")
 
-        exe = self.getBuildArtifact("a.out")
-        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
-
         main_thread_stop_line = line_number("main.c", "// Break in main thread")
         lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
 
@@ -176,3 +184,17 @@
             elif stopped_at_line_number == thY_break_line2:
                 self.runCmd("thread select %d" % (idx + 1))
                 self.check_sve_registers(4)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_sve_registers_dynamic_config(self):
+        """Test AArch64 SVE registers multi-threaded dynamic resize."""
+        self.run_sve_test(Mode.SVE)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_ssve_registers_dynamic_config(self):
+        """Test AArch64 SSVE registers multi-threaded dynamic resize."""
+        self.run_sve_test(Mode.SSVE)
Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
@@ -15,7 +15,7 @@
 #include "lldb/lldb-private.h"
 #include <map>
 
-enum class SVEState { Unknown, Disabled, FPSIMD, Full };
+enum class SVEState : uint8_t { Unknown, Disabled, FPSIMD, Full, Streaming };
 
 class RegisterInfoPOSIX_arm64
     : public lldb_private::RegisterInfoAndSetInterface {
@@ -26,9 +26,10 @@
   enum {
     eRegsetMaskDefault = 0,
     eRegsetMaskSVE = 1,
-    eRegsetMaskPAuth = 2,
-    eRegsetMaskMTE = 4,
-    eRegsetMaskTLS = 8,
+    eRegsetMaskSSVE = 2,
+    eRegsetMaskPAuth = 4,
+    eRegsetMaskMTE = 8,
+    eRegsetMaskTLS = 16,
     eRegsetMaskDynamic = ~1,
   };
 
@@ -115,6 +116,7 @@
   }
 
   bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
+  bool IsSSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); }
   bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
   bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
 
Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
+++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
@@ -212,7 +212,7 @@
     // dynamic register set like MTE, Pointer Authentication regset then we need
     // to create dynamic register infos and regset array. Push back all optional
     // register infos and regset and calculate register offsets accordingly.
-    if (m_opt_regsets.AllSet(eRegsetMaskSVE)) {
+    if (m_opt_regsets.AnySet(eRegsetMaskSVE | eRegsetMaskSSVE)) {
       m_register_info_p = g_register_infos_arm64_sve_le;
       m_register_info_count = sve_ffr + 1;
       m_per_regset_regnum_range[m_register_set_count++] =
Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
+++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
@@ -149,7 +149,7 @@
 
   void *GetTLSTPIDR() { return &m_tls_tpidr_reg; }
 
-  void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); };
+  void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); }
 
   size_t GetSVEHeaderSize() { return sizeof(m_sve_header); }
 
@@ -157,6 +157,8 @@
 
   size_t GetSVEBufferSize() { return m_sve_ptrace_payload.size(); }
 
+  unsigned GetSVERegSet();
+
   size_t GetMTEControlSize() { return sizeof(m_mte_ctrl_reg); }
 
   size_t GetTLSTPIDRSize() { return sizeof(m_tls_tpidr_reg); }
Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
+++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
@@ -36,6 +36,11 @@
 #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension */
 #endif
 
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE                                                            \
+  0x40b /* ARM Scalable Matrix Extension, Streaming SVE mode */
+#endif
+
 #ifndef NT_ARM_PAC_MASK
 #define NT_ARM_PAC_MASK 0x406 /* Pointer authentication code masks */
 #endif
@@ -71,9 +76,20 @@
     if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET,
                                           native_thread.GetID(), &regset,
                                           &ioVec, sizeof(sve_header))
-            .Success())
+            .Success()) {
       opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSVE);
 
+      // We may also have the Scalable Matrix Extension (SME) which adds a
+      // streaming SVE mode.
+      ioVec.iov_len = sizeof(sve_header);
+      regset = NT_ARM_SSVE;
+      if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET,
+                                            native_thread.GetID(), &regset,
+                                            &ioVec, sizeof(sve_header))
+              .Success())
+        opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSSVE);
+    }
+
     NativeProcessLinux &process = native_thread.GetProcess();
 
     std::optional<uint64_t> auxv_at_hwcap =
@@ -134,7 +150,7 @@
   m_mte_ctrl_is_valid = false;
   m_tls_tpidr_is_valid = false;
 
-  if (GetRegisterInfo().IsSVEEnabled())
+  if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled())
     m_sve_state = SVEState::Unknown;
   else
     m_sve_state = SVEState::Disabled;
@@ -203,25 +219,27 @@
       assert(offset < GetFPRSize());
       src = (uint8_t *)GetFPRBuffer() + offset;
     } else {
-      // SVE enabled, we will read and cache SVE ptrace data
+      // SVE or SSVE enabled, we will read and cache SVE ptrace data.
+      // In SIMD or Full mode, the data comes from the SVE regset. In streaming
+      // mode it comes from the streaming SVE regset.
       error = ReadAllSVE();
       if (error.Fail())
         return error;
 
       // FPSR and FPCR will be located right after Z registers in
-      // SVEState::FPSIMD while in SVEState::Full they will be located at the
-      // end of register data after an alignment correction based on currently
-      // selected vector length.
+      // SVEState::FPSIMD while in SVEState::Full or SVEState::Streaming they
+      // will be located at the end of register data after an alignment
+      // correction based on currently selected vector length.
       uint32_t sve_reg_num = LLDB_INVALID_REGNUM;
       if (reg == GetRegisterInfo().GetRegNumFPSR()) {
         sve_reg_num = reg;
-        if (m_sve_state == SVEState::Full)
+        if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming)
           offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16);
       } else if (reg == GetRegisterInfo().GetRegNumFPCR()) {
         sve_reg_num = reg;
-        if (m_sve_state == SVEState::Full)
+        if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming)
           offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4;
@@ -344,25 +362,25 @@
 
       return WriteFPR();
     } else {
-      // SVE enabled, we will read and cache SVE ptrace data
+      // SVE or SSVE enabled, we will read and cache SVE ptrace data.
       error = ReadAllSVE();
       if (error.Fail())
         return error;
 
       // FPSR and FPCR will be located right after Z registers in
-      // SVEState::FPSIMD while in SVEState::Full they will be located at the
-      // end of register data after an alignment correction based on currently
-      // selected vector length.
+      // SVEState::FPSIMD while in SVEState::Full or SVEState::Streaming they
+      // will be located at the end of register data after an alignment
+      // correction based on currently selected vector length.
       uint32_t sve_reg_num = LLDB_INVALID_REGNUM;
       if (reg == GetRegisterInfo().GetRegNumFPSR()) {
         sve_reg_num = reg;
-        if (m_sve_state == SVEState::Full)
+        if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming)
           offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16);
       } else if (reg == GetRegisterInfo().GetRegNumFPCR()) {
         sve_reg_num = reg;
-        if (m_sve_state == SVEState::Full)
+        if (m_sve_state == SVEState::Full || m_sve_state == SVEState::Streaming)
           offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl));
         else if (m_sve_state == SVEState::FPSIMD)
           offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4;
@@ -479,9 +497,10 @@
 
 Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues(
     lldb::WritableDataBufferSP &data_sp) {
-  // AArch64 register data must contain GPRs, either FPR or SVE registers
-  // and optional MTE register. Pointer Authentication (PAC) registers are
-  // read-only and will be skiped.
+  // AArch64 register data must contain GPRs and either FPR or SVE registers.
+  // SVE registers can be non-streaming (aka SVE) or streaming (aka SSVE).
+  // Finally an optional MTE register. Pointer Authentication (PAC) registers
+  // are read-only and will be skipped.
 
   // In order to create register data checkpoint we first read all register
   // values if not done already and calculate total size of register set data.
@@ -495,8 +514,10 @@
     return error;
 
   // If SVE is enabled we need not copy FPR separately.
-  if (GetRegisterInfo().IsSVEEnabled()) {
+  if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) {
     reg_data_byte_size += GetSVEBufferSize();
+    // Also reserve room for the SVE mode tag stored ahead of the SVE data.
+    reg_data_byte_size += sizeof(m_sve_state);
     error = ReadAllSVE();
   } else {
     reg_data_byte_size += GetFPRSize();
@@ -524,7 +545,9 @@
   ::memcpy(dst, GetGPRBuffer(), GetGPRBufferSize());
   dst += GetGPRBufferSize();
 
-  if (GetRegisterInfo().IsSVEEnabled()) {
+  if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) {
+    *dst = static_cast<uint8_t>(m_sve_state);
+    dst += sizeof(m_sve_state);
     ::memcpy(dst, GetSVEBuffer(), GetSVEBufferSize());
     dst += GetSVEBufferSize();
   } else {
@@ -543,8 +566,8 @@
 Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues(
     const lldb::DataBufferSP &data_sp) {
   // AArch64 register data must contain GPRs, either FPR or SVE registers
-  // and optional MTE register. Pointer Authentication (PAC) registers are
-  // read-only and will be skiped.
+  // (which can be streaming or non-streaming) and optional MTE register.
+  // Pointer Authentication (PAC) registers are read-only and will be skipped.
 
   // We store all register values in data_sp by copying full PTrace data that
   // corresponds to register sets enabled by current register context. In order
@@ -594,6 +617,10 @@
       (data_sp->GetByteSize() > (reg_data_min_size + GetSVEHeaderSize()));
 
   if (contains_sve_reg_data) {
+    // Restore to the correct mode, streaming or not.
+    m_sve_state = static_cast<SVEState>(*src);
+    src += sizeof(m_sve_state);
+
     // We have SVE register data first write SVE header.
     ::memcpy(GetSVEHeader(), src, GetSVEHeaderSize());
     if (!sve::vl_valid(m_sve_header.vl)) {
@@ -824,6 +851,10 @@
   ConfigureRegisterContext();
 }
 
+unsigned NativeRegisterContextLinux_arm64::GetSVERegSet() {
+  return m_sve_state != SVEState::Streaming ? NT_ARM_SVE : NT_ARM_SSVE;
+}
+
 Status NativeRegisterContextLinux_arm64::ReadSVEHeader() {
   Status error;
 
@@ -834,7 +865,7 @@
   ioVec.iov_base = GetSVEHeader();
   ioVec.iov_len = GetSVEHeaderSize();
 
-  error = ReadRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE);
+  error = ReadRegisterSet(&ioVec, GetSVEHeaderSize(), GetSVERegSet());
 
   if (error.Success())
     m_sve_header_is_valid = true;
@@ -875,12 +906,11 @@
   m_sve_header_is_valid = false;
   m_fpu_is_valid = false;
 
-  return WriteRegisterSet(&ioVec, GetSVEHeaderSize(), NT_ARM_SVE);
+  return WriteRegisterSet(&ioVec, GetSVEHeaderSize(), GetSVERegSet());
 }
 
 Status NativeRegisterContextLinux_arm64::ReadAllSVE() {
   Status error;
-
   if (m_sve_buffer_is_valid)
     return error;
 
@@ -888,7 +918,7 @@
   ioVec.iov_base = GetSVEBuffer();
   ioVec.iov_len = GetSVEBufferSize();
 
-  error = ReadRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE);
+  error = ReadRegisterSet(&ioVec, GetSVEBufferSize(), GetSVERegSet());
 
   if (error.Success())
     m_sve_buffer_is_valid = true;
@@ -912,7 +942,7 @@
   m_sve_header_is_valid = false;
   m_fpu_is_valid = false;
 
-  return WriteRegisterSet(&ioVec, GetSVEBufferSize(), NT_ARM_SVE);
+  return WriteRegisterSet(&ioVec, GetSVEBufferSize(), GetSVERegSet());
 }
 
 Status NativeRegisterContextLinux_arm64::ReadMTEControl() {
@@ -985,21 +1015,43 @@
 
 void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() {
   // ConfigureRegisterContext gets called from InvalidateAllRegisters
-  // on every stop and configures SVE vector length.
+  // on every stop and configures SVE vector length and whether we are in
+  // streaming SVE mode.
   // If m_sve_state is set to SVEState::Disabled on first stop, code below will
   // be deemed non operational for the lifetime of current process.
   if (!m_sve_header_is_valid && m_sve_state != SVEState::Disabled) {
+    // If we have SVE we may also have the SVE streaming mode that SME added.
+    // We can read the header of either mode, but only the active mode will
+    // have valid register data.
+
+    // Check whether SME is present and the streaming SVE mode is active.
+    m_sve_header_is_valid = false;
+    m_sve_buffer_is_valid = false;
+    m_sve_state = SVEState::Streaming;
     Status error = ReadSVEHeader();
-    if (error.Success()) {
-      // If SVE is enabled thread can switch between SVEState::FPSIMD and
-      // SVEState::Full on every stop.
-      if ((m_sve_header.flags & sve::ptrace_regs_mask) ==
-          sve::ptrace_regs_fpsimd)
-        m_sve_state = SVEState::FPSIMD;
-      else if ((m_sve_header.flags & sve::ptrace_regs_mask) ==
-               sve::ptrace_regs_sve)
-        m_sve_state = SVEState::Full;
 
+    // Streaming mode is active if the header has the SVE active flag set.
+    if (!(error.Success() && ((m_sve_header.flags & sve::ptrace_regs_mask) ==
+                              sve::ptrace_regs_sve))) {
+      // Non-streaming might be active instead.
+      m_sve_header_is_valid = false;
+      m_sve_buffer_is_valid = false;
+      m_sve_state = SVEState::Full;
+      error = ReadSVEHeader();
+      if (error.Success()) {
+        // If SVE is enabled thread can switch between SVEState::FPSIMD and
+        // SVEState::Full on every stop.
+        if ((m_sve_header.flags & sve::ptrace_regs_mask) ==
+            sve::ptrace_regs_fpsimd)
+          m_sve_state = SVEState::FPSIMD;
+        // Else we are in SVEState::Full.
+      } else {
+        m_sve_state = SVEState::Disabled;
+      }
+    }
+
+    if (m_sve_state == SVEState::Full || m_sve_state == SVEState::FPSIMD ||
+        m_sve_state == SVEState::Streaming) {
       // On every stop we configure SVE vector length by calling
       // ConfigureVectorLength regardless of current SVEState of this thread.
       uint32_t vq = RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64SVE;
@@ -1025,7 +1077,9 @@
     const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB];
     sve_reg_offset = sve::ptrace_fpsimd_offset +
                      (reg - GetRegisterInfo().GetRegNumSVEZ0()) * 16;
-  } else if (m_sve_state == SVEState::Full) {
+    // Between non-streaming and streaming mode, the layout is identical.
+  } else if (m_sve_state == SVEState::Full ||
+             m_sve_state == SVEState::Streaming) {
     uint32_t sve_z0_offset = GetGPRSize() + 16;
     sve_reg_offset =
         sve::SigRegsOffset() + reg_info->byte_offset - sve_z0_offset;
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to