Three test sources still had only __i386__ / __x86_64__ paths; the
aarch64 build either failed to compile or had no useful coverage.
Add aarch64 arms that mirror what the x86 versions exercise:

  tests/test-machmsg.c  test_recv_interrupted_setreturn uses
       thread_get_state / thread_set_state to override the recv
       thread's return register before resuming it.  Add an aarch64
       branch that uses struct aarch64_thread_state and writes the
       expected return value to x[0] (AAPCS return register).

  tests/test-syscalls.c
       - test_bad_syscall_num: emit `svc #0` with an unallocated
         number in w8 (Mach trap numbers are negative; a positive
         value lands in the kernel's unallocated-trap path).  Update
         the expected exception in main() to EXC_SOFTWARE /
         EXC_AARCH64_SVC (the aarch64 categorisation; i386 uses
         EXC_BAD_INSTRUCTION/EXC_I386_INVOP because its sysenter/
         lcall paths are typed as illegal instructions instead).
       - test_syscall_bad_arg_on_stack: skip on aarch64.  AAPCS
         passes all 7 mach_msg arguments in x0..x6, so there's no
         "arg on stack" to corrupt; the analogous "bad msg pointer"
         test is caught by copyinmsg's recovery handler and returns
         a syscall error rather than raising an exception, so the
         test's wait-for-EXC_BAD_ACCESS would just hang.
       - both: replace `FAILURE("we shouldn't be here!")` after the
         trapping asm with thread_terminate(mach_thread_self()) +
         busy loop.  The aarch64 SVC entry path always advances ELR
         past the svc (hardware behaviour, documented in
         aarch64/trap.c), so when the kernel resumes the thread
         after the exception handler returns KERN_SUCCESS, user
         code runs and would otherwise emit a spurious failure
         marker.  Self-terminate to avoid that race.

  tests/test-thread-state-fp.c
       New test_fp_state_getset_aarch64() exercises NEON V
       registers + FPCR via thread_get_state/thread_set_state.
       Shape mirrors the x86 test_xfp_state_getset:
         1. test thread loads known values into V3 and FPCR
         2. spawns helper, suspends self
         3. helper reads state, verifies V3 / FPCR are preserved,
            writes V7 + new FPCR, resumes
         4. test thread reads V7 / FPCR via inline asm and asserts
            they reflect the helper's writes
       Use a function-local `static struct aarch64_float_state state`
       (static storage duration, declared inside the helper) — the
       type's 16-byte alignment requirement from __int128 v[32] isn't
       always honoured by aarch64-unknown-none-elf-gcc when the struct
       is an automatic local across nested calls.
       main() gains an `__aarch64__` branch that calls the new
       test in place of the x86 test_fp_state_getset /
       test_xfp_state_getset.

All 11 USER_TESTS now pass under qemu-system-aarch64 -M virt + the
guest-loader test harness on top of the aarch64-port commits.
---
 tests/test-machmsg.c         |  15 +++++
 tests/test-syscalls.c        |  83 +++++++++++++++++++++--
 tests/test-thread-state-fp.c | 123 +++++++++++++++++++++++++++++++++++
 3 files changed, 217 insertions(+), 4 deletions(-)

diff --git a/tests/test-machmsg.c b/tests/test-machmsg.c
index 7f535bde..fa9a45f3 100644
--- a/tests/test-machmsg.c
+++ b/tests/test-machmsg.c
@@ -553,20 +553,35 @@ void test_recv_interrupted_setreturn(void)
   ASSERT_RET(ret, "thread_abort");
 
 
+#ifdef __aarch64__
+  struct aarch64_thread_state state;
+  unsigned int count = AARCH64_THREAD_STATE_COUNT;
+  ret = thread_get_state(th, AARCH64_REGS_SEGS_STATE,
+                         (thread_state_t) &state, &count);
+#else
   struct i386_thread_state state;
   unsigned int count;
   count = i386_THREAD_STATE_COUNT;
   ret = thread_get_state(th, i386_REGS_SEGS_STATE,
                          (thread_state_t) &state, &count);
+#endif
   ASSERT_RET(ret, "thread_get_state()");
 
 #ifdef __i386__
   state.eax = 123;
 #elif defined(__x86_64__)
   state.rax = 123;
+#elif defined(__aarch64__)
+  /* AAPCS: function-return value in x0. */
+  state.x[0] = 123;
 #endif
+#ifdef __aarch64__
+  ret = thread_set_state(th, AARCH64_REGS_SEGS_STATE,
+                         (thread_state_t) &state, AARCH64_THREAD_STATE_COUNT);
+#else
   ret = thread_set_state(th, i386_REGS_SEGS_STATE,
                          (thread_state_t) &state, i386_THREAD_STATE_COUNT);
+#endif
   ASSERT_RET(ret, "thread_set_state");
 
   ret = thread_resume(th);
diff --git a/tests/test-syscalls.c b/tests/test-syscalls.c
index fbfecd9c..a08398ca 100644
--- a/tests/test-syscalls.c
+++ b/tests/test-syscalls.c
@@ -63,13 +63,47 @@ void test_syscall_bad_arg_on_stack(void *arg)
                "movq   $-25,%rax;"                     \
                "syscall;"                               \
                );
-#else
+#elif defined(__i386__)
   asm volatile("mov    $0x123,%esp;"                   \
                "mov    $-25,%eax;"                     \
                "lcall  $0x7,$0x0;"                     \
                );
-#endif
+#elif defined(__aarch64__)
+  /*
+   * aarch64 passes all mach_msg arguments in registers (AAPCS x0..x6
+   * cover the 7 args), so there's no literal "arg on stack" to
+   * corrupt.  The closest analogue is to invoke mach_msg_trap with a
+   * bad message-header pointer, but copyinmsg's recovery handler turns
+   * that fault into a syscall error rather than EXC_BAD_ACCESS, so
+   * main() does not run this subtest on aarch64.
+   */
+  register long x0 asm("x0") = 0x123;  /* bad msg header pointer */
+  register long x1 asm("x1") = 1;      /* MACH_SEND_MSG — forces the
+                                          kernel to copyin from x0, which
+                                          is where the fault we want
+                                          actually lives. */
+  register long x2 asm("x2") = 0;
+  register long x3 asm("x3") = 0;
+  register long x4 asm("x4") = 0;
+  register long x5 asm("x5") = 0;
+  register long x6 asm("x6") = 0;
+  register long w8 asm("w8") = -25;    /* mach_msg_trap */
+  asm volatile("svc #0"
+               :: "r"(x0), "r"(x1), "r"(x2), "r"(x3),
+                  "r"(x4), "r"(x5), "r"(x6), "r"(w8));
+  /*
+   * Execution continues here on aarch64.  The trap returns an error for
+   * the bad pointer, and even when an exception is raised the SVC entry
+   * path (aarch64/trap.c) leaves ELR pointing at the instruction after
+   * the svc (hardware auto-advances it before the synchronous-exception
+   * vector fires), so the thread resumes here once the handler returns
+   * KERN_SUCCESS.  Bow out cleanly rather than tripping FAILURE.
+   */
+  thread_terminate(mach_thread_self());
+  for (;;) { /* belt-and-braces in case termination is deferred */ }
+#else
   FAILURE("we shouldn't be here!");
+#endif
 }
 
 void test_bad_syscall_num(void *arg)
@@ -78,12 +112,25 @@ void test_bad_syscall_num(void *arg)
   asm volatile("movq   $0x123456,%rax;"                \
                "syscall;"                               \
                );
-#else
+#elif defined(__i386__)
   asm volatile("mov    $0x123456,%eax;"                \
                "lcall  $0x7,$0x0;"                     \
                );
-#endif
+#elif defined(__aarch64__)
+  /*
+   * Valid Mach trap numbers are negative on aarch64 (matching the i386
+   * convention); a positive value like 0x123456 lands in the SVC
+   * entry's "imm16 != 0 || not a valid mach trap" path, which raises
+   * EXC_SOFTWARE / EXC_AARCH64_SVC.
+   */
+  register long w8 asm("w8") = 0x123456;
+  asm volatile("svc #0" :: "r"(w8));
+  /* See test_syscall_bad_arg_on_stack — bow out instead of FAILURE. */
+  thread_terminate(mach_thread_self());
+  for (;;) { }
+#else
   FAILURE("we shouldn't be here!");
+#endif
 }
 
 
@@ -119,14 +166,42 @@ int main(int argc, char *argv[], int envc, char *envp[])
   memset(&last_exc, 0, sizeof(last_exc));
   test_thread_start(mach_task_self(), test_bad_syscall_num, NULL);
   ASSERT_RET(mach_msg_server_once(exc_server, 4096, excp, MACH_MSG_OPTION_NONE), "error in exc server");
+#if defined(__aarch64__)
+  /*
+   * On aarch64 an svc with an unallocated immediate (or, as here, a
+   * bad mach syscall number in w8) raises EXC_SOFTWARE with subcode
+   * EXC_AARCH64_SVC — see <mach/aarch64/exception.h>.  This differs
+   * from i386's EXC_BAD_INSTRUCTION/EXC_I386_INVOP categorisation
+   * because aarch64 has a dedicated svc instruction whose entry path
+   * is "software-generated".
+   */
+  ASSERT((last_exc.exception == EXC_SOFTWARE) && (last_exc.code == EXC_AARCH64_SVC),
+         "bad exception for test_bad_syscall_num()");
+#else
   ASSERT((last_exc.exception == EXC_BAD_INSTRUCTION) && (last_exc.code == EXC_I386_INVOP),
          "bad exception for test_bad_syscall_num()");
+#endif
 
+#if !defined(__aarch64__)
+  /*
+   * AAPCS passes all 7 mach_msg arguments in registers (x0..x6) on
+   * aarch64, so there's no literal "arg on stack" path to corrupt;
+   * the x86 mechanism doesn't translate.  An attempt to fault via a
+   * bad msg pointer instead is caught by copyinmsg's recovery
+   * handler and returned as a syscall error, not raised as an
+   * exception — so the "wait for EXC_BAD_ACCESS" assertion below
+   * would hang.  Skip this subtest until we have a different
+   * aarch64-appropriate way to trigger an unrecoverable user-memory
+   * access from inside a syscall.
+   */
   memset(&last_exc, 0, sizeof(last_exc));
   test_thread_start(mach_task_self(), test_syscall_bad_arg_on_stack, NULL);
   ASSERT_RET(mach_msg_server_once(exc_server, 4096, excp, MACH_MSG_OPTION_NONE), "error in exc server");
   ASSERT((last_exc.exception == EXC_BAD_ACCESS) && (last_exc.code == KERN_INVALID_ADDRESS),
          "bad exception for test_syscall_bad_arg_on_stack()");
+#else
+  (void) test_syscall_bad_arg_on_stack;   /* not exercised on aarch64 */
+#endif
 
   return 0;
 }
diff --git a/tests/test-thread-state-fp.c b/tests/test-thread-state-fp.c
index d0e6802a..331f4d85 100644
--- a/tests/test-thread-state-fp.c
+++ b/tests/test-thread-state-fp.c
@@ -22,6 +22,127 @@
 #include <mach.user.h>
 #include <mach_port.user.h>
 
+#if defined(__aarch64__)
+
+/*
+ *     aarch64 NEON / VFP equivalent of test_fp_state_getset /
+ *     test_xfp_state_getset below.  We use one combined test because
+ *     aarch64 doesn't have the i386/i387 vs SSE/XSAVE split — the
+ *     whole NEON register file (V0..V31 plus FPCR/FPSR) lives in
+ *     struct aarch64_float_state and is fetched in one
+ *     thread_get_state(AARCH64_FLOAT_STATE) call.
+ *
+ *     The shape mirrors the x86 test: the test thread loads known
+ *     values into V3 and FPCR, spawns a helper thread, suspends
+ *     itself; the helper reads the test thread's saved FP state,
+ *     verifies V3 / FPCR are preserved, writes new values into V7
+ *     and FPCR, resumes the test thread; the test thread then reads
+ *     V7 and FPCR back via inline asm and asserts they reflect the
+ *     helper's writes.
+ */
+
+#define V3_PATTERN_LO  0x3333333333333333ULL
+#define V3_PATTERN_HI  0x3333333333333333ULL
+#define V7_PATTERN_LO  0x7777777777777777ULL
+#define V7_PATTERN_HI  0x7777777777777777ULL
+/* FPCR.RM = 01 (round toward +inf) -- bits 23-22 = 0b01. */
+#define FPCR_INIT      0x00400000ULL
+/* FPCR.RM = 10 (round toward -inf). */
+#define FPCR_MODIFIED  0x00800000ULL
+
+/*
+ * Helper thread: once the thread passed via ARG is suspended, check its
+ * saved V3/FPCR, overwrite V7 and FPCR, resume it, then terminate itself.
+ */
+static void thread_fp_getset_aarch64(void *arg)
+{
+  int err;
+  thread_t th = *(thread_t*)arg;
+
+  wait_thread_suspended(th);
+
+  /*
+   * thread_setstatus() in aarch64/aarch64/pcb.c rejects a state buffer
+   * that is not aligned to alignof(struct aarch64_float_state) (16 bytes,
+   * from __int128 v[32]) with KERN_INVALID_ARGUMENT.  Static storage is
+   * reliably aligned; aarch64-unknown-none-elf-gcc doesn't always honour
+   * that alignment for an automatic variable across nested calls.
+   */
+  static struct aarch64_float_state state;
+  mach_msg_type_number_t state_count = AARCH64_FLOAT_STATE_COUNT;
+
+  memset(&state, 0, sizeof(state));
+  err = thread_get_state(th, AARCH64_FLOAT_STATE,
+                         (thread_state_t) &state, &state_count);
+  ASSERT_RET(err, "thread_get_state get failed");
+  ASSERT(state_count == AARCH64_FLOAT_STATE_COUNT, "bad state_count");
+
+  /* V3 should match what the test thread loaded before suspending. */
+  uint64_t v3_lo = (uint64_t) state.v[3];
+  uint64_t v3_hi = (uint64_t) (state.v[3] >> 64);
+  printf("V3 lo=%016llx hi=%016llx (expected lo=%016llx hi=%016llx)\n",
+         (unsigned long long) v3_lo, (unsigned long long) v3_hi,
+         (unsigned long long) V3_PATTERN_LO,
+         (unsigned long long) V3_PATTERN_HI);
+  ASSERT(v3_lo == V3_PATTERN_LO && v3_hi == V3_PATTERN_HI,
+         "V3 not preserved across context switch");
+  printf("FPCR get=%016llx (expected %016llx)\n",
+         (unsigned long long) state.fpcr,
+         (unsigned long long) FPCR_INIT);
+  ASSERT(state.fpcr == FPCR_INIT, "FPCR not preserved");
+
+  /* Modify V7 and FPCR; the test thread checks both after it resumes. */
+  state.v[7] = ((__int128) V7_PATTERN_HI << 64) | V7_PATTERN_LO;
+  state.fpcr = FPCR_MODIFIED;
+
+  printf("set: state addr=%p count=%u fpsr=%llx fpcr=%llx fpmr=%llx\n",
+         (void *) &state, state_count,
+         (unsigned long long) state.fpsr,
+         (unsigned long long) state.fpcr,
+         (unsigned long long) state.fpmr);
+  err = thread_set_state(th, AARCH64_FLOAT_STATE,
+                         (thread_state_t) &state, state_count);
+  ASSERT_RET(err, "thread_set_state set failed");
+
+  err = thread_resume(th);
+  ASSERT_RET(err, "error in thread_resume");
+  thread_terminate(mach_thread_self());
+  FAILURE("thread_terminate");
+}
+
+/* Seed V3/FPCR, let the helper rewrite V7/FPCR while suspended, then verify. */
+static void test_fp_state_getset_aarch64(void)
+{
+  thread_t th = mach_thread_self();
+
+  /* Load known values into V3 and FPCR.  The "memory" clobber makes the
+     compiler store v3_bytes before the ldr reads it through the pointer. */
+  uint64_t v3_bytes[2] = { V3_PATTERN_LO, V3_PATTERN_HI };
+  asm volatile ("ldr q3, [%0]" :: "r"(v3_bytes) : "v3", "memory");
+  asm volatile ("msr fpcr, %0" :: "r"((uint64_t) FPCR_INIT));
+
+  /* Spawn helper, then suspend self so helper sees a stable FP state. */
+  test_thread_start(mach_task_self(), thread_fp_getset_aarch64, &th);
+  int err = thread_suspend(th);
+  ASSERT_RET(err, "error in thread_suspend");
+
+  /* Check V7 and FPCR have the values the helper set. */
+  uint64_t v7_after[2] = { 0, 0 };
+  asm volatile ("str q7, [%0]" :: "r"(v7_after) : "memory");
+  uint64_t fpcr_after = 0;
+  asm volatile ("mrs %0, fpcr" : "=r"(fpcr_after));
+  printf("V7 lo=%016llx hi=%016llx\n",
+         (unsigned long long) v7_after[0], (unsigned long long) v7_after[1]);
+  printf("FPCR after=%016llx (expected %016llx)\n",
+         (unsigned long long) fpcr_after, (unsigned long long) FPCR_MODIFIED);
+  ASSERT(v7_after[0] == V7_PATTERN_LO && v7_after[1] == V7_PATTERN_HI,
+         "V7 wasn't correctly set by the helper thread");
+  ASSERT(fpcr_after == FPCR_MODIFIED,
+         "FPCR wasn't updated by the helper thread");
+}
+
+#endif /* __aarch64__ */
+
 #if defined(__i386__) || defined(__x86_64__)
 #include <mach_i386.user.h>
 
@@ -244,6 +365,8 @@ int main(int argc, char *argv[], int envc, char *envp[])
 #if defined(__i386__) || defined(__x86_64__)
   test_fp_state_getset();
   test_xfp_state_getset();
+#elif defined(__aarch64__)
+  test_fp_state_getset_aarch64();
 #else
   FAILURE("FP/XSTATE test missing on this arch!");
 #endif
-- 
2.54.0


Reply via email to