Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (176094 => 176095)
--- trunk/Source/_javascript_Core/ChangeLog 2014-11-13 22:59:04 UTC (rev 176094)
+++ trunk/Source/_javascript_Core/ChangeLog 2014-11-13 23:00:07 UTC (rev 176095)
@@ -1,3 +1,23 @@
+2014-11-13 Mark Lam <[email protected]>
+
+ Change X86/64 JIT probes to save/restore xmm regs as double instead of __m128.
+ <https://webkit.org/b/138708>
+
+ Reviewed by Michael Saboff.
+
+ The JIT code only uses the xmm regs as double registers. This patch changes
+ the storage types of the FP registers in X86Assembler.h to double instead of
+ __m128, and updates the X86 and X86_64 JIT probe implementations accordingly.
+
+ Also made some minor cosmetic changes in the output of the probe dump functions.
+
+ * assembler/MacroAssemblerX86Common.cpp:
+ (JSC::MacroAssemblerX86Common::ProbeContext::dumpCPURegisters):
+ * assembler/X86Assembler.h:
+ * jit/JITStubsX86.h:
+ * jit/JITStubsX86Common.h:
+ * jit/JITStubsX86_64.h:
+
2014-11-13 Juergen Ributzka <[email protected]>
Update WebKit to build with LLVM TOT
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.cpp (176094 => 176095)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.cpp 2014-11-13 22:59:04 UTC (rev 176094)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.cpp 2014-11-13 23:00:07 UTC (rev 176095)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2013, 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -37,12 +37,12 @@
#if CPU(X86)
#define DUMP_GPREGISTER(_type, _regName) { \
int32_t value = reinterpret_cast<int32_t>(cpu._regName); \
- dataLogF("%s %6s: 0x%08x %d\n", indentation, #_regName, value, value) ; \
+ dataLogF("%s %6s: 0x%08x %d\n", indentation, #_regName, value, value) ; \
}
#elif CPU(X86_64)
#define DUMP_GPREGISTER(_type, _regName) { \
int64_t value = reinterpret_cast<int64_t>(cpu._regName); \
- dataLogF("%s %6s: 0x%016llx %lld\n", indentation, #_regName, value, value) ; \
+ dataLogF("%s %6s: 0x%016llx %lld\n", indentation, #_regName, value, value) ; \
}
#endif
FOR_EACH_CPU_GPREGISTER(DUMP_GPREGISTER)
@@ -50,10 +50,9 @@
#undef DUMP_GPREGISTER
#define DUMP_FPREGISTER(_type, _regName) { \
- uint32_t* u = reinterpret_cast<uint32_t*>(&cpu._regName); \
+ uint64_t* u = reinterpret_cast<uint64_t*>(&cpu._regName); \
double* d = reinterpret_cast<double*>(&cpu._regName); \
- dataLogF("%s %6s: 0x%08x%08x 0x%08x%08x %12g %12g\n", \
- indentation, #_regName, u[3], u[2], u[1], u[0], d[1], d[0]); \
+ dataLogF("%s %6s: 0x%016llx %.13g\n", indentation, #_regName, *u, *d); \
}
FOR_EACH_CPU_FPREGISTER(DUMP_FPREGISTER)
#undef DUMP_FPREGISTER
Modified: trunk/Source/_javascript_Core/assembler/X86Assembler.h (176094 => 176095)
--- trunk/Source/_javascript_Core/assembler/X86Assembler.h 2014-11-13 22:59:04 UTC (rev 176094)
+++ trunk/Source/_javascript_Core/assembler/X86Assembler.h 2014-11-13 23:00:07 UTC (rev 176095)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008, 2012, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2008, 2012, 2013, 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -73,14 +73,14 @@
// the storing still expects a __m128 slot. This will be changed when the JIT
// probe code is updated later to reflect the JITs' usage of these registers.
#define FOR_EACH_CPU_FPREGISTER(V) \
- V(__m128, xmm0) \
- V(__m128, xmm1) \
- V(__m128, xmm2) \
- V(__m128, xmm3) \
- V(__m128, xmm4) \
- V(__m128, xmm5) \
- V(__m128, xmm6) \
- V(__m128, xmm7) \
+ V(double, xmm0) \
+ V(double, xmm1) \
+ V(double, xmm2) \
+ V(double, xmm3) \
+ V(double, xmm4) \
+ V(double, xmm5) \
+ V(double, xmm6) \
+ V(double, xmm7) \
FOR_EACH_X86_64_CPU_FPREGISTER(V)
#if CPU(X86)
@@ -101,14 +101,14 @@
V(void*, r15)
#define FOR_EACH_X86_64_CPU_FPREGISTER(V) \
- V(__m128, xmm8) \
- V(__m128, xmm9) \
- V(__m128, xmm10) \
- V(__m128, xmm11) \
- V(__m128, xmm12) \
- V(__m128, xmm13) \
- V(__m128, xmm14) \
- V(__m128, xmm15)
+ V(double, xmm8) \
+ V(double, xmm9) \
+ V(double, xmm10) \
+ V(double, xmm11) \
+ V(double, xmm12) \
+ V(double, xmm13) \
+ V(double, xmm14) \
+ V(double, xmm15)
#endif // CPU(X86_64)
Modified: trunk/Source/_javascript_Core/jit/JITStubsX86.h (176094 => 176095)
--- trunk/Source/_javascript_Core/jit/JITStubsX86.h 2014-11-13 22:59:04 UTC (rev 176094)
+++ trunk/Source/_javascript_Core/jit/JITStubsX86.h 2014-11-13 23:00:07 UTC (rev 176095)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008, 2009, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2008, 2009, 2013, 2014 Apple Inc. All rights reserved.
* Copyright (C) 2008 Cameron Zwarich <[email protected]>
* Copyright (C) Research In Motion Limited 2010, 2011. All rights reserved.
*
@@ -95,14 +95,14 @@
"movl 6 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%eax), %ecx" "\n"
"movl %ecx, " STRINGIZE_VALUE_OF(PROBE_CPU_ESP_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp)" "\n"
- "movdqa %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp)" "\n"
+ "movq %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp)" "\n"
+ "movq %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp)" "\n"
+ "movq %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp)" "\n"
+ "movq %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp)" "\n"
+ "movq %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp)" "\n"
+ "movq %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp)" "\n"
+ "movq %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp)" "\n"
+ "movq %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp)" "\n"
// Reserve stack space for the arg while maintaining the required stack
// pointer 32 byte alignment:
@@ -119,14 +119,14 @@
"movl " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%ebp), %esi" "\n"
"movl " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%ebp), %edi" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp), %xmm0" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp), %xmm1" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp), %xmm2" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp), %xmm3" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp), %xmm4" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp), %xmm5" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp), %xmm6" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp), %xmm7" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%ebp), %xmm0" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%ebp), %xmm1" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%ebp), %xmm2" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%ebp), %xmm3" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%ebp), %xmm4" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%ebp), %xmm5" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%ebp), %xmm6" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%ebp), %xmm7" "\n"
// There are 6 more registers left to restore:
// eax, ecx, ebp, esp, eip, and eflags.
Modified: trunk/Source/_javascript_Core/jit/JITStubsX86Common.h (176094 => 176095)
--- trunk/Source/_javascript_Core/jit/JITStubsX86Common.h 2014-11-13 22:59:04 UTC (rev 176094)
+++ trunk/Source/_javascript_Core/jit/JITStubsX86Common.h 2014-11-13 23:00:07 UTC (rev 176095)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2013, 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -50,39 +50,35 @@
#define PROBE_ARG1_OFFSET (1 * PTR_SIZE)
#define PROBE_ARG2_OFFSET (2 * PTR_SIZE)
-#define PROBE_CPU_EAX_OFFSET (4 * PTR_SIZE)
-#define PROBE_CPU_EBX_OFFSET (5 * PTR_SIZE)
-#define PROBE_CPU_ECX_OFFSET (6 * PTR_SIZE)
-#define PROBE_CPU_EDX_OFFSET (7 * PTR_SIZE)
-#define PROBE_CPU_ESI_OFFSET (8 * PTR_SIZE)
-#define PROBE_CPU_EDI_OFFSET (9 * PTR_SIZE)
-#define PROBE_CPU_EBP_OFFSET (10 * PTR_SIZE)
-#define PROBE_CPU_ESP_OFFSET (11 * PTR_SIZE)
+#define PROBE_FIRST_GPR_OFFSET (3 * PTR_SIZE)
+#define PROBE_CPU_EAX_OFFSET (PROBE_FIRST_GPR_OFFSET + (0 * PTR_SIZE))
+#define PROBE_CPU_ECX_OFFSET (PROBE_FIRST_GPR_OFFSET + (1 * PTR_SIZE))
+#define PROBE_CPU_EDX_OFFSET (PROBE_FIRST_GPR_OFFSET + (2 * PTR_SIZE))
+#define PROBE_CPU_EBX_OFFSET (PROBE_FIRST_GPR_OFFSET + (3 * PTR_SIZE))
+#define PROBE_CPU_ESP_OFFSET (PROBE_FIRST_GPR_OFFSET + (4 * PTR_SIZE))
+#define PROBE_CPU_EBP_OFFSET (PROBE_FIRST_GPR_OFFSET + (5 * PTR_SIZE))
+#define PROBE_CPU_ESI_OFFSET (PROBE_FIRST_GPR_OFFSET + (6 * PTR_SIZE))
+#define PROBE_CPU_EDI_OFFSET (PROBE_FIRST_GPR_OFFSET + (7 * PTR_SIZE))
#if CPU(X86)
-#define PROBE_FIRST_SPECIAL_OFFSET (12 * PTR_SIZE)
+#define PROBE_FIRST_SPECIAL_OFFSET (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE))
#else // CPU(X86_64)
-#define PROBE_CPU_R8_OFFSET (12 * PTR_SIZE)
-#define PROBE_CPU_R9_OFFSET (13 * PTR_SIZE)
-#define PROBE_CPU_R10_OFFSET (14 * PTR_SIZE)
-#define PROBE_CPU_R11_OFFSET (15 * PTR_SIZE)
-#define PROBE_CPU_R12_OFFSET (16 * PTR_SIZE)
-#define PROBE_CPU_R13_OFFSET (17 * PTR_SIZE)
-#define PROBE_CPU_R14_OFFSET (18 * PTR_SIZE)
-#define PROBE_CPU_R15_OFFSET (19 * PTR_SIZE)
-#define PROBE_FIRST_SPECIAL_OFFSET (20 * PTR_SIZE)
+#define PROBE_CPU_R8_OFFSET (PROBE_FIRST_GPR_OFFSET + (8 * PTR_SIZE))
+#define PROBE_CPU_R9_OFFSET (PROBE_FIRST_GPR_OFFSET + (9 * PTR_SIZE))
+#define PROBE_CPU_R10_OFFSET (PROBE_FIRST_GPR_OFFSET + (10 * PTR_SIZE))
+#define PROBE_CPU_R11_OFFSET (PROBE_FIRST_GPR_OFFSET + (11 * PTR_SIZE))
+#define PROBE_CPU_R12_OFFSET (PROBE_FIRST_GPR_OFFSET + (12 * PTR_SIZE))
+#define PROBE_CPU_R13_OFFSET (PROBE_FIRST_GPR_OFFSET + (13 * PTR_SIZE))
+#define PROBE_CPU_R14_OFFSET (PROBE_FIRST_GPR_OFFSET + (14 * PTR_SIZE))
+#define PROBE_CPU_R15_OFFSET (PROBE_FIRST_GPR_OFFSET + (15 * PTR_SIZE))
+#define PROBE_FIRST_SPECIAL_OFFSET (PROBE_FIRST_GPR_OFFSET + (16 * PTR_SIZE))
#endif // CPU(X86_64)
#define PROBE_CPU_EIP_OFFSET (PROBE_FIRST_SPECIAL_OFFSET + (0 * PTR_SIZE))
#define PROBE_CPU_EFLAGS_OFFSET (PROBE_FIRST_SPECIAL_OFFSET + (1 * PTR_SIZE))
+#define PROBE_FIRST_XMM_OFFSET (PROBE_FIRST_SPECIAL_OFFSET + (2 * PTR_SIZE))
-#if CPU(X86)
-#define PROBE_FIRST_XMM_OFFSET (PROBE_FIRST_SPECIAL_OFFSET + (4 * PTR_SIZE)) // After padding.
-#else // CPU(X86_64)
-#define PROBE_FIRST_XMM_OFFSET (PROBE_FIRST_SPECIAL_OFFSET + (2 * PTR_SIZE)) // After padding.
-#endif // CPU(X86_64)
-
-#define XMM_SIZE 16
+#define XMM_SIZE 8
#define PROBE_CPU_XMM0_OFFSET (PROBE_FIRST_XMM_OFFSET + (0 * XMM_SIZE))
#define PROBE_CPU_XMM1_OFFSET (PROBE_FIRST_XMM_OFFSET + (1 * XMM_SIZE))
#define PROBE_CPU_XMM2_OFFSET (PROBE_FIRST_XMM_OFFSET + (2 * XMM_SIZE))
@@ -92,7 +88,19 @@
#define PROBE_CPU_XMM6_OFFSET (PROBE_FIRST_XMM_OFFSET + (6 * XMM_SIZE))
#define PROBE_CPU_XMM7_OFFSET (PROBE_FIRST_XMM_OFFSET + (7 * XMM_SIZE))
+#if CPU(X86)
#define PROBE_SIZE (PROBE_CPU_XMM7_OFFSET + XMM_SIZE)
+#else // CPU(X86_64)
+#define PROBE_CPU_XMM8_OFFSET (PROBE_FIRST_XMM_OFFSET + (8 * XMM_SIZE))
+#define PROBE_CPU_XMM9_OFFSET (PROBE_FIRST_XMM_OFFSET + (9 * XMM_SIZE))
+#define PROBE_CPU_XMM10_OFFSET (PROBE_FIRST_XMM_OFFSET + (10 * XMM_SIZE))
+#define PROBE_CPU_XMM11_OFFSET (PROBE_FIRST_XMM_OFFSET + (11 * XMM_SIZE))
+#define PROBE_CPU_XMM12_OFFSET (PROBE_FIRST_XMM_OFFSET + (12 * XMM_SIZE))
+#define PROBE_CPU_XMM13_OFFSET (PROBE_FIRST_XMM_OFFSET + (13 * XMM_SIZE))
+#define PROBE_CPU_XMM14_OFFSET (PROBE_FIRST_XMM_OFFSET + (14 * XMM_SIZE))
+#define PROBE_CPU_XMM15_OFFSET (PROBE_FIRST_XMM_OFFSET + (15 * XMM_SIZE))
+#define PROBE_SIZE (PROBE_CPU_XMM15_OFFSET + XMM_SIZE)
+#endif // CPU(X86_64)
// These ASSERTs remind you that if you change the layout of ProbeContext,
// you need to change ctiMasmProbeTrampoline offsets above to match.
@@ -132,11 +140,19 @@
COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm6) == PROBE_CPU_XMM6_OFFSET, ProbeContext_cpu_xmm6_offset_matches_ctiMasmProbeTrampoline);
COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm7) == PROBE_CPU_XMM7_OFFSET, ProbeContext_cpu_xmm7_offset_matches_ctiMasmProbeTrampoline);
+#if CPU(X86_64)
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm8) == PROBE_CPU_XMM8_OFFSET, ProbeContext_cpu_xmm8_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm9) == PROBE_CPU_XMM9_OFFSET, ProbeContext_cpu_xmm9_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm10) == PROBE_CPU_XMM10_OFFSET, ProbeContext_cpu_xmm10_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm11) == PROBE_CPU_XMM11_OFFSET, ProbeContext_cpu_xmm11_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm12) == PROBE_CPU_XMM12_OFFSET, ProbeContext_cpu_xmm12_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm13) == PROBE_CPU_XMM13_OFFSET, ProbeContext_cpu_xmm13_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm14) == PROBE_CPU_XMM14_OFFSET, ProbeContext_cpu_xmm14_offset_matches_ctiMasmProbeTrampoline);
+COMPILE_ASSERT(PROBE_OFFSETOF(cpu.xmm15) == PROBE_CPU_XMM15_OFFSET, ProbeContext_cpu_xmm15_offset_matches_ctiMasmProbeTrampoline);
+#endif // CPU(X86_64)
+
COMPILE_ASSERT(sizeof(MacroAssembler::ProbeContext) == PROBE_SIZE, ProbeContext_size_matches_ctiMasmProbeTrampoline);
-// Also double check that the xmm registers are 16 byte (128-bit) aligned as
-// required by the movdqa instruction used in the trampoline.
-COMPILE_ASSERT(!(PROBE_OFFSETOF(cpu.xmm0) % 16), ProbeContext_xmm0_offset_not_aligned_properly);
#undef PROBE_OFFSETOF
#endif // ENABLE(MASM_PROBE)
Modified: trunk/Source/_javascript_Core/jit/JITStubsX86_64.h (176094 => 176095)
--- trunk/Source/_javascript_Core/jit/JITStubsX86_64.h 2014-11-13 22:59:04 UTC (rev 176094)
+++ trunk/Source/_javascript_Core/jit/JITStubsX86_64.h 2014-11-13 23:00:07 UTC (rev 176095)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008, 2009, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2008, 2009, 2013, 2014 Apple Inc. All rights reserved.
* Copyright (C) 2008 Cameron Zwarich <[email protected]>
* Copyright (C) Research In Motion Limited 2010, 2011. All rights reserved.
*
@@ -104,14 +104,22 @@
"movq %r14, " STRINGIZE_VALUE_OF(PROBE_CPU_R14_OFFSET) "(%rbp)" "\n"
"movq %r15, " STRINGIZE_VALUE_OF(PROBE_CPU_R15_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp)" "\n"
- "movdqa %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp)" "\n"
+ "movq %xmm0, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp)" "\n"
+ "movq %xmm1, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp)" "\n"
+ "movq %xmm2, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp)" "\n"
+ "movq %xmm3, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp)" "\n"
+ "movq %xmm4, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp)" "\n"
+ "movq %xmm5, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp)" "\n"
+ "movq %xmm6, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp)" "\n"
+ "movq %xmm7, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp)" "\n"
+ "movq %xmm8, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM8_OFFSET) "(%rbp)" "\n"
+ "movq %xmm9, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM9_OFFSET) "(%rbp)" "\n"
+ "movq %xmm10, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM10_OFFSET) "(%rbp)" "\n"
+ "movq %xmm11, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM11_OFFSET) "(%rbp)" "\n"
+ "movq %xmm12, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM12_OFFSET) "(%rbp)" "\n"
+ "movq %xmm13, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM13_OFFSET) "(%rbp)" "\n"
+ "movq %xmm14, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM14_OFFSET) "(%rbp)" "\n"
+ "movq %xmm15, " STRINGIZE_VALUE_OF(PROBE_CPU_XMM15_OFFSET) "(%rbp)" "\n"
"movq %rbp, %rdi" "\n" // the ProbeContext* arg.
"call *" STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "(%rbp)" "\n"
@@ -133,14 +141,22 @@
"movq " STRINGIZE_VALUE_OF(PROBE_CPU_R14_OFFSET) "(%rbp), %r14" "\n"
"movq " STRINGIZE_VALUE_OF(PROBE_CPU_R15_OFFSET) "(%rbp), %r15" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp), %xmm0" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp), %xmm1" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp), %xmm2" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp), %xmm3" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp), %xmm4" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp), %xmm5" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp), %xmm6" "\n"
- "movdqa " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp), %xmm7" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM0_OFFSET) "(%rbp), %xmm0" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM1_OFFSET) "(%rbp), %xmm1" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM2_OFFSET) "(%rbp), %xmm2" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM3_OFFSET) "(%rbp), %xmm3" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM4_OFFSET) "(%rbp), %xmm4" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM5_OFFSET) "(%rbp), %xmm5" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM6_OFFSET) "(%rbp), %xmm6" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM7_OFFSET) "(%rbp), %xmm7" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM8_OFFSET) "(%rbp), %xmm8" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM9_OFFSET) "(%rbp), %xmm9" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM10_OFFSET) "(%rbp), %xmm10" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM11_OFFSET) "(%rbp), %xmm11" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM12_OFFSET) "(%rbp), %xmm12" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM13_OFFSET) "(%rbp), %xmm13" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM14_OFFSET) "(%rbp), %xmm14" "\n"
+ "movq " STRINGIZE_VALUE_OF(PROBE_CPU_XMM15_OFFSET) "(%rbp), %xmm15" "\n"
// There are 6 more registers left to restore:
// rax, rcx, rbp, rsp, rip, and rflags.