Revision: 21177
Author: [email protected]
Date: Tue May 6 15:56:17 2014 UTC
Log: Arm64: Ensure that csp is always aligned to 16 byte values even
if jssp is not.
Even though the Arm64 specification specifies that csp
only needs to be aligned to 16 bytes if it is dereferenced, some
implementations show poor performance if csp is ever set to a non-aligned
value. This CL ensures that csp is always aligned to 16 byte values on
these platforms and adds checks to ensure this in debug mode.
Also makes the following change:
- Enable CPU support for arm64 to enable probing of cpu implementer and
cpu part.
- Add ALWAYS_ALIGN_CSP CpuFeature for Arm64 and set it based on runtime
probing of the cpu implementer.
- Rename PrepareForPush and PrepareForPop to PushPreamble and PopPostamble
and move PopPostamble after the pop.
[email protected], [email protected]
Review URL: https://codereview.chromium.org/264773004
http://code.google.com/p/v8/source/detail?r=21177
Modified:
/branches/bleeding_edge/src/arm64/cpu-arm64.cc
/branches/bleeding_edge/src/arm64/cpu-arm64.h
/branches/bleeding_edge/src/arm64/macro-assembler-arm64-inl.h
/branches/bleeding_edge/src/arm64/macro-assembler-arm64.cc
/branches/bleeding_edge/src/arm64/macro-assembler-arm64.h
/branches/bleeding_edge/src/arm64/regexp-macro-assembler-arm64.cc
/branches/bleeding_edge/src/cpu.cc
/branches/bleeding_edge/src/cpu.h
/branches/bleeding_edge/src/flag-definitions.h
/branches/bleeding_edge/src/v8globals.h
=======================================
--- /branches/bleeding_edge/src/arm64/cpu-arm64.cc Fri May 2 12:35:51 2014
UTC
+++ /branches/bleeding_edge/src/arm64/cpu-arm64.cc Tue May 6 15:56:17 2014
UTC
@@ -18,6 +18,7 @@
bool CpuFeatures::initialized_ = false;
#endif
unsigned CpuFeatures::supported_ = 0;
+unsigned CpuFeatures::found_by_runtime_probing_only_ = 0;
unsigned CpuFeatures::cross_compile_ = 0;
@@ -126,8 +127,24 @@
void CpuFeatures::Probe(bool serializer_enabled) {
- // AArch64 has no configuration options, no further probing is required.
- supported_ = 0;
+ ASSERT(supported_ == 0);
+
+ if (serializer_enabled && FLAG_enable_always_align_csp) {
+ // Always align csp in snapshot code - this is safe and ensures that
csp
+ // will always be aligned if it is enabled by probing at runtime.
+ supported_ |= static_cast<uint64_t>(1) << ALWAYS_ALIGN_CSP;
+ }
+
+ if (!serializer_enabled) {
+ CPU cpu;
+ // Always align csp on Nvidia cores.
+ if (cpu.implementer() == CPU::NVIDIA && FLAG_enable_always_align_csp) {
+ found_by_runtime_probing_only_ |=
+ static_cast<uint64_t>(1) << ALWAYS_ALIGN_CSP;
+ }
+
+ supported_ |= found_by_runtime_probing_only_;
+ }
#ifdef DEBUG
initialized_ = true;
=======================================
--- /branches/bleeding_edge/src/arm64/cpu-arm64.h Mon May 5 09:52:03 2014
UTC
+++ /branches/bleeding_edge/src/arm64/cpu-arm64.h Tue May 6 15:56:17 2014
UTC
@@ -24,11 +24,9 @@
// Check whether a feature is supported by the target CPU.
static bool IsSupported(CpuFeature f) {
ASSERT(initialized_);
- // There are no optional features for ARM64.
- return false;
+ return Check(f, supported_);
};
- // There are no optional features for ARM64.
static bool IsSafeForSnapshot(Isolate* isolate, CpuFeature f) {
return IsSupported(f);
}
@@ -40,16 +38,13 @@
static unsigned supported_;
static bool VerifyCrossCompiling() {
- // There are no optional features for ARM64.
- ASSERT(cross_compile_ == 0);
- return true;
+ return cross_compile_ == 0;
}
static bool VerifyCrossCompiling(CpuFeature f) {
- // There are no optional features for ARM64.
- USE(f);
- ASSERT(cross_compile_ == 0);
- return true;
+ unsigned mask = flag2set(f);
+ return cross_compile_ == 0 ||
+ (cross_compile_ & mask) == mask;
}
static bool SupportsCrankshaft() { return true; }
@@ -59,9 +54,17 @@
static bool initialized_;
#endif
- // This isn't used (and is always 0), but it is required by V8.
+ static unsigned found_by_runtime_probing_only_;
static unsigned cross_compile_;
+ static bool Check(CpuFeature f, unsigned set) {
+ return (set & flag2set(f)) != 0;
+ }
+
+ static unsigned flag2set(CpuFeature f) {
+ return 1u << f;
+ }
+
friend class PlatformFeatureScope;
DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
};
=======================================
--- /branches/bleeding_edge/src/arm64/macro-assembler-arm64-inl.h Tue May
6 08:05:27 2014 UTC
+++ /branches/bleeding_edge/src/arm64/macro-assembler-arm64-inl.h Tue May
6 15:56:17 2014 UTC
@@ -1246,29 +1246,56 @@
void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
ASSERT(!csp.Is(sp_));
- // TODO(jbramley): Several callers rely on this not using scratch
registers,
- // so we use the assembler directly here. However, this means that large
- // immediate values of 'space' cannot be handled cleanly. (Only 24-bits
- // immediates or values of 'space' that can be encoded in one
instruction are
- // accepted.) Once we implement our flexible scratch register idea, we
could
- // greatly simplify this function.
- InstructionAccurateScope scope(this);
- if ((space.IsImmediate()) && !is_uint12(space.immediate())) {
- // The subtract instruction supports a 12-bit immediate, shifted left
by
- // zero or 12 bits. So, in two instructions, we can subtract any
immediate
- // between zero and (1 << 24) - 1.
- int64_t imm = space.immediate();
- ASSERT(is_uint24(imm));
+ { InstructionAccurateScope scope(this);
+ if (!TmpList()->IsEmpty()) {
+ if (CpuFeatures::IsSupported(ALWAYS_ALIGN_CSP)) {
+ UseScratchRegisterScope temps(this);
+ Register temp = temps.AcquireX();
+ sub(temp, StackPointer(), space);
+ bic(csp, temp, 0xf);
+ } else {
+ sub(csp, StackPointer(), space);
+ }
+ } else {
+ // TODO(jbramley): Several callers rely on this not using scratch
+ // registers, so we use the assembler directly here. However, this
means
+ // that large immediate values of 'space' cannot be handled cleanly.
(Only
+ // 24-bits immediates or values of 'space' that can be encoded in one
+ // instruction are accepted.) Once we implement our flexible scratch
+ // register idea, we could greatly simplify this function.
+ ASSERT(space.IsImmediate());
+ // Align to 16 bytes.
+ uint64_t imm = RoundUp(space.immediate(), 0x10);
+ ASSERT(is_uint24(imm));
- int64_t imm_top_12_bits = imm >> 12;
- sub(csp, StackPointer(), imm_top_12_bits << 12);
- imm -= imm_top_12_bits << 12;
- if (imm > 0) {
- sub(csp, csp, imm);
+ Register source = StackPointer();
+ if (CpuFeatures::IsSupported(ALWAYS_ALIGN_CSP)) {
+ bic(csp, source, 0xf);
+ source = csp;
+ }
+ if (!is_uint12(imm)) {
+ int64_t imm_top_12_bits = imm >> 12;
+ sub(csp, source, imm_top_12_bits << 12);
+ source = csp;
+ imm -= imm_top_12_bits << 12;
+ }
+ if (imm > 0) {
+ sub(csp, source, imm);
+ }
}
- } else {
- sub(csp, StackPointer(), space);
+ }
+ AssertStackConsistency();
+}
+
+
+void MacroAssembler::SyncSystemStackPointer() {
+ ASSERT(emit_debug_code());
+ ASSERT(!csp.Is(sp_));
+ if (CpuFeatures::IsSupported(ALWAYS_ALIGN_CSP)) {
+ InstructionAccurateScope scope(this);
+ bic(csp, StackPointer(), 0xf);
}
+ AssertStackConsistency();
}
@@ -1540,7 +1567,7 @@
// It is safe to leave csp where it is when unwinding the JavaScript
stack,
// but if we keep it matching StackPointer, the simulator can detect
memory
// accesses in the now-free part of the stack.
- Mov(csp, StackPointer());
+ SyncSystemStackPointer();
}
}
@@ -1562,7 +1589,7 @@
// It is safe to leave csp where it is when unwinding the JavaScript
stack,
// but if we keep it matching StackPointer, the simulator can detect
memory
// accesses in the now-free part of the stack.
- Mov(csp, StackPointer());
+ SyncSystemStackPointer();
}
}
@@ -1584,7 +1611,7 @@
// It is safe to leave csp where it is when unwinding the JavaScript
stack,
// but if we keep it matching StackPointer, the simulator can detect
memory
// accesses in the now-free part of the stack.
- Mov(csp, StackPointer());
+ SyncSystemStackPointer();
}
}
=======================================
--- /branches/bleeding_edge/src/arm64/macro-assembler-arm64.cc Tue May 6
11:25:37 2014 UTC
+++ /branches/bleeding_edge/src/arm64/macro-assembler-arm64.cc Tue May 6
15:56:17 2014 UTC
@@ -124,6 +124,7 @@
// register so we use the temp register as an intermediate again.
Logical(temp, rn, temp, op);
Mov(csp, temp);
+ AssertStackConsistency();
} else {
Logical(rd, rn, temp, op);
}
@@ -231,6 +232,7 @@
// pointer.
if (rd.IsSP()) {
mov(rd, temp);
+ AssertStackConsistency();
}
}
}
@@ -767,7 +769,7 @@
int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
int size = src0.SizeInBytes();
- PrepareForPush(count, size);
+ PushPreamble(count, size);
PushHelper(count, size, src0, src1, src2, src3);
}
@@ -781,7 +783,7 @@
int count = 5 + src5.IsValid() + src6.IsValid() + src6.IsValid();
int size = src0.SizeInBytes();
- PrepareForPush(count, size);
+ PushPreamble(count, size);
PushHelper(4, size, src0, src1, src2, src3);
PushHelper(count - 4, size, src4, src5, src6, src7);
}
@@ -798,22 +800,15 @@
int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
int size = dst0.SizeInBytes();
- PrepareForPop(count, size);
PopHelper(count, size, dst0, dst1, dst2, dst3);
-
- if (!csp.Is(StackPointer()) && emit_debug_code()) {
- // It is safe to leave csp where it is when unwinding the JavaScript
stack,
- // but if we keep it matching StackPointer, the simulator can detect
memory
- // accesses in the now-free part of the stack.
- Mov(csp, StackPointer());
- }
+ PopPostamble(count, size);
}
void MacroAssembler::PushPopQueue::PushQueued() {
if (queued_.empty()) return;
- masm_->PrepareForPush(size_);
+ masm_->PushPreamble(size_);
int count = queued_.size();
int index = 0;
@@ -838,8 +833,6 @@
void MacroAssembler::PushPopQueue::PopQueued() {
if (queued_.empty()) return;
- masm_->PrepareForPop(size_);
-
int count = queued_.size();
int index = 0;
while (index < count) {
@@ -856,6 +849,7 @@
batch[0], batch[1], batch[2], batch[3]);
}
+ masm_->PopPostamble(size_);
queued_.clear();
}
@@ -863,7 +857,7 @@
void MacroAssembler::PushCPURegList(CPURegList registers) {
int size = registers.RegisterSizeInBytes();
- PrepareForPush(registers.Count(), size);
+ PushPreamble(registers.Count(), size);
// Push up to four registers at a time because if the current stack
pointer is
// csp and reg_size is 32, registers must be pushed in blocks of four in
order
// to maintain the 16-byte alignment for csp.
@@ -882,7 +876,6 @@
void MacroAssembler::PopCPURegList(CPURegList registers) {
int size = registers.RegisterSizeInBytes();
- PrepareForPop(registers.Count(), size);
// Pop up to four registers at a time because if the current stack
pointer is
// csp and reg_size is 32, registers must be pushed in blocks of four in
// order to maintain the 16-byte alignment for csp.
@@ -895,20 +888,14 @@
int count = count_before - registers.Count();
PopHelper(count, size, dst0, dst1, dst2, dst3);
}
-
- if (!csp.Is(StackPointer()) && emit_debug_code()) {
- // It is safe to leave csp where it is when unwinding the JavaScript
stack,
- // but if we keep it matching StackPointer, the simulator can detect
memory
- // accesses in the now-free part of the stack.
- Mov(csp, StackPointer());
- }
+ PopPostamble(registers.Count(), size);
}
void MacroAssembler::PushMultipleTimes(CPURegister src, int count) {
int size = src.SizeInBytes();
- PrepareForPush(count, size);
+ PushPreamble(count, size);
if (FLAG_optimize_for_size && count > 8) {
UseScratchRegisterScope temps(this);
@@ -944,7 +931,7 @@
void MacroAssembler::PushMultipleTimes(CPURegister src, Register count) {
- PrepareForPush(Operand(count, UXTW, WhichPowerOf2(src.SizeInBytes())));
+ PushPreamble(Operand(count, UXTW, WhichPowerOf2(src.SizeInBytes())));
UseScratchRegisterScope temps(this);
Register temp = temps.AcquireSameSizeAs(count);
@@ -1070,7 +1057,7 @@
}
-void MacroAssembler::PrepareForPush(Operand total_size) {
+void MacroAssembler::PushPreamble(Operand total_size) {
// TODO(jbramley): This assertion generates too much code in some debug
tests.
// AssertStackConsistency();
if (csp.Is(StackPointer())) {
@@ -1092,7 +1079,7 @@
}
-void MacroAssembler::PrepareForPop(Operand total_size) {
+void MacroAssembler::PopPostamble(Operand total_size) {
AssertStackConsistency();
if (csp.Is(StackPointer())) {
// If the current stack pointer is csp, then it must be aligned to 16
bytes
@@ -1104,6 +1091,11 @@
// Don't check access size for non-immediate sizes. It's difficult to
do
// well, and it will be caught by hardware (or the simulator) anyway.
+ } else if (emit_debug_code()) {
+ // It is safe to leave csp where it is when unwinding the JavaScript
stack,
+ // but if we keep it matching StackPointer, the simulator can detect
memory
+ // accesses in the now-free part of the stack.
+ SyncSystemStackPointer();
}
}
@@ -1200,12 +1192,14 @@
void MacroAssembler::AssertStackConsistency() {
if (emit_debug_code()) {
- if (csp.Is(StackPointer())) {
- // We can't check the alignment of csp without using a scratch
register
- // (or clobbering the flags), but the processor (or simulator) will
abort
- // if it is not properly aligned during a load.
+ if (csp.Is(StackPointer()) ||
CpuFeatures::IsSupported(ALWAYS_ALIGN_CSP)) {
+ // Always check the alignment of csp if ALWAYS_ALIGN_CSP is true,
since it
+ // could have been bumped even if it is not the stack pointer. We
can't
+ // check the alignment of csp without using a scratch register (or
+ // clobbering the flags), but the processor (or simulator) will
abort if
+ // it is not properly aligned during a load.
ldr(xzr, MemOperand(csp, 0));
- } else if (FLAG_enable_slow_asserts) {
+ } else if (FLAG_enable_slow_asserts && !csp.Is(StackPointer())) {
Label ok;
// Check that csp <= StackPointer(), preserving all registers and
NZCV.
sub(StackPointer(), csp, StackPointer());
=======================================
--- /branches/bleeding_edge/src/arm64/macro-assembler-arm64.h Tue May 6
11:25:37 2014 UTC
+++ /branches/bleeding_edge/src/arm64/macro-assembler-arm64.h Tue May 6
15:56:17 2014 UTC
@@ -778,12 +778,22 @@
//
// This is necessary when pushing or otherwise adding things to the
stack, to
// satisfy the AAPCS64 constraint that the memory below the system stack
- // pointer is not accessed.
+ // pointer is not accessed. The amount pushed will be increased as
necessary
+ // to ensure csp remains aligned to 16 bytes.
//
// This method asserts that StackPointer() is not csp, since the call
does
// not make sense in that context.
inline void BumpSystemStackPointer(const Operand& space);
+ // Re-synchronizes the system stack pointer (csp) with the current stack
+ // pointer (according to StackPointer()). This function will ensure the
+ // new value of the system stack pointer remains aligned to 16 bytes,
and
+ // is lower than or equal to the value of the current stack pointer.
+ //
+ // This method asserts that StackPointer() is not csp, since the call
does
+ // not make sense in that context.
+ inline void SyncSystemStackPointer();
+
// Helpers
------------------------------------------------------------------
// Root register.
inline void InitializeRootRegister();
@@ -2020,14 +2030,14 @@
const CPURegister& dst0, const CPURegister& dst1,
const CPURegister& dst2, const CPURegister& dst3);
- // Perform necessary maintenance operations before a push or pop.
+ // Perform necessary maintenance operations before a push or after a pop.
//
// Note that size is specified in bytes.
- void PrepareForPush(Operand total_size);
- void PrepareForPop(Operand total_size);
+ void PushPreamble(Operand total_size);
+ void PopPostamble(Operand total_size);
- void PrepareForPush(int count, int size) { PrepareForPush(count * size);
}
- void PrepareForPop(int count, int size) { PrepareForPop(count * size); }
+ void PushPreamble(int count, int size) { PushPreamble(count * size); }
+ void PopPostamble(int count, int size) { PopPostamble(count * size); }
// Call Printf. On a native build, a simple call will be generated, but
if the
// simulator is being used then a suitable pseudo-instruction is used.
The
=======================================
--- /branches/bleeding_edge/src/arm64/regexp-macro-assembler-arm64.cc Tue
Apr 29 06:42:26 2014 UTC
+++ /branches/bleeding_edge/src/arm64/regexp-macro-assembler-arm64.cc Tue
May 6 15:56:17 2014 UTC
@@ -976,6 +976,7 @@
// Set stack pointer back to first register to retain
ASSERT(csp.Is(__ StackPointer()));
__ Mov(csp, fp);
+ __ AssertStackConsistency();
// Restore registers.
__ PopCPURegList(registers_to_retain);
=======================================
--- /branches/bleeding_edge/src/cpu.cc Tue Apr 29 06:42:26 2014 UTC
+++ /branches/bleeding_edge/src/cpu.cc Tue May 6 15:56:17 2014 UTC
@@ -56,7 +56,7 @@
#endif // !V8_LIBC_MSVCRT
-#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_MIPS
+#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 || V8_HOST_ARCH_MIPS
#if V8_OS_LINUX
@@ -463,6 +463,32 @@
char* cpu_model = cpu_info.ExtractField("cpu model");
has_fpu_ = HasListItem(cpu_model, "FPU");
delete[] cpu_model;
+
+#elif V8_HOST_ARCH_ARM64
+
+ CPUInfo cpu_info;
+
+ // Extract implementer from the "CPU implementer" field.
+ char* implementer = cpu_info.ExtractField("CPU implementer");
+ if (implementer != NULL) {
+ char* end ;
+ implementer_ = strtol(implementer, &end, 0);
+ if (end == implementer) {
+ implementer_ = 0;
+ }
+ delete[] implementer;
+ }
+
+ // Extract part number from the "CPU part" field.
+ char* part = cpu_info.ExtractField("CPU part");
+ if (part != NULL) {
+ char* end ;
+ part_ = strtol(part, &end, 0);
+ if (end == part) {
+ part_ = 0;
+ }
+ delete[] part;
+ }
#endif
}
=======================================
--- /branches/bleeding_edge/src/cpu.h Tue Apr 29 06:42:26 2014 UTC
+++ /branches/bleeding_edge/src/cpu.h Tue May 6 15:56:17 2014 UTC
@@ -44,6 +44,7 @@
// arm implementer/part information
int implementer() const { return implementer_; }
static const int ARM = 0x41;
+ static const int NVIDIA = 0x4e;
static const int QUALCOMM = 0x51;
int architecture() const { return architecture_; }
int part() const { return part_; }
=======================================
--- /branches/bleeding_edge/src/flag-definitions.h Tue May 6 14:48:34 2014
UTC
+++ /branches/bleeding_edge/src/flag-definitions.h Tue May 6 15:56:17 2014
UTC
@@ -379,6 +379,11 @@
DEFINE_bool(force_long_branches, false,
"force all emitted branches to be in long mode (MIPS only)")
+// cpu-arm64.cc
+DEFINE_bool(enable_always_align_csp, true,
+ "enable alignment of csp to 16 bytes on platforms which
prefer "
+ "the register to always be aligned (ARM64 only)")
+
// bootstrapper.cc
DEFINE_string(expose_natives_as, NULL, "expose natives in global object")
DEFINE_string(expose_debug_as, NULL, "expose debug in global object")
=======================================
--- /branches/bleeding_edge/src/v8globals.h Wed Apr 30 14:33:35 2014 UTC
+++ /branches/bleeding_edge/src/v8globals.h Tue May 6 15:56:17 2014 UTC
@@ -413,7 +413,8 @@
VFP32DREGS = 6, // ARM
NEON = 7, // ARM
SAHF = 0, // x86
- FPU = 1}; // MIPS
+ FPU = 1, // MIPS
+ ALWAYS_ALIGN_CSP = 1 }; // ARM64
// Used to specify if a macro instruction must perform a smi check on
tagged
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.