Hi Ross, I think the patches were already attached. Here they are again.
Also, I will have more patches coming up. Is there a process for me to upload them to Gerrit and follow up from there? At what point should I start using git cl? Thanks. Sirish On Wednesday, January 16, 2019 at 10:08:20 AM UTC-6, Ross McIlroy wrote: > > Hi Sirish, > > Thanks for your contributions. Could you link to the two patches so that I > can make sure they have appropriate reviewers assigned? > > Cheers, > Ross > > > On Wed, 16 Jan 2019 at 15:53, <[email protected] <javascript:>> wrote: > >> Hi all, >> >> Following the directions from https://v8.dev/docs/contribute, I am >> putting my first two patches (for review) for ARM64 that I would like to >> contribute to V8. >> >> First patch is a very simple patch - it adds default march for ARM64. >> 64-bit support in ARM started with arm version 8. >> Second patch is a probe implementation of ARM64, and probes for crc32 >> feature. This feature is later used, if present, in hashing algorithm. This >> patch improves speedometer performance by about half a percent on current >> Samsung and Pixel devices. >> >> Please review these patches, and let me know what I need to do next to >> get these patches committed. >> >> Sirish Pande >> Samsung Austin R&D Center >> >> -- >> -- >> v8-dev mailing list >> [email protected] <javascript:> >> http://groups.google.com/group/v8-dev >> --- >> You received this message because you are subscribed to the Google Groups >> "v8-dev" group. >> To unsubscribe from this group and stop receiving emails from it, send an >> email to [email protected] <javascript:>. >> For more options, visit https://groups.google.com/d/optout. >> > -- -- v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev --- You received this message because you are subscribed to the Google Groups "v8-dev" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. For more options, visit https://groups.google.com/d/optout.
>From 28859858e9fbd94d654de23e51f46e4c3a25a777 Mon Sep 17 00:00:00 2001 From: Sirish Pande <[email protected]> Date: Thu, 10 Jan 2019 12:00:32 -0600 Subject: [PATCH 2/2] [ARM64] Detect CRC32 support for ARM64, and use crc32 for hashing. Currently, we don't probe ARM64 for any supported CpuFeatures. -- Add HWCAPS flag from uapi/asm/hwcap.h -- Add crc32 bit for ARM64. -- Add support for ProbeImpl for crc32. -- Add crc32 supported Hash for ARM64 (by Kasi @ Samsung) --- src/arm64/assembler-arm64.cc | 12 +++++-- src/base/cpu.cc | 52 ++++++++++++++++++++++----- src/base/cpu.h | 5 +++ src/base/functional.cc | 69 ++++++++++++++++++++++++++++++++++++ src/cpu-features.h | 2 ++ 5 files changed, 128 insertions(+), 12 deletions(-) diff --git a/src/arm64/assembler-arm64.cc b/src/arm64/assembler-arm64.cc index a494f87d4e..823b9e5c33 100644 --- a/src/arm64/assembler-arm64.cc +++ b/src/arm64/assembler-arm64.cc @@ -44,20 +44,26 @@ namespace internal { // CpuFeatures implementation. void CpuFeatures::ProbeImpl(bool cross_compile) { - // AArch64 has no configuration options, no further probing is required. supported_ = 0; // Only use statically determined features for cross compile (snapshot). if (cross_compile) return; + // Runtime checks for certain CPU features of ARM64. + base::CPU cpu; + if (cpu.has_crc32()) + supported_ |= 1u << CRC32; + // We used to probe for coherent cache support, but on older CPUs it // causes crashes (crbug.com/524337), and newer CPUs don't even have // the feature any more. } void CpuFeatures::PrintTarget() { } -void CpuFeatures::PrintFeatures() {} - +void CpuFeatures::PrintFeatures() { + printf("CRC32=%d\n", + CpuFeatures::IsSupported(CRC32)); +} // ----------------------------------------------------------------------------- // CPURegList utilities. 
diff --git a/src/base/cpu.cc b/src/base/cpu.cc index 6ab0ffee29..47bb16c4a9 100644 --- a/src/base/cpu.cc +++ b/src/base/cpu.cc @@ -75,10 +75,31 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) { #endif // !V8_LIBC_MSVCRT -#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64 +#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 || \ + V8_HOST_ARCH_MIPS || V8_HOST_ARCH_MIPS64 #if V8_OS_LINUX +#if V8_HOST_ARCH_ARM64 + +// see <arch/arm64/include/uapi/asm/hwcap.h> kernel header. +/* + * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + */ +#define HWCAP_FP (1 << 0) +#define HWCAP_ASIMD (1 << 1) +#define HWCAP_EVTSTRM (1 << 2) +#define HWCAP_AES (1 << 3) +#define HWCAP_PMULL (1 << 4) +#define HWCAP_SHA1 (1 << 5) +#define HWCAP_SHA2 (1 << 6) +#define HWCAP_CRC32 (1 << 7) +#define HWCAP_ATOMICS (1 << 8) +#define HWCAP_FPHP (1 << 9) +#define HWCAP_ASIMDHP (1 << 10) + +#endif // V8_HOST_ARCH_ARM64 + #if V8_HOST_ARCH_ARM // See <uapi/asm/hwcap.h> kernel header. @@ -108,9 +129,14 @@ static V8_INLINE void __cpuid(int cpu_info[4], int info_type) { #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) #define HWCAP_LPAE (1 << 20) +#endif + + +#if V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 + static uint32_t ReadELFHWCaps() { uint32_t result = 0; -#if V8_GLIBC_PREREQ(2, 16) +#if V8_HOST_ARCH_ARM && V8_GLIBC_PREREQ(2, 16) result = static_cast<uint32_t>(getauxval(AT_HWCAP)); #else // Read the ELF HWCAP flags by parsing /proc/self/auxv. 
@@ -133,7 +159,7 @@ static uint32_t ReadELFHWCaps() { return result; } -#endif // V8_HOST_ARCH_ARM +#endif // V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 #if V8_HOST_ARCH_MIPS int __detect_fp64_mode(void) { @@ -336,7 +362,8 @@ CPU::CPU() has_vfp3_d32_(false), is_fp64_mode_(false), has_non_stop_time_stamp_counter_(false), - has_msa_(false) { + has_msa_(false), + has_crc32_(false) { memcpy(vendor_, "Unknown", 8); #if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64 int cpu_info[4]; @@ -420,17 +447,19 @@ CPU::CPU() has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0; } -#elif V8_HOST_ARCH_ARM +#elif V8_HOST_ARCH_ARM || V8_HOST_ARCH_ARM64 #if V8_OS_LINUX CPUInfo cpu_info; // Extract implementor from the "CPU implementer" field. + // todo: This section needs to rewritten for ARM64 as most + // ARM64 implementations have BigLittle, and even BigMediumLittle char* implementer = cpu_info.ExtractField("CPU implementer"); if (implementer != nullptr) { char* end; - implementer_ = strtol(implementer, &end, 0); + implementer_ = (int)strtol(implementer, &end, 0); if (end == implementer) { implementer_ = 0; } @@ -440,7 +469,7 @@ CPU::CPU() char* variant = cpu_info.ExtractField("CPU variant"); if (variant != nullptr) { char* end; - variant_ = strtol(variant, &end, 0); + variant_ = (int)strtol(variant, &end, 0); if (end == variant) { variant_ = -1; } @@ -451,7 +480,7 @@ CPU::CPU() char* part = cpu_info.ExtractField("CPU part"); if (part != nullptr) { char* end; - part_ = strtol(part, &end, 0); + part_ = (int)strtol(part, &end, 0); if (end == part) { part_ = 0; } @@ -467,7 +496,7 @@ CPU::CPU() char* architecture = cpu_info.ExtractField("CPU architecture"); if (architecture != nullptr) { char* end; - architecture_ = strtol(architecture, &end, 10); + architecture_ = (int)strtol(architecture, &end, 10); if (end == architecture) { // Kernels older than 3.18 report "CPU architecture: AArch64" on ARMv8. 
if (strcmp(architecture, "AArch64") == 0) { @@ -509,12 +538,16 @@ CPU::CPU() // Try to extract the list of CPU features from ELF hwcaps. uint32_t hwcaps = ReadELFHWCaps(); if (hwcaps != 0) { +#if V8_HOST_ARCH_ARM has_idiva_ = (hwcaps & HWCAP_IDIVA) != 0; has_neon_ = (hwcaps & HWCAP_NEON) != 0; has_vfp_ = (hwcaps & HWCAP_VFP) != 0; has_vfp3_ = (hwcaps & (HWCAP_VFPv3 | HWCAP_VFPv3D16 | HWCAP_VFPv4)) != 0; has_vfp3_d32_ = (has_vfp3_ && ((hwcaps & HWCAP_VFPv3D16) == 0 || (hwcaps & HWCAP_VFPD32) != 0)); +#elif V8_HOST_ARCH_ARM64 + has_crc32_ = (hwcaps & HWCAP_CRC32) != 0; +#endif } else { // Try to fallback to "Features" CPUInfo field. char* features = cpu_info.ExtractField("Features"); @@ -528,6 +561,7 @@ CPU::CPU() has_vfp3_ = true; has_vfp3_d32_ = true; } + has_crc32_ = HasListItem(features, "crc32"); delete[] features; } diff --git a/src/base/cpu.h b/src/base/cpu.h index 4b4becfa20..7a181abc2b 100644 --- a/src/base/cpu.h +++ b/src/base/cpu.h @@ -47,6 +47,7 @@ class V8_BASE_EXPORT CPU final { static const int ARM = 0x41; static const int NVIDIA = 0x4e; static const int QUALCOMM = 0x51; + static const int SAMSUNG = 0x53; int architecture() const { return architecture_; } int variant() const { return variant_; } static const int NVIDIA_DENVER = 0x0; @@ -111,6 +112,9 @@ class V8_BASE_EXPORT CPU final { bool has_vfp3() const { return has_vfp3_; } bool has_vfp3_d32() const { return has_vfp3_d32_; } + // arm64 features + bool has_crc32() const { return has_crc32_; } + // mips features bool is_fp64_mode() const { return is_fp64_mode_; } bool has_msa() const { return has_msa_; } @@ -156,6 +160,7 @@ class V8_BASE_EXPORT CPU final { bool is_fp64_mode_; bool has_non_stop_time_stamp_counter_; bool has_msa_; + bool has_crc32_; }; } // namespace base diff --git a/src/base/functional.cc b/src/base/functional.cc index dffb91f3cc..ec44c34d15 100644 --- a/src/base/functional.cc +++ b/src/base/functional.cc @@ -12,6 +12,14 @@ #include <limits> +#if V8_HOST_ARCH_ARM64 +#include 
"src/cpu-features.h" +// arm_acle.h is in usr/lib/gcc/arm-linux-gnueabihf/6/include/arm_acle.h +// TODO: Current build system does not get to that header. +// So, instead of using builtin intrinsics: v = __crc32w(hash, v); +// I am using inline-asm. +#endif + #include "src/base/bits.h" namespace v8 { @@ -23,8 +31,26 @@ namespace { // https://gist.github.com/badboy/6267743 template <typename T> V8_INLINE size_t hash_value_unsigned(T v) { + switch (sizeof(T)) { case 4: { +#if V8_HOST_ARCH_ARM64 + if (v8::internal::CpuFeatures::IsSupported(v8::internal::CRC32)) { + uint32_t hash = 0; + __asm ( "crc32w %w[v],%w[hash],%w[v]" + : [hash] "+r" (hash), [v] "+r" (v) + : + ); + } else { + // "32 bit Mix Functions" + v = ~v + (v << 15); // v = (v << 15) - v - 1; + v = v ^ (v >> 12); + v = v + (v << 2); + v = v ^ (v >> 4); + v = v * 2057; // v = (v + (v << 3)) + (v << 11); + v = v ^ (v >> 16); + } +#else // "32 bit Mix Functions" v = ~v + (v << 15); // v = (v << 15) - v - 1; v = v ^ (v >> 12); @@ -32,11 +58,29 @@ V8_INLINE size_t hash_value_unsigned(T v) { v = v ^ (v >> 4); v = v * 2057; // v = (v + (v << 3)) + (v << 11); v = v ^ (v >> 16); +#endif return static_cast<size_t>(v); } case 8: { switch (sizeof(size_t)) { case 4: { +#if V8_HOST_ARCH_ARM64 + if (v8::internal::CpuFeatures::IsSupported(v8::internal::CRC32)) { + uint32_t hash = 0; + __asm ( "crc32x %w[v],%w[hash],%x[v]" + : [hash] "+r" (hash), [v] "+r" (v) + : + ); + } else { + // "64 bit to 32 bit Hash Functions" + v = ~v + (v << 18); // v = (v << 18) - v - 1; + v = v ^ (v >> 31); + v = v * 21; // v = (v + (v << 2)) + (v << 4); + v = v ^ (v >> 11); + v = v + (v << 6); + v = v ^ (v >> 22); + } +#else // "64 bit to 32 bit Hash Functions" v = ~v + (v << 18); // v = (v << 18) - v - 1; v = v ^ (v >> 31); @@ -44,9 +88,33 @@ V8_INLINE size_t hash_value_unsigned(T v) { v = v ^ (v >> 11); v = v + (v << 6); v = v ^ (v >> 22); +#endif return static_cast<size_t>(v); } case 8: { +#if V8_HOST_ARCH_ARM64 + if 
(v8::internal::CpuFeatures::IsSupported(v8::internal::CRC32)) { + uint64_t hash1 = uint64_t{0x0000000000000000}; + uint64_t hash2 = hash1; + uint64_t upper32 = 0; + __asm ( "lsr %x[upper32], %x[v],#32 \n\t" + "crc32w %w[hash1],%w[hash1],%w[v] \n\t" + "crc32w %w[hash2],%w[hash2],%w[upper32] \n\t" + "orr %x[v],%x[hash1],%x[hash2],lsl #32 \n\t" + : [hash1] "+r" (hash1), [hash2] "+r" (hash2), [upper32] "+r" (upper32), [v] "+r" (v) + : + ); + } else { + // "64 bit Mix Functions" + v = ~v + (v << 21); // v = (v << 21) - v - 1; + v = v ^ (v >> 24); + v = (v + (v << 3)) + (v << 8); // v * 265 + v = v ^ (v >> 14); + v = (v + (v << 2)) + (v << 4); // v * 21 + v = v ^ (v >> 28); + v = v + (v << 31); + } +#else // "64 bit Mix Functions" v = ~v + (v << 21); // v = (v << 21) - v - 1; v = v ^ (v >> 24); @@ -55,6 +123,7 @@ V8_INLINE size_t hash_value_unsigned(T v) { v = (v + (v << 2)) + (v << 4); // v * 21 v = v ^ (v >> 28); v = v + (v << 31); +#endif return static_cast<size_t>(v); } } diff --git a/src/cpu-features.h b/src/cpu-features.h index 310fafe272..ee64dff9b6 100644 --- a/src/cpu-features.h +++ b/src/cpu-features.h @@ -49,6 +49,8 @@ enum CpuFeature { FLOATING_POINT_EXT, VECTOR_FACILITY, MISC_INSTR_EXT2, + // ARM64 + CRC32, NUMBER_OF_CPU_FEATURES, -- 2.20.1
>From 19ab9299ef3a490f075b87ee367b626644f1d30d Mon Sep 17 00:00:00 2001 From: Sirish Pande <[email protected]> Date: Mon, 14 Jan 2019 15:57:01 -0600 Subject: [PATCH 1/2] [ARM64] ARM64 support started from armv8. --ARM64 started with arm_version 8. Make that default for building. --Enable crc feature on arm64. Most arm64 support that feautre. --At runtime probe for crc on ARM64, we will find out whether we can execute code path with crc32 or not. --- BUILD.gn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/BUILD.gn b/BUILD.gn index a00dcd73dd..144c89fea8 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -449,6 +449,9 @@ config("toolchain") { } if (v8_current_cpu == "arm64") { defines += [ "V8_TARGET_ARCH_ARM64" ] + if (current_cpu == "arm64") { + cflags += [ "-march=armv8+crc" ] + } } # Mips64el/mipsel simulators. -- 2.20.1
