This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new f5d2899  KUDU-3007 (3/3): Support building and running Kudu on aarch64 
platform
f5d2899 is described below

commit f5d2899a6483fc08a17ade8d9b26b3e6930da4e4
Author: liusheng <[email protected]>
AuthorDate: Sat Jan 11 16:17:19 2020 +0800

    KUDU-3007 (3/3): Support building and running Kudu on aarch64 platform
    
    This change adapts the Kudu code to support one more platform:
    ARM64/aarch64. With this patch, Kudu can be built and run on both
    x86_64 and aarch64 platforms.
    
    Change-Id: I2953519c5d28de17e6b2bb7094abab0c1cd12c97
    Reviewed-on: http://gerrit.cloudera.org:8080/14964
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <[email protected]>
---
 CMakeLists.txt                                     | 30 ++++++++++++++++------
 README.adoc                                        |  3 +--
 build-support/jenkins/build-and-test.sh            |  7 +++++
 .../test/cluster/TestKuduBinaryJarExtractor.java   |  4 ++-
 src/kudu/cfile/binary_plain_block.cc               |  7 ++++-
 src/kudu/cfile/bitshuffle_arch_wrapper.cc          |  3 ++-
 src/kudu/codegen/codegen-test.cc                   |  4 +++
 src/kudu/common/columnar_serialization.cc          |  6 ++++-
 src/kudu/common/key_encoder.h                      |  6 +++++
 src/kudu/common/zp7.cc                             |  6 +++++
 src/kudu/gutil/CMakeLists.txt                      |  7 ++++-
 src/kudu/gutil/cpu.cc                              |  5 ++++
 src/kudu/gutil/cycleclock-inl.h                    | 15 ++++++++++-
 src/kudu/gutil/dynamic_annotations.h               |  1 +
 src/kudu/gutil/port.h                              |  2 ++
 src/kudu/gutil/spinlock.h                          |  9 +++++++
 src/kudu/gutil/spinlock_linux-inl.h                |  3 +++
 src/kudu/rpc/rpc-test-base.h                       |  2 +-
 src/kudu/util/block_bloom_filter.cc                | 11 ++++++++
 src/kudu/util/debug-util.cc                        |  4 +++
 src/kudu/util/debug-util.h                         |  4 +++
 src/kudu/util/group_varint-inl.h                   |  9 ++++++-
 src/kudu/util/group_varint-test.cc                 |  4 +++
 src/kudu/util/init.cc                              |  3 +++
 src/kudu/util/memory/memory.cc                     |  7 ++++-
 src/kudu/util/notification.h                       |  4 +++
 src/kudu/util/striped64.cc                         |  6 +++++
 thirdparty/build-definitions.sh                    |  3 ++-
 28 files changed, 155 insertions(+), 20 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 077305b..d5f9eb7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -151,9 +151,15 @@ include(CompilerInfo)
 
 
 # compiler flags that are common across debug/release builds
-#  -msse4.2: Enable sse4.2 compiler intrinsics.
-execute_process(COMMAND uname -p OUTPUT_VARIABLE ARCH_NAME)
-if (NOT "${ARCH_NAME}" MATCHES "aarch64")
+execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH_NAME)
+if("${ARCH_NAME}" MATCHES "aarch64")
+  # Certain platforms such as ARM do not use signed chars by default
+  # which causes issues with certain bounds checks.
+  set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fsigned-char")
+  # Turn off fp-contract on aarch64 to avoid differences in multiply-add results.
+  set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -ffp-contract=off")
+else()
+  # -msse4.2: Enable sse4.2 compiler intrinsics.
   set(CXX_COMMON_FLAGS "-msse4.2")
 endif()
 #  -Wall: Enable all warnings.
@@ -409,11 +415,19 @@ if (${KUDU_USE_TSAN})
   # require all code to be position independent, and the easiest way to
   # guarantee that is via dynamic linking (not all 3rd party archives are
   # compiled with -fPIC e.g. boost).
-  if("${KUDU_LINK}" STREQUAL "a")
-    message("Using dynamic linking for TSAN")
-    set(KUDU_LINK "d")
-  elseif("${KUDU_LINK}" STREQUAL "s")
-    message(SEND_ERROR "Cannot use TSAN with static linking")
+  if(NOT "${ARCH_NAME}" MATCHES "aarch64")
+    if("${KUDU_LINK}" STREQUAL "a")
+      message("Using dynamic linking for TSAN")
+      set(KUDU_LINK "d")
+    elseif("${KUDU_LINK}" STREQUAL "s")
+      message(SEND_ERROR "Cannot use TSAN with static linking")
+    endif()
+  else()
+    # workaround for github.com/google/sanitizers/issues/1208
+    # TSAN with dynamic linking causes all test cases to fail on aarch64.
+    # We don't apply ENABLE_DIST_TEST on aarch64, so apply static linking directly.
+    message("Using static linking for TSAN on aarch64")
+    set(KUDU_LINK "s")
   endif()
 endif()
 
diff --git a/README.adoc b/README.adoc
index bad43b2..22036a0 100644
--- a/README.adoc
+++ b/README.adoc
@@ -384,8 +384,7 @@ linking the kudu binaries and unit tests. The full range of 
options for `KUDU_LI
 `static`, `dynamic`, and `auto`. The default is `auto` and only the first 
letter
 matters for the purpose of matching.
 
-NOTE: Dynamic linking is incompatible with ASAN and static linking is 
incompatible
-with TSAN.
+NOTE: Static linking is incompatible with TSAN, except on aarch64 where TSAN builds always use static linking.
 
 
 == Developing Kudu in Eclipse
diff --git a/build-support/jenkins/build-and-test.sh 
b/build-support/jenkins/build-and-test.sh
index f1d860f..21eaa49 100755
--- a/build-support/jenkins/build-and-test.sh
+++ b/build-support/jenkins/build-and-test.sh
@@ -194,6 +194,7 @@ if [ -n "$BUILD_ID" ]; then
   trap cleanup EXIT
 fi
 
+ARTIFACT_ARCH=$(uname -m)
 # Configure the build
 #
 # ASAN/TSAN can't build the Python bindings because the exported Kudu client
@@ -202,6 +203,12 @@ if [ "$BUILD_TYPE" = "ASAN" ]; then
   USE_CLANG=1
   CMAKE_BUILD=fastdebug
   EXTRA_BUILD_FLAGS="-DKUDU_USE_ASAN=1 -DKUDU_USE_UBSAN=1"
+  # workaround for github.com/google/sanitizers/issues/1208
+  # ASAN with dynamic linking causes all tests to fail on aarch64;
+  # we don't apply ENABLE_DIST_TEST on aarch64, so use static linking.
+  if [ "$ARTIFACT_ARCH" = "aarch64" ]; then
+    EXTRA_BUILD_FLAGS="$EXTRA_BUILD_FLAGS -DKUDU_LINK=static"
+  fi
   BUILD_PYTHON=0
   BUILD_PYTHON3=0
 elif [ "$BUILD_TYPE" = "TSAN" ]; then
diff --git 
a/java/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java
 
b/java/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java
index 4604cb8..ad56b87 100644
--- 
a/java/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java
+++ 
b/java/kudu-test-utils/src/test/java/org/apache/kudu/test/cluster/TestKuduBinaryJarExtractor.java
@@ -40,6 +40,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
 
+import com.google.gradle.osdetector.OsDetector;
 import org.junit.Rule;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -49,6 +50,7 @@ import org.apache.kudu.test.junit.RetryRule;
 
 public class TestKuduBinaryJarExtractor {
 
+  private static final OsDetector DETECTOR = new OsDetector();
   private static final Logger LOG = 
LoggerFactory.getLogger(TestKuduBinaryJarExtractor.class);
 
   @Rule
@@ -110,7 +112,7 @@ public class TestKuduBinaryJarExtractor {
     properties.setProperty("artifact.version", "1.9.0-SNAPSHOT");
     properties.setProperty("artifact.prefix", "apache-kudu-1.9.0-SNAPSHOT");
     properties.setProperty("artifact.os", os);
-    properties.setProperty("artifact.arch", "x86_64");
+    properties.setProperty("artifact.arch", DETECTOR.getArch());
     properties.store(out, "test");
   }
 
diff --git a/src/kudu/cfile/binary_plain_block.cc 
b/src/kudu/cfile/binary_plain_block.cc
index 18ec85d..fce2810 100644
--- a/src/kudu/cfile/binary_plain_block.cc
+++ b/src/kudu/cfile/binary_plain_block.cc
@@ -201,12 +201,17 @@ Status BinaryPlainBlockDecoder::ParseHeader() {
   size_t rem = num_elems_;
   while (rem >= 4) {
     if (PREDICT_TRUE(p + 16 < limit)) {
+      #ifndef __aarch64__
       p = coding::DecodeGroupVarInt32_SSE(
           p, &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_ptr[3]);
-
+      #else
+      p = coding::DecodeGroupVarInt32_SlowButSafe(
+          p, &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_ptr[3]);
+      #endif //__aarch64__
       // The above function should add at most 17 (4 32-bit ints plus a 
selector byte) to
       // 'p'. Thus, since we checked that (p + 16 < limit) above, we are 
guaranteed that
       // (p <= limit) now.
+
       DCHECK_LE(p, limit);
     } else {
       p = coding::DecodeGroupVarInt32_SlowButSafe(
diff --git a/src/kudu/cfile/bitshuffle_arch_wrapper.cc 
b/src/kudu/cfile/bitshuffle_arch_wrapper.cc
index 4799cd5..8d21f87 100644
--- a/src/kudu/cfile/bitshuffle_arch_wrapper.cc
+++ b/src/kudu/cfile/bitshuffle_arch_wrapper.cc
@@ -35,6 +35,7 @@
 #undef bshuf_decompress_lz4
 
 #include "kudu/gutil/cpu.h"
+// IWYU pragma: no_forward_declare base::CPU
 
 using base::CPU;
 
@@ -57,7 +58,7 @@ decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4;
 // the cost of a 'std::once' call.
 __attribute__((constructor))
 void SelectBitshuffleFunctions() {
-#ifndef __APPLE__
+#if !defined(__APPLE__) && !defined(__aarch64__)
   if (CPU().has_avx2()) {
     g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2;
     g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2;
diff --git a/src/kudu/codegen/codegen-test.cc b/src/kudu/codegen/codegen-test.cc
index c42e095..2a91e25 100644
--- a/src/kudu/codegen/codegen-test.cc
+++ b/src/kudu/codegen/codegen-test.cc
@@ -380,7 +380,11 @@ TEST_F(CodegenTest, TestDumpMC) {
 
   const vector<string>& msgs = sink.logged_msgs();
   ASSERT_EQ(msgs.size(), 1);
+  #ifndef __aarch64__
   EXPECT_THAT(msgs[0], testing::ContainsRegex("retq"));
+  #else
+  EXPECT_THAT(msgs[0], testing::ContainsRegex("ret"));
+  #endif //__aarch64__
 }
 
 // Basic test for the CompilationManager code cache.
diff --git a/src/kudu/common/columnar_serialization.cc 
b/src/kudu/common/columnar_serialization.cc
index 6c481d9..3fd6b12 100644
--- a/src/kudu/common/columnar_serialization.cc
+++ b/src/kudu/common/columnar_serialization.cc
@@ -17,12 +17,16 @@
 
 #include "kudu/common/columnar_serialization.h"
 
+#ifdef __aarch64__
+#include "kudu/util/sse2neon.h" // IWYU pragma: keep
+#else
 #include <emmintrin.h>
 #include <immintrin.h>
+#endif
 
 #include <cstring>
 #include <ostream>
-#include <string>
+#include <string> // IWYU pragma: keep
 #include <vector>
 
 #include <glog/logging.h>
diff --git a/src/kudu/common/key_encoder.h b/src/kudu/common/key_encoder.h
index 5b2cd76..214cc57 100644
--- a/src/kudu/common/key_encoder.h
+++ b/src/kudu/common/key_encoder.h
@@ -18,8 +18,10 @@
 #ifndef KUDU_COMMON_KEYENCODER_H
 #define KUDU_COMMON_KEYENCODER_H
 
+#ifndef __aarch64__
 #include <emmintrin.h>
 #include <smmintrin.h>
+#endif
 
 #include <climits>
 #include <cstdint>
@@ -244,6 +246,9 @@ struct KeyEncoderTraits<BINARY, Buffer> {
   // REQUIRES: len == 16 or 8
   template<int LEN>
   static bool SSEEncodeChunk(const uint8_t** srcp, uint8_t** dstp) {
+    #ifdef __aarch64__
+    return false;
+    #else
     COMPILE_ASSERT(LEN == 16 || LEN == 8, invalid_length);
     __m128i data;
     if (LEN == 16) {
@@ -280,6 +285,7 @@ struct KeyEncoderTraits<BINARY, Buffer> {
     *dstp += LEN;
     *srcp += LEN;
     return true;
+    #endif //__aarch64__
   }
 
   // Non-SSE loop which encodes 'len' bytes from 'srcp' into 'dst'.
diff --git a/src/kudu/common/zp7.cc b/src/kudu/common/zp7.cc
index de817d7..9bf2835 100644
--- a/src/kudu/common/zp7.cc
+++ b/src/kudu/common/zp7.cc
@@ -38,13 +38,17 @@
 
 #ifdef __x86_64__
 #include <emmintrin.h>
+#elif defined(__aarch64__)
+#include "kudu/util/sse2neon.h"
 #endif
 
+#ifndef __aarch64__
 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5
 #define USE_INLINE_ASM_CLMUL
 #else
 #include <wmmintrin.h>
 #endif
+#endif
 
 #define N_BITS      (6)
 
@@ -85,7 +89,9 @@ static inline __m128i asm_mm_clmulepi64_si128(__m128i a, 
__m128i b) {
 // the same masks).
 //
 // This variant depends on the CLMUL instruction.
+#ifndef __aarch64__
 __attribute__((target("pclmul")))
+#endif // __aarch64__
 ATTRIBUTE_NO_SANITIZE_INTEGER
 static zp7_masks_64_t zp7_ppp_64_clmul(uint64_t mask) {
   zp7_masks_64_t r;
diff --git a/src/kudu/gutil/CMakeLists.txt b/src/kudu/gutil/CMakeLists.txt
index 53bf62d..02808d9 100644
--- a/src/kudu/gutil/CMakeLists.txt
+++ b/src/kudu/gutil/CMakeLists.txt
@@ -16,7 +16,6 @@
 # under the License.
 
 set(GUTIL_SRCS
-  atomicops-internals-x86.cc
   bits.cc
   cpu.cc
   dynamic_annotations.c
@@ -51,6 +50,12 @@ set(GUTIL_SRCS
   utf/rune.c
   walltime.cc)
 
+if (NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+set(GUTIL_SRCS
+  atomicops-internals-x86.cc
+  ${GUTIL_SRCS})
+endif()
+
 set(GUTIL_LIBS
   glog
   protobuf)
diff --git a/src/kudu/gutil/cpu.cc b/src/kudu/gutil/cpu.cc
index e108304..318d397 100644
--- a/src/kudu/gutil/cpu.cc
+++ b/src/kudu/gutil/cpu.cc
@@ -4,10 +4,12 @@
 
 #include "kudu/gutil/cpu.h"
 
+#ifndef __aarch64__
 #include <cstring>
 #include <utility>
 
 #include "kudu/gutil/integral_types.h"
+#endif //__aarch64__
 
 #if defined(__x86_64__)
 #if defined(_MSC_VER)
@@ -274,6 +276,9 @@ void CPU::Initialize() {
 #elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || 
defined(OS_LINUX))
   cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
   has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
+#elif defined(__aarch64__)
+  cpu_brand_.assign("ARM64");
+  has_broken_neon_ = false;
 #else
   #error unknown architecture
 #endif
diff --git a/src/kudu/gutil/cycleclock-inl.h b/src/kudu/gutil/cycleclock-inl.h
index c913d51..a7b4059 100644
--- a/src/kudu/gutil/cycleclock-inl.h
+++ b/src/kudu/gutil/cycleclock-inl.h
@@ -147,7 +147,20 @@ inline int64 CycleClock::Now() {
 }
 
 // ----------------------------------------------------------------
-#elif defined(ARMV6)  // V6 is the earliest arm that has a standard cyclecount
+#elif defined(__aarch64__)
+#include "kudu/gutil/sysinfo.h"
+inline int64 CycleClock::Now() {
+  // System timer of ARMv8 runs at a different frequency than the CPU's.
+  // The frequency is fixed, typically in the range 1-50MHz.  It can be
+  // read at CNTFRQ special register.  We assume the OS has set up
+  // the virtual timer properly.
+  int64_t virtual_timer_value;
+  asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
+  return virtual_timer_value;
+}
+
+// ----------------------------------------------------------------
+#elif defined(ARMV6)   // V6 is the earliest arm that has a standard cyclecount
 #include "kudu/gutil/sysinfo.h"
 inline int64 CycleClock::Now() {
   uint32 pmccntr;
diff --git a/src/kudu/gutil/dynamic_annotations.h 
b/src/kudu/gutil/dynamic_annotations.h
index 7e03d45..5cc5aba 100644
--- a/src/kudu/gutil/dynamic_annotations.h
+++ b/src/kudu/gutil/dynamic_annotations.h
@@ -57,6 +57,7 @@
 #ifndef __DYNAMIC_ANNOTATIONS_H__
 #define __DYNAMIC_ANNOTATIONS_H__
 
+#include <stddef.h>
 #ifndef DYNAMIC_ANNOTATIONS_ENABLED
 # define DYNAMIC_ANNOTATIONS_ENABLED 0
 #endif
diff --git a/src/kudu/gutil/port.h b/src/kudu/gutil/port.h
index f0b06d7..f5155b2 100644
--- a/src/kudu/gutil/port.h
+++ b/src/kudu/gutil/port.h
@@ -324,6 +324,8 @@ inline void* memrchr(const void* bytes, int find_char, 
size_t len) {
 // TODO(user) This is the L1 D-cache line size of our Power7 machines.
 // Need to check if this is appropriate for other PowerPC64 systems.
 #define CACHELINE_SIZE 128
+#elif defined(__aarch64__)
+#define CACHELINE_SIZE 64
 #elif defined(__arm__)
 // Cache line sizes for ARM: These values are not strictly correct since
 // cache line sizes depend on implementations, not architectures.  There
diff --git a/src/kudu/gutil/spinlock.h b/src/kudu/gutil/spinlock.h
index eced3ef..eb85d10 100644
--- a/src/kudu/gutil/spinlock.h
+++ b/src/kudu/gutil/spinlock.h
@@ -76,6 +76,9 @@ class LOCKABLE SpinLock {
       SlowLock();
     }
     ANNOTATE_RWLOCK_ACQUIRED(this, 1);
+#ifdef __aarch64__
+    __asm__ __volatile__ ("dmb ish" ::: "memory");
+#endif //__aarch64__
   }
 
   // Try to acquire this SpinLock without blocking and return true if the
@@ -89,6 +92,9 @@ class LOCKABLE SpinLock {
     if (res) {
       ANNOTATE_RWLOCK_ACQUIRED(this, 1);
     }
+#ifdef __aarch64__
+    __asm__ __volatile__ ("dmb ish" ::: "memory");
+#endif //__aarch64__
     return res;
   }
 
@@ -105,6 +111,9 @@ class LOCKABLE SpinLock {
       // for the lock.
       SlowUnlock(wait_cycles);
     }
+#ifdef __aarch64__
+    __asm__ __volatile__ ("dmb ish" ::: "memory");
+#endif //__aarch64__
   }
 
   // Determine if the lock is held.  When the lock is held by the invoking
diff --git a/src/kudu/gutil/spinlock_linux-inl.h 
b/src/kudu/gutil/spinlock_linux-inl.h
index 54e16a7..ebfe570 100644
--- a/src/kudu/gutil/spinlock_linux-inl.h
+++ b/src/kudu/gutil/spinlock_linux-inl.h
@@ -42,6 +42,9 @@
 #define FUTEX_WAKE 1
 #define FUTEX_PRIVATE_FLAG 128
 
+// Note: Instead of making direct system calls that are inlined, we rely
+//       on the syscall() function in glibc to do the right thing.
+
 static bool have_futex;
 static int futex_private_flag = FUTEX_PRIVATE_FLAG;
 
diff --git a/src/kudu/rpc/rpc-test-base.h b/src/kudu/rpc/rpc-test-base.h
index 5d19137..fb88a79 100644
--- a/src/kudu/rpc/rpc-test-base.h
+++ b/src/kudu/rpc/rpc-test-base.h
@@ -416,7 +416,7 @@ class RpcTestBase : public KuduTest {
  public:
   RpcTestBase()
     : n_worker_threads_(3),
-      service_queue_length_(100),
+      service_queue_length_(200),
       n_server_reactor_threads_(3),
       keepalive_time_ms_(1000),
       metric_entity_(METRIC_ENTITY_server.Instantiate(&metric_registry_, 
"test.rpc_test")) {
diff --git a/src/kudu/util/block_bloom_filter.cc 
b/src/kudu/util/block_bloom_filter.cc
index 2c80f79..16a5b7b 100644
--- a/src/kudu/util/block_bloom_filter.cc
+++ b/src/kudu/util/block_bloom_filter.cc
@@ -17,8 +17,12 @@
 
 #include "kudu/util/block_bloom_filter.h"
 
+#ifdef __aarch64__
+#include "kudu/util/sse2neon.h"
+#else //__aarch64__
 #include <emmintrin.h>
 #include <mm_malloc.h>
+#endif
 
 #include <algorithm>
 #include <cmath>
@@ -183,10 +187,17 @@ void BlockBloomFilter::BucketInsert(const uint32_t 
bucket_idx, const uint32_t ha
     new_bucket[i] = 1U << new_bucket[i];
   }
   for (int i = 0; i < 2; ++i) {
+#ifdef __aarch64__
+    // IWYU pragma: no_include <arm_neon.h>
+    uint8x16_t new_bucket_neon = vreinterpretq_u8_u32(vld1q_u32(new_bucket + 4 
* i));
+    uint8x16_t* existing_bucket = 
reinterpret_cast<uint8x16_t*>(&directory_[bucket_idx][4 * i]);
+    *existing_bucket = vorrq_u8(*existing_bucket, new_bucket_neon);
+#else
     __m128i new_bucket_sse = 
_mm_load_si128(reinterpret_cast<__m128i*>(new_bucket + 4 * i));
     __m128i* existing_bucket = reinterpret_cast<__m128i*>(
         &DCHECK_NOTNULL(directory_)[bucket_idx][4 * i]);
     *existing_bucket = _mm_or_si128(*existing_bucket, new_bucket_sse);
+#endif
   }
 }
 
diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc
index 7bcb787..44a44aa 100644
--- a/src/kudu/util/debug-util.cc
+++ b/src/kudu/util/debug-util.cc
@@ -43,7 +43,11 @@
 #include <glog/raw_logging.h>
 #ifdef __linux__
 #define UNW_LOCAL_ONLY
+#ifdef __aarch64__
+#include <libunwind-aarch64.h>
+#else
 #include <libunwind.h>
+#endif //__aarch64__
 #endif
 
 #include "kudu/gutil/basictypes.h"
diff --git a/src/kudu/util/debug-util.h b/src/kudu/util/debug-util.h
index e8c94ea..db4e8d1 100644
--- a/src/kudu/util/debug-util.h
+++ b/src/kudu/util/debug-util.h
@@ -31,6 +31,10 @@
 #include "kudu/gutil/strings/fastmem.h"
 #include "kudu/util/status.h"
 
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_PRIVATE_FLAG 128
+
 namespace kudu {
 
 template <typename T> class ArrayView;
diff --git a/src/kudu/util/group_varint-inl.h b/src/kudu/util/group_varint-inl.h
index 27e289f..598881d 100644
--- a/src/kudu/util/group_varint-inl.h
+++ b/src/kudu/util/group_varint-inl.h
@@ -17,13 +17,18 @@
 #ifndef KUDU_UTIL_GROUP_VARINT_INL_H
 #define KUDU_UTIL_GROUP_VARINT_INL_H
 
-#include <emmintrin.h>
 #ifdef __linux__
 #include <endian.h>
 #endif
+
+#ifdef __aarch64__
+#include "kudu/util/sse2neon.h"
+#else
+#include <emmintrin.h>
 #include <smmintrin.h>
 #include <tmmintrin.h>
 #include <xmmintrin.h>
+#endif //__aarch64__
 
 #include <cstdint>
 #include <cstring>
@@ -123,6 +128,7 @@ inline const uint8_t *DecodeGroupVarInt32_SlowButSafe(
 }
 
 
+#ifndef __aarch64__
 inline void DoExtractM128(__m128i results,
                           uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
 #define SSE_USE_EXTRACT_PS
@@ -202,6 +208,7 @@ inline const uint8_t *DecodeGroupVarInt32_SSE_Add(
   return src;
 }
 
+#endif //__aarch64__
 
 // Append a set of group-varint encoded integers to the given faststring.
 inline void AppendGroupVarInt32(
diff --git a/src/kudu/util/group_varint-test.cc 
b/src/kudu/util/group_varint-test.cc
index 505da77..ac15ef1 100644
--- a/src/kudu/util/group_varint-test.cc
+++ b/src/kudu/util/group_varint-test.cc
@@ -72,6 +72,7 @@ static void DoTestRoundTripGVI32(
 
   const uint8_t *end;
 
+#ifndef __aarch64__
   if (use_sse) {
     end = DecodeGroupVarInt32_SSE(
       buf.data(), &ret[0], &ret[1], &ret[2], &ret[3]);
@@ -79,6 +80,9 @@ static void DoTestRoundTripGVI32(
     end = DecodeGroupVarInt32(
       buf.data(), &ret[0], &ret[1], &ret[2], &ret[3]);
   }
+#else
+  end = DecodeGroupVarInt32(buf.data(), &ret[0], &ret[1], &ret[2], &ret[3]);
+#endif //__aarch64__
 
   ASSERT_EQ(a, ret[0]);
   ASSERT_EQ(b, ret[1]);
diff --git a/src/kudu/util/init.cc b/src/kudu/util/init.cc
index 5267730..b6c8197 100644
--- a/src/kudu/util/init.cc
+++ b/src/kudu/util/init.cc
@@ -66,6 +66,9 @@ void CheckStandardFds() {
 
 Status CheckCPUFlags() {
   base::CPU cpu;
+  if (!cpu.has_broken_neon() && cpu.cpu_brand()=="ARM64") {
+    return Status::OK();
+  }
   if (!cpu.has_sse42()) {
     return BadCPUStatus(cpu, "SSE4.2");
   }
diff --git a/src/kudu/util/memory/memory.cc b/src/kudu/util/memory/memory.cc
index b3964df..58924f8 100644
--- a/src/kudu/util/memory/memory.cc
+++ b/src/kudu/util/memory/memory.cc
@@ -20,7 +20,12 @@
 
 #include "kudu/util/memory/memory.h"
 
+#ifdef __aarch64__
+#define _mm_free(p) free(p)
+#define _mm_malloc(a, b) malloc(a)
+#else
 #include <mm_malloc.h>
+#endif //__aarch64__
 
 #include <algorithm>
 #include <cstdlib>
@@ -30,8 +35,8 @@
 
 #include "kudu/util/alignment.h"
 #include "kudu/util/flag_tags.h"
-#include "kudu/util/memory/overwrite.h"
 #include "kudu/util/mem_tracker.h"
+#include "kudu/util/memory/overwrite.h"
 
 using std::copy;
 using std::min;
diff --git a/src/kudu/util/notification.h b/src/kudu/util/notification.h
index 968afbb..6b38b8d 100644
--- a/src/kudu/util/notification.h
+++ b/src/kudu/util/notification.h
@@ -25,6 +25,10 @@
 #include "kudu/util/countdown_latch.h"
 #endif
 
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_PRIVATE_FLAG 128
+
 namespace kudu {
 
 // This class defines a `Notification` abstraction, which allows threads
diff --git a/src/kudu/util/striped64.cc b/src/kudu/util/striped64.cc
index 789a395..6f60ff4 100644
--- a/src/kudu/util/striped64.cc
+++ b/src/kudu/util/striped64.cc
@@ -17,7 +17,13 @@
 
 #include "kudu/util/striped64.h"
 
+#ifdef __aarch64__
+#define _mm_free(p) free(p)
+#define _mm_malloc(a, b) malloc(a)
+#else
 #include <mm_malloc.h>
+#endif //__aarch64__
+
 #include <unistd.h>
 
 #include <cstdlib>
diff --git a/thirdparty/build-definitions.sh b/thirdparty/build-definitions.sh
index 19ba003..cb93032 100644
--- a/thirdparty/build-definitions.sh
+++ b/thirdparty/build-definitions.sh
@@ -454,7 +454,8 @@ build_gperftools() {
     $GPERFTOOLS_SOURCE/configure \
     --enable-frame-pointers \
     --with-pic \
-    --prefix=$PREFIX
+    --prefix=$PREFIX \
+    --enable-emergency-malloc
   fixup_libtool
   make -j$PARALLEL $EXTRA_MAKEFLAGS install
   popd

Reply via email to