[arrow] branch master updated: ARROW-16478: [C++] Refine cpu info detection

yibocai Tue, 17 May 2022 19:10:51 -0700

This is an automated email from the ASF dual-hosted git repository.

yibocai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new cde5a08006 ARROW-16478: [C++] Refine cpu info detection
cde5a08006 is described below

commit cde5a0800624649cd6558f339ded2024146cfd71
Author: Yibo Cai <[email protected]>
AuthorDate: Wed May 18 02:10:03 2022 +0000

    ARROW-16478: [C++] Refine cpu info detection
    
    This patch separates OS and ARCH dependent code and removes CPU
    frequency detection (cycles_per_ms()), which is brittle and not very
    useful in practice.
    
    There are still many caveats, especially on Arm platforms. It's better
    to adopt a mature library if we want more complete functionality,
    e.g., github.com/pytorch/cpuinfo.
    
    Below are examples of cpu info detected on various platforms (some
    from virtual machines).
    
    Intel, Linux
    ------------
    Vendor: Intel
    Model: Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
    Features (set bits):  0  1  2  3  4  5  6  7  8  9  10  11  12
    Cache sizes: 32768 1048576 37486592
    
    AMD, Linux
    ----------
    Vendor: AMD
    Model: AMD EPYC 7251 8-Core Processor
    Features (set bits):  0  1  2  3  4  5  11  12
    Cache sizes: 32768 524288 33554432
    
    Intel, MacOS
    ------------
    Vendor: Unknown
    Model: Unknown
    Features (set bits):  0  1  2  3  4
    Cache sizes: 32768 262144 12582912
    
    Intel, Windows
    --------------
    Vendor: Intel
    Model: Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz\0\0
    Features (set bits):  0  1  2  3  4  5  6  7  8  9  10  11  12
    Cache sizes: 131072 2097152 37486592
    
    Intel, MinGW
    ------------
    Vendor: Intel
    Model: Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz\0\0\0\0\0\0\0
    Features (set bits):  0  1  2  3  4  5  11  12
    Cache sizes: 131072 524288 52428800
    
    Arm, Linux
    ----------
    Vendor: Unknown
    Model: Unknown
    Features (set bits):  32
    Cache sizes: 65536 1048576 Unknown
    
    Arm, MacOS
    ----------
    Vendor: Unknown
    Model: Unknown
    Features (set bits):  32
    Cache sizes: 65536 4194304 Unknown
    
    Closes #13112 from cyb70289/cpuinfo-refine
    
    Authored-by: Yibo Cai <[email protected]>
    Signed-off-by: Yibo Cai <[email protected]>
---
 cpp/src/arrow/compute/exec.cc        |   2 +-
 cpp/src/arrow/compute/exec.h         |   2 +-
 cpp/src/arrow/io/memory_benchmark.cc |   8 +-
 cpp/src/arrow/io/transform.h         |   2 +-
 cpp/src/arrow/util/benchmark_util.h  |   8 +-
 cpp/src/arrow/util/cpu_info.cc       | 727 ++++++++++++++++++-----------------
 cpp/src/arrow/util/cpu_info.h        | 119 +++---
 cpp/src/arrow/util/io_util_test.cc   |  27 ++
 8 files changed, 459 insertions(+), 436 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index f8a522a273..186a3cdf3c 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -1046,7 +1046,7 @@ ExecContext::ExecContext(MemoryPool* pool, 
::arrow::internal::Executor* executor
   this->func_registry_ = func_registry == nullptr ? GetFunctionRegistry() : 
func_registry;
 }
 
-CpuInfo* ExecContext::cpu_info() const { return CpuInfo::GetInstance(); }
+const CpuInfo* ExecContext::cpu_info() const { return CpuInfo::GetInstance(); }
 
 // ----------------------------------------------------------------------
 // SelectionVector
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index faebddb733..742c379441 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -68,7 +68,7 @@ class ARROW_EXPORT ExecContext {
   /// default_memory_pool().
   MemoryPool* memory_pool() const { return pool_; }
 
-  ::arrow::internal::CpuInfo* cpu_info() const;
+  const ::arrow::internal::CpuInfo* cpu_info() const;
 
   /// \brief An Executor which may be used to parallelize execution.
   ::arrow::internal::Executor* executor() const { return executor_; }
diff --git a/cpp/src/arrow/io/memory_benchmark.cc 
b/cpp/src/arrow/io/memory_benchmark.cc
index 6af1807d16..1b584d17e0 100644
--- a/cpp/src/arrow/io/memory_benchmark.cc
+++ b/cpp/src/arrow/io/memory_benchmark.cc
@@ -28,12 +28,12 @@
 namespace arrow {
 
 using internal::CpuInfo;
-static CpuInfo* cpu_info = CpuInfo::GetInstance();
+static const CpuInfo* cpu_info = CpuInfo::GetInstance();
 
 static const int kNumCores = cpu_info->num_cores();
-static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::L1_CACHE);
-static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::L2_CACHE);
-static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::L3_CACHE);
+static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
+static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
+static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
 
 constexpr size_t kMemoryPerCore = 32 * 1024 * 1024;
 using BufferPtr = std::shared_ptr<Buffer>;
diff --git a/cpp/src/arrow/io/transform.h b/cpp/src/arrow/io/transform.h
index c117f27592..7afe29b101 100644
--- a/cpp/src/arrow/io/transform.h
+++ b/cpp/src/arrow/io/transform.h
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-// Slow stream implementations, mainly for testing and benchmarking
+// Transform stream implementations
 
 #pragma once
 
diff --git a/cpp/src/arrow/util/benchmark_util.h 
b/cpp/src/arrow/util/benchmark_util.h
index 8379948bcb..79484989ac 100644
--- a/cpp/src/arrow/util/benchmark_util.h
+++ b/cpp/src/arrow/util/benchmark_util.h
@@ -27,11 +27,11 @@ namespace arrow {
 
 using internal::CpuInfo;
 
-static CpuInfo* cpu_info = CpuInfo::GetInstance();
+static const CpuInfo* cpu_info = CpuInfo::GetInstance();
 
-static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::L1_CACHE);
-static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::L2_CACHE);
-static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::L3_CACHE);
+static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
+static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
+static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
 static const int64_t kCantFitInL3Size = kL3Size * 4;
 static const std::vector<int64_t> kMemorySizes = {kL1Size, kL2Size, kL3Size,
                                                   kCantFitInL3Size};
diff --git a/cpp/src/arrow/util/cpu_info.cc b/cpp/src/arrow/util/cpu_info.cc
index 18a1ae0a51..3ba8db216e 100644
--- a/cpp/src/arrow/util/cpu_info.cc
+++ b/cpp/src/arrow/util/cpu_info.cc
@@ -23,32 +23,26 @@
 #include <sys/sysctl.h>
 #endif
 
-#include <stdlib.h>
-#include <string.h>
-
 #ifndef _MSC_VER
 #include <unistd.h>
 #endif
 
 #ifdef _WIN32
-#if defined(_M_AMD64) || defined(_M_X64)
-#include <immintrin.h>
-#endif
 #include <intrin.h>
-#include <array>
-#include <bitset>
 
 #include "arrow/util/windows_compatibility.h"
 #endif
 
 #include <algorithm>
+#include <array>
+#include <bitset>
 #include <cctype>
 #include <cerrno>
 #include <cstdint>
 #include <fstream>
 #include <memory>
-#include <mutex>
 #include <string>
+#include <thread>
 
 #include "arrow/result.h"
 #include "arrow/util/io_util.h"
@@ -56,140 +50,31 @@
 #include "arrow/util/optional.h"
 #include "arrow/util/string.h"
 
+#undef CPUINFO_ARCH_X86
+#undef CPUINFO_ARCH_ARM
+#undef CPUINFO_ARCH_PPC
+
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || 
defined(_M_X64)
+#define CPUINFO_ARCH_X86
+#elif defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
+#define CPUINFO_ARCH_ARM
+#elif defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || \
+    defined(__powerpc64__)
+#define CPUINFO_ARCH_PPC
+#endif
+
 namespace arrow {
 namespace internal {
 
 namespace {
 
-using std::max;
-
-constexpr int64_t kDefaultL1CacheSize = 32 * 1024;    // Level 1: 32k
-constexpr int64_t kDefaultL2CacheSize = 256 * 1024;   // Level 2: 256k
-constexpr int64_t kDefaultL3CacheSize = 3072 * 1024;  // Level 3: 3M
-
-#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
-void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
-  __asm__ __volatile__("cpuid"
-                       : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
-                         "=d"(CPUInfo[3])
-                       : "a"(function_id), "c"(subfunction_id));
-}
-
-int64_t _xgetbv(int xcr) {
-  int out = 0;
-  __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
-  return out;
-}
-#endif
-
-#ifdef __APPLE__
-util::optional<int64_t> IntegerSysCtlByName(const char* name) {
-  size_t len = sizeof(int64_t);
-  int64_t data = 0;
-  if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
-    return data;
-  }
-  // ENOENT is the official errno value for non-existing sysctl's,
-  // but EINVAL and ENOTSUP have been seen in the wild.
-  if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
-    auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
-    ARROW_LOG(WARNING) << st.ToString();
-  }
-  return util::nullopt;
-}
-#endif
-
-#if defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
-// There is no direct instruction to get cache size on Arm64 like '__cpuid' on 
x86;
-// Get Arm64 cache size by reading 
'/sys/devices/system/cpu/cpu0/cache/index*/size';
-// index* :
-//   index0: L1 Dcache
-//   index1: L1 Icache
-//   index2: L2 cache
-//   index3: L3 cache
-const char* kL1CacheSizeFile = 
"/sys/devices/system/cpu/cpu0/cache/index0/size";
-const char* kL2CacheSizeFile = 
"/sys/devices/system/cpu/cpu0/cache/index2/size";
-const char* kL3CacheSizeFile = 
"/sys/devices/system/cpu/cpu0/cache/index3/size";
-
-int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
-  char* content = nullptr;
-  char* last_char = nullptr;
-  size_t file_len = 0;
-
-  // Read cache file to 'content' for getting cache size.
-  FILE* cache_file = fopen(filename, "r");
-  if (cache_file == nullptr) {
-    return default_size;
-  }
-  int res = getline(&content, &file_len, cache_file);
-  fclose(cache_file);
-  if (res == -1) {
-    return default_size;
-  }
-  std::unique_ptr<char, decltype(&free)> content_guard(content, &free);
-
-  errno = 0;
-  const auto cardinal_num = strtoull(content, &last_char, 0);
-  if (errno != 0) {
-    return default_size;
-  }
-  // kB, MB, or GB
-  int64_t multip = 1;
-  switch (*last_char) {
-    case 'g':
-    case 'G':
-      multip *= 1024;
-    case 'm':
-    case 'M':
-      multip *= 1024;
-    case 'k':
-    case 'K':
-      multip *= 1024;
-  }
-  return cardinal_num * multip;
-}
-#endif
+constexpr int kCacheLevels = static_cast<int>(CpuInfo::CacheLevel::Last) + 1;
 
-#if !defined(_WIN32) && !defined(__APPLE__)
-struct {
-  std::string name;
-  int64_t flag;
-} flag_mappings[] = {
-#if (defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || 
defined(_M_X64))
-    {"ssse3", CpuInfo::SSSE3},       {"sse4_1", CpuInfo::SSE4_1},
-    {"sse4_2", CpuInfo::SSE4_2},     {"popcnt", CpuInfo::POPCNT},
-    {"avx", CpuInfo::AVX},           {"avx2", CpuInfo::AVX2},
-    {"avx512f", CpuInfo::AVX512F},   {"avx512cd", CpuInfo::AVX512CD},
-    {"avx512vl", CpuInfo::AVX512VL}, {"avx512dq", CpuInfo::AVX512DQ},
-    {"avx512bw", CpuInfo::AVX512BW}, {"bmi1", CpuInfo::BMI1},
-    {"bmi2", CpuInfo::BMI2},
-#endif
-#if defined(__aarch64__)
-    {"asimd", CpuInfo::ASIMD},
-#endif
-};
-const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
-
-// Helper function to parse for hardware flags.
-// values contains a list of space-separated flags.  check to see if the flags 
we
-// care about are present.
-// Returns a bitmap of flags.
-int64_t ParseCPUFlags(const std::string& values) {
-  int64_t flags = 0;
-  for (int i = 0; i < num_flags; ++i) {
-    if (values.find(flag_mappings[i].name) != std::string::npos) {
-      flags |= flag_mappings[i].flag;
-    }
-  }
-  return flags;
-}
-#endif
+//============================== OS Dependent ==============================//
 
-#ifdef _WIN32
-bool RetrieveCacheSize(int64_t* cache_sizes) {
-  if (!cache_sizes) {
-    return false;
-  }
+#if defined(_WIN32)
+//------------------------------ WINDOWS ------------------------------//
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
   PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
   PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
   DWORD buffer_size = 0;
@@ -200,44 +85,62 @@ bool RetrieveCacheSize(int64_t* cache_sizes) {
           GetModuleHandle("kernel32"), "GetLogicalProcessorInformation");
 
   if (!func_pointer) {
-    return false;
+    ARROW_LOG(WARNING) << "Failed to find procedure 
GetLogicalProcessorInformation";
+    return;
   }
 
   // Get buffer size
-  if (func_pointer(buffer, &buffer_size) && GetLastError() != 
ERROR_INSUFFICIENT_BUFFER)
-    return false;
+  if (func_pointer(buffer, &buffer_size) && GetLastError() != 
ERROR_INSUFFICIENT_BUFFER) {
+    ARROW_LOG(WARNING) << "Failed to get size of processor information buffer";
+    return;
+  }
 
   buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size);
+  if (!buffer) {
+    return;
+  }
 
-  if (!buffer || !func_pointer(buffer, &buffer_size)) {
-    return false;
+  if (!func_pointer(buffer, &buffer_size)) {
+    ARROW_LOG(WARNING) << "Failed to get processor information";
+    free(buffer);
+    return;
   }
 
   buffer_position = buffer;
   while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) 
{
     if (RelationCache == buffer_position->Relationship) {
       PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
-      if (cache->Level >= 1 && cache->Level <= 3) {
-        cache_sizes[cache->Level - 1] += cache->Size;
+      if (cache->Level >= 1 && cache->Level <= kCacheLevels) {
+        (*cache_sizes)[cache->Level - 1] += cache->Size;
       }
     }
     offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
     buffer_position++;
   }
 
-  if (buffer) {
-    free(buffer);
-  }
-  return true;
+  free(buffer);
 }
 
-#ifndef _M_ARM64
-// Source: https://en.wikipedia.org/wiki/CPUID
-bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
-                     CpuInfo::Vendor* vendor) {
-  if (!hardware_flags || !model_name || !vendor) {
-    return false;
-  }
+#if defined(CPUINFO_ARCH_X86)
+// On x86, get CPU features by cpuid, https://en.wikipedia.org/wiki/CPUID
+
+#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
+void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
+  __asm__ __volatile__("cpuid"
+                       : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
+                         "=d"(CPUInfo[3])
+                       : "a"(function_id), "c"(subfunction_id));
+}
+
+int64_t _xgetbv(int xcr) {
+  int out = 0;
+  __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
+  return out;
+}
+#endif  // MINGW
+
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                       std::string* model_name) {
   int register_EAX_id = 1;
   int highest_valid_id = 0;
   int highest_extended_valid_id = 0;
@@ -249,15 +152,17 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, 
std::string* model_name,
   highest_valid_id = cpu_info[0];
   // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
   // HEX of "AuthenticAMD": 41757468 656E7469 63414D44
-  if (cpu_info[1] == 0x756e6547 && cpu_info[2] == 0x49656e69 &&
-      cpu_info[3] == 0x6c65746e) {
+  if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 &&
+      cpu_info[2] == 0x6c65746e) {
     *vendor = CpuInfo::Vendor::Intel;
-  } else if (cpu_info[1] == 0x68747541 && cpu_info[2] == 0x69746e65 &&
-             cpu_info[3] == 0x444d4163) {
+  } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 &&
+             cpu_info[2] == 0x444d4163) {
     *vendor = CpuInfo::Vendor::AMD;
   }
 
-  if (highest_valid_id <= register_EAX_id) return false;
+  if (highest_valid_id <= register_EAX_id) {
+    return;
+  }
 
   // EAX=1: Processor Info and Feature Bits
   __cpuidex(cpu_info.data(), register_EAX_id, 0);
@@ -308,262 +213,382 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, 
std::string* model_name,
       if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
     }
   }
-
-  return true;
+}
+#elif defined(CPUINFO_ARCH_ARM)
+// Windows on Arm
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                       std::string* model_name) {
+  *hardware_flags |= CpuInfo::ASIMD;
+  // TODO: vendor, model_name
 }
 #endif
-#endif
-
-}  // namespace
 
-CpuInfo::CpuInfo()
-    : hardware_flags_(0),
-      num_cores_(1),
-      model_name_("unknown"),
-      vendor_(Vendor::Unknown) {}
-
-std::unique_ptr<CpuInfo> g_cpu_info;
-static std::once_flag cpuinfo_initialized;
-
-CpuInfo* CpuInfo::GetInstance() {
-  std::call_once(cpuinfo_initialized, []() {
-    g_cpu_info.reset(new CpuInfo);
-    g_cpu_info->Init();
-  });
-  return g_cpu_info.get();
+#elif defined(__APPLE__)
+//------------------------------ MACOS ------------------------------//
+util::optional<int64_t> IntegerSysCtlByName(const char* name) {
+  size_t len = sizeof(int64_t);
+  int64_t data = 0;
+  if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
+    return data;
+  }
+  // ENOENT is the official errno value for non-existing sysctl's,
+  // but EINVAL and ENOTSUP have been seen in the wild.
+  if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
+    auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
+    ARROW_LOG(WARNING) << st.ToString();
+  }
+  return util::nullopt;
 }
 
-void CpuInfo::Init() {
-  std::string line;
-  std::string name;
-  std::string value;
-
-  float max_mhz = 0;
-  int num_cores = 0;
-
-  memset(&cache_sizes_, 0, sizeof(cache_sizes_));
-
-#ifdef _WIN32
-  SYSTEM_INFO system_info;
-  GetSystemInfo(&system_info);
-  num_cores = system_info.dwNumberOfProcessors;
-
-  LARGE_INTEGER performance_frequency;
-  if (QueryPerformanceFrequency(&performance_frequency)) {
-    max_mhz = static_cast<float>(performance_frequency.QuadPart);
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+  static_assert(kCacheLevels >= 3, "");
+  auto c = IntegerSysCtlByName("hw.l1dcachesize");
+  if (c.has_value()) {
+    (*cache_sizes)[0] = *c;
   }
-#elif defined(__APPLE__)
-  // On macOS, get CPU information from system information base
+  c = IntegerSysCtlByName("hw.l2cachesize");
+  if (c.has_value()) {
+    (*cache_sizes)[1] = *c;
+  }
+  c = IntegerSysCtlByName("hw.l3cachesize");
+  if (c.has_value()) {
+    (*cache_sizes)[2] = *c;
+  }
+}
+
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                       std::string* model_name) {
+  // hardware_flags
   struct SysCtlCpuFeature {
     const char* name;
     int64_t flag;
   };
   std::vector<SysCtlCpuFeature> features = {
-#if defined(__aarch64__)
+#if defined(CPUINFO_ARCH_X86)
+    {"hw.optional.sse4_2",
+     CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
+    {"hw.optional.avx1_0", CpuInfo::AVX},
+    {"hw.optional.avx2_0", CpuInfo::AVX2},
+    {"hw.optional.bmi1", CpuInfo::BMI1},
+    {"hw.optional.bmi2", CpuInfo::BMI2},
+    {"hw.optional.avx512f", CpuInfo::AVX512F},
+    {"hw.optional.avx512cd", CpuInfo::AVX512CD},
+    {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
+    {"hw.optional.avx512bw", CpuInfo::AVX512BW},
+    {"hw.optional.avx512vl", CpuInfo::AVX512VL},
+#elif defined(CPUINFO_ARCH_ARM)
     // ARM64 (note that this is exposed under Rosetta as well)
-    {"hw.optional.neon", ASIMD},
-#else
-    // x86
-    {"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
-    {"hw.optional.avx1_0", AVX},
-    {"hw.optional.avx2_0", AVX2},
-    {"hw.optional.bmi1", BMI1},
-    {"hw.optional.bmi2", BMI2},
-    {"hw.optional.avx512f", AVX512F},
-    {"hw.optional.avx512cd", AVX512CD},
-    {"hw.optional.avx512dq", AVX512DQ},
-    {"hw.optional.avx512bw", AVX512BW},
-    {"hw.optional.avx512vl", AVX512VL},
+    {"hw.optional.neon", CpuInfo::ASIMD},
 #endif
   };
   for (const auto& feature : features) {
     auto v = IntegerSysCtlByName(feature.name);
     if (v.value_or(0)) {
-      hardware_flags_ |= feature.flag;
+      *hardware_flags |= feature.flag;
     }
   }
+
+  // TODO: vendor, model_name
+}
+
 #else
-  // Read from /proc/cpuinfo
+//------------------------------ LINUX ------------------------------//
+// Get cache size, return 0 on error
+int64_t LinuxGetCacheSize(int level) {
+  const struct {
+    int sysconf_name;
+    const char* sysfs_path;
+  } kCacheSizeEntries[] = {
+      {
+          _SC_LEVEL1_DCACHE_SIZE,
+          "/sys/devices/system/cpu/cpu0/cache/index0/size",  // l1d (index1 is 
l1i)
+      },
+      {
+          _SC_LEVEL2_CACHE_SIZE,
+          "/sys/devices/system/cpu/cpu0/cache/index2/size",  // l2
+      },
+      {
+          _SC_LEVEL3_CACHE_SIZE,
+          "/sys/devices/system/cpu/cpu0/cache/index3/size",  // l3
+      },
+  };
+  static_assert(sizeof(kCacheSizeEntries) / sizeof(kCacheSizeEntries[0]) == 
kCacheLevels,
+                "");
+
+  // get cache size by sysconf()
+  errno = 0;
+  const int64_t cache_size = sysconf(kCacheSizeEntries[level].sysconf_name);
+  if (errno == 0 && cache_size > 0) {
+    return cache_size;
+  }
+
+  // get cache size from sysfs if sysconf() fails (it does happen on Arm)
+  std::ifstream cacheinfo(kCacheSizeEntries[level].sysfs_path, std::ios::in);
+  if (!cacheinfo) {
+    return 0;
+  }
+  // cacheinfo is one line like: 65536, 64K, 1M, etc.
+  uint64_t size = 0;
+  char unit = '\0';
+  cacheinfo >> size >> unit;
+  if (unit == 'K') {
+    size <<= 10;
+  } else if (unit == 'M') {
+    size <<= 20;
+  } else if (unit == 'G') {
+    size <<= 30;
+  } else if (unit != '\0') {
+    return 0;
+  }
+  return static_cast<int64_t>(size);
+}
+
+// Helper function to parse for hardware flags from /proc/cpuinfo
+// values contains a list of space-separated flags.  check to see if the flags 
we
+// care about are present.
+// Returns a bitmap of flags.
+int64_t LinuxParseCpuFlags(const std::string& values) {
+  const struct {
+    std::string name;
+    int64_t flag;
+  } flag_mappings[] = {
+#if defined(CPUINFO_ARCH_X86)
+    {"ssse3", CpuInfo::SSSE3},
+    {"sse4_1", CpuInfo::SSE4_1},
+    {"sse4_2", CpuInfo::SSE4_2},
+    {"popcnt", CpuInfo::POPCNT},
+    {"avx", CpuInfo::AVX},
+    {"avx2", CpuInfo::AVX2},
+    {"avx512f", CpuInfo::AVX512F},
+    {"avx512cd", CpuInfo::AVX512CD},
+    {"avx512vl", CpuInfo::AVX512VL},
+    {"avx512dq", CpuInfo::AVX512DQ},
+    {"avx512bw", CpuInfo::AVX512BW},
+    {"bmi1", CpuInfo::BMI1},
+    {"bmi2", CpuInfo::BMI2},
+#elif defined(CPUINFO_ARCH_ARM)
+    {"asimd", CpuInfo::ASIMD},
+#endif
+  };
+  const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
+
+  int64_t flags = 0;
+  for (int i = 0; i < num_flags; ++i) {
+    if (values.find(flag_mappings[i].name) != std::string::npos) {
+      flags |= flag_mappings[i].flag;
+    }
+  }
+  return flags;
+}
+
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+  for (int i = 0; i < kCacheLevels; ++i) {
+    const int64_t cache_size = LinuxGetCacheSize(i);
+    if (cache_size > 0) {
+      (*cache_sizes)[i] = cache_size;
+    }
+  }
+}
+
+// Read from /proc/cpuinfo
+// TODO: vendor, model_name for Arm
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+                       std::string* model_name) {
   std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
   while (cpuinfo) {
+    std::string line;
     std::getline(cpuinfo, line);
-    size_t colon = line.find(':');
+    const size_t colon = line.find(':');
     if (colon != std::string::npos) {
-      name = TrimString(line.substr(0, colon - 1));
-      value = TrimString(line.substr(colon + 1, std::string::npos));
+      const std::string name = TrimString(line.substr(0, colon - 1));
+      const std::string value = TrimString(line.substr(colon + 1, 
std::string::npos));
       if (name.compare("flags") == 0 || name.compare("Features") == 0) {
-        hardware_flags_ |= ParseCPUFlags(value);
-      } else if (name.compare("cpu MHz") == 0) {
-        // Every core will report a different speed.  We'll take the max, 
assuming
-        // that when impala is running, the core will not be in a lower power 
state.
-        // TODO: is there a more robust way to do this, such as
-        // Window's QueryPerformanceFrequency()
-        float mhz = static_cast<float>(atof(value.c_str()));
-        max_mhz = max(mhz, max_mhz);
-      } else if (name.compare("processor") == 0) {
-        ++num_cores;
+        *hardware_flags |= LinuxParseCpuFlags(value);
       } else if (name.compare("model name") == 0) {
-        model_name_ = value;
+        *model_name = value;
       } else if (name.compare("vendor_id") == 0) {
         if (value.compare("GenuineIntel") == 0) {
-          vendor_ = Vendor::Intel;
+          *vendor = CpuInfo::Vendor::Intel;
         } else if (value.compare("AuthenticAMD") == 0) {
-          vendor_ = Vendor::AMD;
+          *vendor = CpuInfo::Vendor::AMD;
         }
       }
     }
   }
-  if (cpuinfo.is_open()) cpuinfo.close();
-#endif
+}
+#endif  // WINDOWS, MACOS, LINUX
+
+//============================== Arch Dependent 
==============================//
+
+#if defined(CPUINFO_ARCH_X86)
+//------------------------------ X86_64 ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* 
hardware_flags) {
+  enum {
+    USER_SIMD_NONE,
+    USER_SIMD_SSE4_2,
+    USER_SIMD_AVX,
+    USER_SIMD_AVX2,
+    USER_SIMD_AVX512,
+    USER_SIMD_MAX,
+  };
 
-#ifdef __APPLE__
-  // On macOS, get cache size from system information base
-  SetDefaultCacheSize();
-  auto c = IntegerSysCtlByName("hw.l1dcachesize");
-  if (c.has_value()) {
-    cache_sizes_[0] = *c;
+  int level = USER_SIMD_MAX;
+  // Parse the level
+  if (simd_level == "AVX512") {
+    level = USER_SIMD_AVX512;
+  } else if (simd_level == "AVX2") {
+    level = USER_SIMD_AVX2;
+  } else if (simd_level == "AVX") {
+    level = USER_SIMD_AVX;
+  } else if (simd_level == "SSE4_2") {
+    level = USER_SIMD_SSE4_2;
+  } else if (simd_level == "NONE") {
+    level = USER_SIMD_NONE;
+  } else {
+    return false;
   }
-  c = IntegerSysCtlByName("hw.l2cachesize");
-  if (c.has_value()) {
-    cache_sizes_[1] = *c;
+
+  // Disable feature as the level
+  if (level < USER_SIMD_AVX512) {
+    *hardware_flags &= ~CpuInfo::AVX512;
   }
-  c = IntegerSysCtlByName("hw.l3cachesize");
-  if (c.has_value()) {
-    cache_sizes_[2] = *c;
+  if (level < USER_SIMD_AVX2) {
+    *hardware_flags &= ~(CpuInfo::AVX2 | CpuInfo::BMI2);
   }
-#elif _WIN32
-  if (!RetrieveCacheSize(cache_sizes_)) {
-    SetDefaultCacheSize();
+  if (level < USER_SIMD_AVX) {
+    *hardware_flags &= ~CpuInfo::AVX;
   }
-#ifndef _M_ARM64
-  RetrieveCPUInfo(&hardware_flags_, &model_name_, &vendor_);
-#endif
-#else
-  SetDefaultCacheSize();
-#endif
-
-  if (max_mhz != 0) {
-    cycles_per_ms_ = static_cast<int64_t>(max_mhz);
-#ifndef _WIN32
-    cycles_per_ms_ *= 1000;
-#endif
-  } else {
-    cycles_per_ms_ = 1000000;
+  if (level < USER_SIMD_SSE4_2) {
+    *hardware_flags &= ~(CpuInfo::SSE4_2 | CpuInfo::BMI1);
   }
-  original_hardware_flags_ = hardware_flags_;
+  return true;
+}
 
-  if (num_cores > 0) {
-    num_cores_ = num_cores;
-  } else {
-    num_cores_ = 1;
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+#if defined(ARROW_HAVE_SSE4_2)
+  if (!ci->IsDetected(CpuInfo::SSE4_2)) {
+    DCHECK(false) << "CPU does not support the Supplemental SSE4_2 instruction 
set";
   }
-
-  // Parse the user simd level
-  ParseUserSimdLevel();
+#endif
 }
 
-void CpuInfo::VerifyCpuRequirements() {
-#ifdef ARROW_HAVE_SSE4_2
-  if (!IsSupported(CpuInfo::SSSE3)) {
-    DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction 
set";
+#elif defined(CPUINFO_ARCH_ARM)
+//------------------------------ AARCH64 ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* 
hardware_flags) {
+  if (simd_level == "NONE") {
+    *hardware_flags &= ~CpuInfo::ASIMD;
+    return true;
   }
-#endif
-#if defined(ARROW_HAVE_NEON)
-  if (!IsSupported(CpuInfo::ASIMD)) {
+  return false;
+}
+
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+  if (!ci->IsDetected(CpuInfo::ASIMD)) {
     DCHECK(false) << "CPU does not support the Armv8 Neon instruction set";
   }
-#endif
 }
 
-bool CpuInfo::CanUseSSE4_2() const {
-#if defined(ARROW_HAVE_SSE4_2)
-  return IsSupported(CpuInfo::SSE4_2);
 #else
-  return false;
-#endif
+//------------------------------ PPC, ... ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* 
hardware_flags) {
+  return true;
 }
 
-void CpuInfo::EnableFeature(int64_t flag, bool enable) {
-  if (!enable) {
-    hardware_flags_ &= ~flag;
-  } else {
-    // Can't turn something on that can't be supported
-    DCHECK_NE(original_hardware_flags_ & flag, 0);
-    hardware_flags_ |= flag;
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {}
+
+#endif  // X86, ARM, PPC
+
+}  // namespace
+
+struct CpuInfo::Impl {
+  int64_t hardware_flags = 0;
+  int num_cores = 0;
+  int64_t original_hardware_flags = 0;
+  Vendor vendor = Vendor::Unknown;
+  std::string model_name = "Unknown";
+  std::array<int64_t, kCacheLevels> cache_sizes{};
+
+  Impl() {
+    OsRetrieveCacheSize(&cache_sizes);
+    OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
+    original_hardware_flags = hardware_flags;
+    num_cores = 
std::max(static_cast<int>(std::thread::hardware_concurrency()), 1);
+
+    // parse user simd level
+    auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
+    if (!maybe_env_var.ok()) {
+      return;
+    }
+    std::string s = *std::move(maybe_env_var);
+    std::transform(s.begin(), s.end(), s.begin(),
+                   [](unsigned char c) { return std::toupper(c); });
+    if (!ArchParseUserSimdLevel(s, &hardware_flags)) {
+      ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
+    }
+  }
+
+  void EnableFeature(int64_t flag, bool enable) {
+    if (!enable) {
+      hardware_flags &= ~flag;
+    } else {
+      // Can't turn something on that can't be supported
+      DCHECK_EQ((~original_hardware_flags) & flag, 0);
+      hardware_flags |= (flag & original_hardware_flags);
+    }
   }
+};
+
+CpuInfo::~CpuInfo() = default;
+
+CpuInfo::CpuInfo() : impl_(new Impl) {}
+
+const CpuInfo* CpuInfo::GetInstance() {
+  static CpuInfo cpu_info;
+  return &cpu_info;
 }
 
-int64_t CpuInfo::hardware_flags() { return hardware_flags_; }
+int64_t CpuInfo::hardware_flags() const { return impl_->hardware_flags; }
 
-int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; }
+int CpuInfo::num_cores() const { return impl_->num_cores <= 0 ? 1 : 
impl_->num_cores; }
 
-int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; }
+CpuInfo::Vendor CpuInfo::vendor() const { return impl_->vendor; }
 
-int CpuInfo::num_cores() { return num_cores_; }
+const std::string& CpuInfo::model_name() const { return impl_->model_name; }
 
-std::string CpuInfo::model_name() { return model_name_; }
+int64_t CpuInfo::CacheSize(CacheLevel level) const {
+  constexpr int64_t kDefaultCacheSizes[] = {
+      32 * 1024,    // Level 1: 32K
+      256 * 1024,   // Level 2: 256K
+      3072 * 1024,  // Level 3: 3M
+  };
+  static_assert(
+      sizeof(kDefaultCacheSizes) / sizeof(kDefaultCacheSizes[0]) == 
kCacheLevels, "");
+
+  static_assert(static_cast<int>(CacheLevel::L1) == 0, "");
+  const int i = static_cast<int>(level);
+  if (impl_->cache_sizes[i] > 0) return impl_->cache_sizes[i];
+  if (i == 0) return kDefaultCacheSizes[0];
+  // l3 may be not available, return maximum of l2 or default size
+  return std::max(kDefaultCacheSizes[i], impl_->cache_sizes[i - 1]);
+}
 
-void CpuInfo::SetDefaultCacheSize() {
-#if defined(_SC_LEVEL1_DCACHE_SIZE) && !defined(__aarch64__)
-  // Call sysconf to query for the cache sizes
-  cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
-  cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE);
-  cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE);
-  ARROW_UNUSED(kDefaultL1CacheSize);
-  ARROW_UNUSED(kDefaultL2CacheSize);
-  ARROW_UNUSED(kDefaultL3CacheSize);
-#elif defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
-  cache_sizes_[0] = GetArm64CacheSize(kL1CacheSizeFile, kDefaultL1CacheSize);
-  cache_sizes_[1] = GetArm64CacheSize(kL2CacheSizeFile, kDefaultL2CacheSize);
-  cache_sizes_[2] = GetArm64CacheSize(kL3CacheSizeFile, kDefaultL3CacheSize);
-#else
-  // Provide reasonable default values if no info
-  cache_sizes_[0] = kDefaultL1CacheSize;
-  cache_sizes_[1] = kDefaultL2CacheSize;
-  cache_sizes_[2] = kDefaultL3CacheSize;
-#endif
+bool CpuInfo::IsSupported(int64_t flags) const {
+  return (impl_->hardware_flags & flags) == flags;
 }
 
-void CpuInfo::ParseUserSimdLevel() {
-  auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
-  if (!maybe_env_var.ok()) {
-    // No user settings
-    return;
-  }
-  std::string s = *std::move(maybe_env_var);
-  std::transform(s.begin(), s.end(), s.begin(),
-                 [](unsigned char c) { return std::toupper(c); });
+bool CpuInfo::IsDetected(int64_t flags) const {
+  return (impl_->original_hardware_flags & flags) == flags;
+}
 
-  int level = USER_SIMD_MAX;
-  // Parse the level
-  if (s == "AVX512") {
-    level = USER_SIMD_AVX512;
-  } else if (s == "AVX2") {
-    level = USER_SIMD_AVX2;
-  } else if (s == "AVX") {
-    level = USER_SIMD_AVX;
-  } else if (s == "SSE4_2") {
-    level = USER_SIMD_SSE4_2;
-  } else if (s == "NONE") {
-    level = USER_SIMD_NONE;
-  } else if (!s.empty()) {
-    ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
-  }
+void CpuInfo::VerifyCpuRequirements() const { return 
ArchVerifyCpuRequirements(this); }
 
-  // Disable feature as the level
-  if (level < USER_SIMD_AVX512) {  // Disable all AVX512 features
-    EnableFeature(AVX512, false);
-  }
-  if (level < USER_SIMD_AVX2) {  // Disable all AVX2 features
-    EnableFeature(AVX2 | BMI2, false);
-  }
-  if (level < USER_SIMD_AVX) {  // Disable all AVX features
-    EnableFeature(AVX, false);
-  }
-  if (level < USER_SIMD_SSE4_2) {  // Disable all SSE4_2 features
-    EnableFeature(SSE4_2 | BMI1, false);
-  }
+void CpuInfo::EnableFeature(int64_t flag, bool enable) {
+  impl_->EnableFeature(flag, enable);
 }
 
 }  // namespace internal
 }  // namespace arrow
+
+#undef CPUINFO_ARCH_X86
+#undef CPUINFO_ARCH_ARM
+#undef CPUINFO_ARCH_PPC
diff --git a/cpp/src/arrow/util/cpu_info.h b/cpp/src/arrow/util/cpu_info.h
index 83819c2551..949719b97e 100644
--- a/cpp/src/arrow/util/cpu_info.h
+++ b/cpp/src/arrow/util/cpu_info.h
@@ -21,8 +21,10 @@
 #pragma once
 
 #include <cstdint>
+#include <memory>
 #include <string>
 
+#include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -34,77 +36,68 @@ namespace internal {
 /// /sys/devices)
 class ARROW_EXPORT CpuInfo {
  public:
-  static constexpr int64_t SSSE3 = (1 << 1);
-  static constexpr int64_t SSE4_1 = (1 << 2);
-  static constexpr int64_t SSE4_2 = (1 << 3);
-  static constexpr int64_t POPCNT = (1 << 4);
-  static constexpr int64_t ASIMD = (1 << 5);
-  static constexpr int64_t AVX = (1 << 6);
-  static constexpr int64_t AVX2 = (1 << 7);
-  static constexpr int64_t AVX512F = (1 << 8);
-  static constexpr int64_t AVX512CD = (1 << 9);
-  static constexpr int64_t AVX512VL = (1 << 10);
-  static constexpr int64_t AVX512DQ = (1 << 11);
-  static constexpr int64_t AVX512BW = (1 << 12);
-  static constexpr int64_t BMI1 = (1 << 13);
-  static constexpr int64_t BMI2 = (1 << 14);
-
-  /// Typical AVX512 subsets consists of AVX512F,AVX512BW,AVX512VL,AVX512CD,AVX512DQ
+  ~CpuInfo();
+
+  /// x86 features
+  static constexpr int64_t SSSE3 = (1LL << 0);
+  static constexpr int64_t SSE4_1 = (1LL << 1);
+  static constexpr int64_t SSE4_2 = (1LL << 2);
+  static constexpr int64_t POPCNT = (1LL << 3);
+  static constexpr int64_t AVX = (1LL << 4);
+  static constexpr int64_t AVX2 = (1LL << 5);
+  static constexpr int64_t AVX512F = (1LL << 6);
+  static constexpr int64_t AVX512CD = (1LL << 7);
+  static constexpr int64_t AVX512VL = (1LL << 8);
+  static constexpr int64_t AVX512DQ = (1LL << 9);
+  static constexpr int64_t AVX512BW = (1LL << 10);
   static constexpr int64_t AVX512 = AVX512F | AVX512CD | AVX512VL | AVX512DQ | AVX512BW;
+  static constexpr int64_t BMI1 = (1LL << 11);
+  static constexpr int64_t BMI2 = (1LL << 12);
 
-  /// Cache enums for L1 (data), L2 and L3
-  enum CacheLevel {
-    L1_CACHE = 0,
-    L2_CACHE = 1,
-    L3_CACHE = 2,
-  };
+  /// Arm features
+  static constexpr int64_t ASIMD = (1LL << 32);
 
-  enum class Vendor : int { Unknown = 0, Intel, AMD };
+  /// Cache enums for L1 (data), L2 and L3
+  enum class CacheLevel { L1 = 0, L2, L3, Last = L3 };
 
-  static CpuInfo* GetInstance();
+  /// CPU vendors
+  enum class Vendor { Unknown, Intel, AMD };
 
-  /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
-  /// and terminate.
-  void VerifyCpuRequirements();
+  static const CpuInfo* GetInstance();
 
   /// Returns all the flags for this cpu
-  int64_t hardware_flags();
+  int64_t hardware_flags() const;
+
+  /// Returns the number of cores (including hyper-threaded) on this machine.
+  int num_cores() const;
+
+  /// Returns the vendor of the cpu.
+  Vendor vendor() const;
+
+  /// Returns the model name of the cpu (e.g. Intel i7-2600)
+  const std::string& model_name() const;
+
+  /// Returns the size of the cache in KB at this cache level
+  int64_t CacheSize(CacheLevel level) const;
 
   /// \brief Returns whether or not the given feature is enabled.
   ///
   /// IsSupported() is true iff IsDetected() is also true and the feature
   /// wasn't disabled by the user (for example by setting the ARROW_USER_SIMD_LEVEL
   /// environment variable).
-  bool IsSupported(int64_t flags) const { return (hardware_flags_ & flags) == flags; }
+  bool IsSupported(int64_t flags) const;
 
   /// Returns whether or not the given feature is available on the CPU.
-  bool IsDetected(int64_t flags) const {
-    return (original_hardware_flags_ & flags) == flags;
-  }
+  bool IsDetected(int64_t flags) const;
 
-  /// \brief The processor supports SSE4.2 and the Arrow libraries are built
-  /// with support for it
-  bool CanUseSSE4_2() const;
+  /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
+  /// and terminate.
+  void VerifyCpuRequirements() const;
 
   /// Toggle a hardware feature on and off.  It is not valid to turn on a feature
   /// that the underlying hardware cannot support. This is useful for testing.
   void EnableFeature(int64_t flag, bool enable);
 
-  /// Returns the size of the cache in KB at this cache level
-  int64_t CacheSize(CacheLevel level);
-
-  /// Returns the number of cpu cycles per millisecond
-  int64_t cycles_per_ms();
-
-  /// Returns the number of cores (including hyper-threaded) on this machine.
-  int num_cores();
-
-  /// Returns the model name of the cpu (e.g. Intel i7-2600)
-  std::string model_name();
-
-  /// Returns the vendor of the cpu.
-  Vendor vendor() const { return vendor_; }
-
   bool HasEfficientBmi2() const {
     // BMI2 (pext, pdep) is only efficient on Intel X86 processors.
     return vendor() == Vendor::Intel && IsSupported(BMI2);
@@ -113,30 +106,8 @@ class ARROW_EXPORT CpuInfo {
  private:
   CpuInfo();
 
-  enum UserSimdLevel {
-    USER_SIMD_NONE = 0,
-    USER_SIMD_SSE4_2,
-    USER_SIMD_AVX,
-    USER_SIMD_AVX2,
-    USER_SIMD_AVX512,
-    USER_SIMD_MAX,
-  };
-
-  void Init();
-
-  /// Inits CPU cache size variables with default values
-  void SetDefaultCacheSize();
-
-  /// Parse the SIMD level by ARROW_USER_SIMD_LEVEL env
-  void ParseUserSimdLevel();
-
-  int64_t hardware_flags_;
-  int64_t original_hardware_flags_;
-  int64_t cache_sizes_[L3_CACHE + 1];
-  int64_t cycles_per_ms_;
-  int num_cores_;
-  std::string model_name_;
-  Vendor vendor_;
+  struct Impl;
+  std::unique_ptr<Impl> impl_;
 };
 
 }  // namespace internal
diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc
index efc4f2164b..a38699dfd8 100644
--- a/cpp/src/arrow/util/io_util_test.cc
+++ b/cpp/src/arrow/util/io_util_test.cc
@@ -35,6 +35,7 @@
 #include "arrow/buffer.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/bit_util.h"
+#include "arrow/util/cpu_info.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/windows_compatibility.h"
@@ -731,5 +732,31 @@ TEST(Memory, GetRSS) {
 #endif
 }
 
+// Some loose tests to check if the cpuinfo makes sense
+TEST(CpuInfo, Basic) {
+  const CpuInfo* ci = CpuInfo::GetInstance();
+
+  const int ncores = ci->num_cores();
+  ASSERT_TRUE(ncores >= 1 && ncores <= 1000) << "invalid number of cores " << ncores;
+
+  const auto l1 = ci->CacheSize(CpuInfo::CacheLevel::L1);
+  const auto l2 = ci->CacheSize(CpuInfo::CacheLevel::L2);
+  const auto l3 = ci->CacheSize(CpuInfo::CacheLevel::L3);
+  ASSERT_TRUE(l1 >= 4 * 1024 && l1 <= 512 * 1024) << "unexpected L1 size: " << l1;
+  ASSERT_TRUE(l2 >= 32 * 1024 && l2 <= 8 * 1024 * 1024) << "unexpected L2 size: " << l2;
+  ASSERT_TRUE(l3 >= 256 * 1024 && l3 <= 1024 * 1024 * 1024)
+      << "unexpected L3 size: " << l3;
+  ASSERT_LE(l1, l2) << "L1 cache size " << l1 << " larger than L2 " << l2;
+  ASSERT_LE(l2, l3) << "L2 cache size " << l2 << " larger than L3 " << l3;
+
+  // Toggle hardware flags
+  CpuInfo* ci_rw = const_cast<CpuInfo*>(ci);
+  const int64_t original_hardware_flags = ci->hardware_flags();
+  ci_rw->EnableFeature(original_hardware_flags, false);
+  ASSERT_EQ(ci->hardware_flags(), 0);
+  ci_rw->EnableFeature(original_hardware_flags, true);
+  ASSERT_EQ(ci->hardware_flags(), original_hardware_flags);
+}
+
 }  // namespace internal
 }  // namespace arrow

[arrow] branch master updated: ARROW-16478: [C++] Refine cpu info detection

Reply via email to