This is an automated email from the ASF dual-hosted git repository.
yibocai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cde5a08006 ARROW-16478: [C++] Refine cpu info detection
cde5a08006 is described below
commit cde5a0800624649cd6558f339ded2024146cfd71
Author: Yibo Cai <[email protected]>
AuthorDate: Wed May 18 02:10:03 2022 +0000
ARROW-16478: [C++] Refine cpu info detection
This patch separates OS-dependent and architecture-dependent code and
removes CPU frequency detection (cycles_per_ms()), which is brittle and
not very useful in practice.
There are still many caveats, especially on the Arm platform. It would be
better to adopt a mature library (e.g., github.com/pytorch/cpuinfo) if we
want more complete functionality.
Below are examples of the CPU info detected on various platforms (some
of them virtual machines).
Intel, Linux
------------
Vendor: Intel
Model: Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Features (set bits): 0 1 2 3 4 5 6 7 8 9 10 11 12
Cache sizes: 32768 1048576 37486592
AMD, Linux
----------
Vendor: AMD
Model: AMD EPYC 7251 8-Core Processor
Features (set bits): 0 1 2 3 4 5 11 12
Cache sizes: 32768 524288 33554432
Intel, MacOS
------------
Vendor: Unknown
Model: Unknown
Features (set bits): 0 1 2 3 4
Cache sizes: 32768 262144 12582912
Intel, Windows
--------------
Vendor: Intel
Model: Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz\0\0
Features (set bits): 0 1 2 3 4 5 6 7 8 9 10 11 12
Cache sizes: 131072 2097152 37486592
Intel, MinGW
------------
Vendor: Intel
Model: Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz\0\0\0\0\0\0\0
Features (set bits): 0 1 2 3 4 5 11 12
Cache sizes: 131072 524288 52428800
Arm, Linux
----------
Vendor: Unknown
Model: Unknown
Features (set bits): 32
Cache sizes: 65536 1048576 Unknown
Arm, MacOS
----------
Vendor: Unknown
Model: Unknown
Features (set bits): 32
Cache sizes: 65536 4194304 Unknown
Closes #13112 from cyb70289/cpuinfo-refine
Authored-by: Yibo Cai <[email protected]>
Signed-off-by: Yibo Cai <[email protected]>
---
cpp/src/arrow/compute/exec.cc | 2 +-
cpp/src/arrow/compute/exec.h | 2 +-
cpp/src/arrow/io/memory_benchmark.cc | 8 +-
cpp/src/arrow/io/transform.h | 2 +-
cpp/src/arrow/util/benchmark_util.h | 8 +-
cpp/src/arrow/util/cpu_info.cc | 727 ++++++++++++++++++-----------------
cpp/src/arrow/util/cpu_info.h | 119 +++---
cpp/src/arrow/util/io_util_test.cc | 27 ++
8 files changed, 459 insertions(+), 436 deletions(-)
diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index f8a522a273..186a3cdf3c 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -1046,7 +1046,7 @@ ExecContext::ExecContext(MemoryPool* pool,
::arrow::internal::Executor* executor
this->func_registry_ = func_registry == nullptr ? GetFunctionRegistry() :
func_registry;
}
-CpuInfo* ExecContext::cpu_info() const { return CpuInfo::GetInstance(); }
+const CpuInfo* ExecContext::cpu_info() const { return CpuInfo::GetInstance(); }
// ----------------------------------------------------------------------
// SelectionVector
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index faebddb733..742c379441 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -68,7 +68,7 @@ class ARROW_EXPORT ExecContext {
/// default_memory_pool().
MemoryPool* memory_pool() const { return pool_; }
- ::arrow::internal::CpuInfo* cpu_info() const;
+ const ::arrow::internal::CpuInfo* cpu_info() const;
/// \brief An Executor which may be used to parallelize execution.
::arrow::internal::Executor* executor() const { return executor_; }
diff --git a/cpp/src/arrow/io/memory_benchmark.cc
b/cpp/src/arrow/io/memory_benchmark.cc
index 6af1807d16..1b584d17e0 100644
--- a/cpp/src/arrow/io/memory_benchmark.cc
+++ b/cpp/src/arrow/io/memory_benchmark.cc
@@ -28,12 +28,12 @@
namespace arrow {
using internal::CpuInfo;
-static CpuInfo* cpu_info = CpuInfo::GetInstance();
+static const CpuInfo* cpu_info = CpuInfo::GetInstance();
static const int kNumCores = cpu_info->num_cores();
-static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::L1_CACHE);
-static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::L2_CACHE);
-static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::L3_CACHE);
+static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
+static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
+static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
constexpr size_t kMemoryPerCore = 32 * 1024 * 1024;
using BufferPtr = std::shared_ptr<Buffer>;
diff --git a/cpp/src/arrow/io/transform.h b/cpp/src/arrow/io/transform.h
index c117f27592..7afe29b101 100644
--- a/cpp/src/arrow/io/transform.h
+++ b/cpp/src/arrow/io/transform.h
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-// Slow stream implementations, mainly for testing and benchmarking
+// Transform stream implementations
#pragma once
diff --git a/cpp/src/arrow/util/benchmark_util.h
b/cpp/src/arrow/util/benchmark_util.h
index 8379948bcb..79484989ac 100644
--- a/cpp/src/arrow/util/benchmark_util.h
+++ b/cpp/src/arrow/util/benchmark_util.h
@@ -27,11 +27,11 @@ namespace arrow {
using internal::CpuInfo;
-static CpuInfo* cpu_info = CpuInfo::GetInstance();
+static const CpuInfo* cpu_info = CpuInfo::GetInstance();
-static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::L1_CACHE);
-static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::L2_CACHE);
-static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::L3_CACHE);
+static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
+static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
+static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
static const int64_t kCantFitInL3Size = kL3Size * 4;
static const std::vector<int64_t> kMemorySizes = {kL1Size, kL2Size, kL3Size,
kCantFitInL3Size};
diff --git a/cpp/src/arrow/util/cpu_info.cc b/cpp/src/arrow/util/cpu_info.cc
index 18a1ae0a51..3ba8db216e 100644
--- a/cpp/src/arrow/util/cpu_info.cc
+++ b/cpp/src/arrow/util/cpu_info.cc
@@ -23,32 +23,26 @@
#include <sys/sysctl.h>
#endif
-#include <stdlib.h>
-#include <string.h>
-
#ifndef _MSC_VER
#include <unistd.h>
#endif
#ifdef _WIN32
-#if defined(_M_AMD64) || defined(_M_X64)
-#include <immintrin.h>
-#endif
#include <intrin.h>
-#include <array>
-#include <bitset>
#include "arrow/util/windows_compatibility.h"
#endif
#include <algorithm>
+#include <array>
+#include <bitset>
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <fstream>
#include <memory>
-#include <mutex>
#include <string>
+#include <thread>
#include "arrow/result.h"
#include "arrow/util/io_util.h"
@@ -56,140 +50,31 @@
#include "arrow/util/optional.h"
#include "arrow/util/string.h"
+#undef CPUINFO_ARCH_X86
+#undef CPUINFO_ARCH_ARM
+#undef CPUINFO_ARCH_PPC
+
+#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) ||
defined(_M_X64)
+#define CPUINFO_ARCH_X86
+#elif defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
+#define CPUINFO_ARCH_ARM
+#elif defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || \
+ defined(__powerpc64__)
+#define CPUINFO_ARCH_PPC
+#endif
+
namespace arrow {
namespace internal {
namespace {
-using std::max;
-
-constexpr int64_t kDefaultL1CacheSize = 32 * 1024; // Level 1: 32k
-constexpr int64_t kDefaultL2CacheSize = 256 * 1024; // Level 2: 256k
-constexpr int64_t kDefaultL3CacheSize = 3072 * 1024; // Level 3: 3M
-
-#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
-void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
- __asm__ __volatile__("cpuid"
- : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
- "=d"(CPUInfo[3])
- : "a"(function_id), "c"(subfunction_id));
-}
-
-int64_t _xgetbv(int xcr) {
- int out = 0;
- __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
- return out;
-}
-#endif
-
-#ifdef __APPLE__
-util::optional<int64_t> IntegerSysCtlByName(const char* name) {
- size_t len = sizeof(int64_t);
- int64_t data = 0;
- if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
- return data;
- }
- // ENOENT is the official errno value for non-existing sysctl's,
- // but EINVAL and ENOTSUP have been seen in the wild.
- if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
- auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
- ARROW_LOG(WARNING) << st.ToString();
- }
- return util::nullopt;
-}
-#endif
-
-#if defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
-// There is no direct instruction to get cache size on Arm64 like '__cpuid' on
x86;
-// Get Arm64 cache size by reading
'/sys/devices/system/cpu/cpu0/cache/index*/size';
-// index* :
-// index0: L1 Dcache
-// index1: L1 Icache
-// index2: L2 cache
-// index3: L3 cache
-const char* kL1CacheSizeFile =
"/sys/devices/system/cpu/cpu0/cache/index0/size";
-const char* kL2CacheSizeFile =
"/sys/devices/system/cpu/cpu0/cache/index2/size";
-const char* kL3CacheSizeFile =
"/sys/devices/system/cpu/cpu0/cache/index3/size";
-
-int64_t GetArm64CacheSize(const char* filename, int64_t default_size = -1) {
- char* content = nullptr;
- char* last_char = nullptr;
- size_t file_len = 0;
-
- // Read cache file to 'content' for getting cache size.
- FILE* cache_file = fopen(filename, "r");
- if (cache_file == nullptr) {
- return default_size;
- }
- int res = getline(&content, &file_len, cache_file);
- fclose(cache_file);
- if (res == -1) {
- return default_size;
- }
- std::unique_ptr<char, decltype(&free)> content_guard(content, &free);
-
- errno = 0;
- const auto cardinal_num = strtoull(content, &last_char, 0);
- if (errno != 0) {
- return default_size;
- }
- // kB, MB, or GB
- int64_t multip = 1;
- switch (*last_char) {
- case 'g':
- case 'G':
- multip *= 1024;
- case 'm':
- case 'M':
- multip *= 1024;
- case 'k':
- case 'K':
- multip *= 1024;
- }
- return cardinal_num * multip;
-}
-#endif
+constexpr int kCacheLevels = static_cast<int>(CpuInfo::CacheLevel::Last) + 1;
-#if !defined(_WIN32) && !defined(__APPLE__)
-struct {
- std::string name;
- int64_t flag;
-} flag_mappings[] = {
-#if (defined(__i386) || defined(_M_IX86) || defined(__x86_64__) ||
defined(_M_X64))
- {"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1},
- {"sse4_2", CpuInfo::SSE4_2}, {"popcnt", CpuInfo::POPCNT},
- {"avx", CpuInfo::AVX}, {"avx2", CpuInfo::AVX2},
- {"avx512f", CpuInfo::AVX512F}, {"avx512cd", CpuInfo::AVX512CD},
- {"avx512vl", CpuInfo::AVX512VL}, {"avx512dq", CpuInfo::AVX512DQ},
- {"avx512bw", CpuInfo::AVX512BW}, {"bmi1", CpuInfo::BMI1},
- {"bmi2", CpuInfo::BMI2},
-#endif
-#if defined(__aarch64__)
- {"asimd", CpuInfo::ASIMD},
-#endif
-};
-const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
-
-// Helper function to parse for hardware flags.
-// values contains a list of space-separated flags. check to see if the flags
we
-// care about are present.
-// Returns a bitmap of flags.
-int64_t ParseCPUFlags(const std::string& values) {
- int64_t flags = 0;
- for (int i = 0; i < num_flags; ++i) {
- if (values.find(flag_mappings[i].name) != std::string::npos) {
- flags |= flag_mappings[i].flag;
- }
- }
- return flags;
-}
-#endif
+//============================== OS Dependent ==============================//
-#ifdef _WIN32
-bool RetrieveCacheSize(int64_t* cache_sizes) {
- if (!cache_sizes) {
- return false;
- }
+#if defined(_WIN32)
+//------------------------------ WINDOWS ------------------------------//
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
DWORD buffer_size = 0;
@@ -200,44 +85,62 @@ bool RetrieveCacheSize(int64_t* cache_sizes) {
GetModuleHandle("kernel32"), "GetLogicalProcessorInformation");
if (!func_pointer) {
- return false;
+ ARROW_LOG(WARNING) << "Failed to find procedure
GetLogicalProcessorInformation";
+ return;
}
// Get buffer size
- if (func_pointer(buffer, &buffer_size) && GetLastError() !=
ERROR_INSUFFICIENT_BUFFER)
- return false;
+ if (func_pointer(buffer, &buffer_size) && GetLastError() !=
ERROR_INSUFFICIENT_BUFFER) {
+ ARROW_LOG(WARNING) << "Failed to get size of processor information buffer";
+ return;
+ }
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size);
+ if (!buffer) {
+ return;
+ }
- if (!buffer || !func_pointer(buffer, &buffer_size)) {
- return false;
+ if (!func_pointer(buffer, &buffer_size)) {
+ ARROW_LOG(WARNING) << "Failed to get processor information";
+ free(buffer);
+ return;
}
buffer_position = buffer;
while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size)
{
if (RelationCache == buffer_position->Relationship) {
PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
- if (cache->Level >= 1 && cache->Level <= 3) {
- cache_sizes[cache->Level - 1] += cache->Size;
+ if (cache->Level >= 1 && cache->Level <= kCacheLevels) {
+ (*cache_sizes)[cache->Level - 1] += cache->Size;
}
}
offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
buffer_position++;
}
- if (buffer) {
- free(buffer);
- }
- return true;
+ free(buffer);
}
-#ifndef _M_ARM64
-// Source: https://en.wikipedia.org/wiki/CPUID
-bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
- CpuInfo::Vendor* vendor) {
- if (!hardware_flags || !model_name || !vendor) {
- return false;
- }
+#if defined(CPUINFO_ARCH_X86)
+// On x86, get CPU features by cpuid, https://en.wikipedia.org/wiki/CPUID
+
+#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
+void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
+ __asm__ __volatile__("cpuid"
+ : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
+ "=d"(CPUInfo[3])
+ : "a"(function_id), "c"(subfunction_id));
+}
+
+int64_t _xgetbv(int xcr) {
+ int out = 0;
+ __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
+ return out;
+}
+#endif // MINGW
+
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
int register_EAX_id = 1;
int highest_valid_id = 0;
int highest_extended_valid_id = 0;
@@ -249,15 +152,17 @@ bool RetrieveCPUInfo(int64_t* hardware_flags,
std::string* model_name,
highest_valid_id = cpu_info[0];
// HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
// HEX of "AuthenticAMD": 41757468 656E7469 63414D44
- if (cpu_info[1] == 0x756e6547 && cpu_info[2] == 0x49656e69 &&
- cpu_info[3] == 0x6c65746e) {
+ if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 &&
+ cpu_info[2] == 0x6c65746e) {
*vendor = CpuInfo::Vendor::Intel;
- } else if (cpu_info[1] == 0x68747541 && cpu_info[2] == 0x69746e65 &&
- cpu_info[3] == 0x444d4163) {
+ } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 &&
+ cpu_info[2] == 0x444d4163) {
*vendor = CpuInfo::Vendor::AMD;
}
- if (highest_valid_id <= register_EAX_id) return false;
+ if (highest_valid_id <= register_EAX_id) {
+ return;
+ }
// EAX=1: Processor Info and Feature Bits
__cpuidex(cpu_info.data(), register_EAX_id, 0);
@@ -308,262 +213,382 @@ bool RetrieveCPUInfo(int64_t* hardware_flags,
std::string* model_name,
if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
}
}
-
- return true;
+}
+#elif defined(CPUINFO_ARCH_ARM)
+// Windows on Arm
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
+ *hardware_flags |= CpuInfo::ASIMD;
+ // TODO: vendor, model_name
}
#endif
-#endif
-
-} // namespace
-CpuInfo::CpuInfo()
- : hardware_flags_(0),
- num_cores_(1),
- model_name_("unknown"),
- vendor_(Vendor::Unknown) {}
-
-std::unique_ptr<CpuInfo> g_cpu_info;
-static std::once_flag cpuinfo_initialized;
-
-CpuInfo* CpuInfo::GetInstance() {
- std::call_once(cpuinfo_initialized, []() {
- g_cpu_info.reset(new CpuInfo);
- g_cpu_info->Init();
- });
- return g_cpu_info.get();
+#elif defined(__APPLE__)
+//------------------------------ MACOS ------------------------------//
+util::optional<int64_t> IntegerSysCtlByName(const char* name) {
+ size_t len = sizeof(int64_t);
+ int64_t data = 0;
+ if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
+ return data;
+ }
+ // ENOENT is the official errno value for non-existing sysctl's,
+ // but EINVAL and ENOTSUP have been seen in the wild.
+ if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
+ auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
+ ARROW_LOG(WARNING) << st.ToString();
+ }
+ return util::nullopt;
}
-void CpuInfo::Init() {
- std::string line;
- std::string name;
- std::string value;
-
- float max_mhz = 0;
- int num_cores = 0;
-
- memset(&cache_sizes_, 0, sizeof(cache_sizes_));
-
-#ifdef _WIN32
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
- num_cores = system_info.dwNumberOfProcessors;
-
- LARGE_INTEGER performance_frequency;
- if (QueryPerformanceFrequency(&performance_frequency)) {
- max_mhz = static_cast<float>(performance_frequency.QuadPart);
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+ static_assert(kCacheLevels >= 3, "");
+ auto c = IntegerSysCtlByName("hw.l1dcachesize");
+ if (c.has_value()) {
+ (*cache_sizes)[0] = *c;
}
-#elif defined(__APPLE__)
- // On macOS, get CPU information from system information base
+ c = IntegerSysCtlByName("hw.l2cachesize");
+ if (c.has_value()) {
+ (*cache_sizes)[1] = *c;
+ }
+ c = IntegerSysCtlByName("hw.l3cachesize");
+ if (c.has_value()) {
+ (*cache_sizes)[2] = *c;
+ }
+}
+
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
+ // hardware_flags
struct SysCtlCpuFeature {
const char* name;
int64_t flag;
};
std::vector<SysCtlCpuFeature> features = {
-#if defined(__aarch64__)
+#if defined(CPUINFO_ARCH_X86)
+ {"hw.optional.sse4_2",
+ CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
+ {"hw.optional.avx1_0", CpuInfo::AVX},
+ {"hw.optional.avx2_0", CpuInfo::AVX2},
+ {"hw.optional.bmi1", CpuInfo::BMI1},
+ {"hw.optional.bmi2", CpuInfo::BMI2},
+ {"hw.optional.avx512f", CpuInfo::AVX512F},
+ {"hw.optional.avx512cd", CpuInfo::AVX512CD},
+ {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
+ {"hw.optional.avx512bw", CpuInfo::AVX512BW},
+ {"hw.optional.avx512vl", CpuInfo::AVX512VL},
+#elif defined(CPUINFO_ARCH_ARM)
// ARM64 (note that this is exposed under Rosetta as well)
- {"hw.optional.neon", ASIMD},
-#else
- // x86
- {"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
- {"hw.optional.avx1_0", AVX},
- {"hw.optional.avx2_0", AVX2},
- {"hw.optional.bmi1", BMI1},
- {"hw.optional.bmi2", BMI2},
- {"hw.optional.avx512f", AVX512F},
- {"hw.optional.avx512cd", AVX512CD},
- {"hw.optional.avx512dq", AVX512DQ},
- {"hw.optional.avx512bw", AVX512BW},
- {"hw.optional.avx512vl", AVX512VL},
+ {"hw.optional.neon", CpuInfo::ASIMD},
#endif
};
for (const auto& feature : features) {
auto v = IntegerSysCtlByName(feature.name);
if (v.value_or(0)) {
- hardware_flags_ |= feature.flag;
+ *hardware_flags |= feature.flag;
}
}
+
+ // TODO: vendor, model_name
+}
+
#else
- // Read from /proc/cpuinfo
+//------------------------------ LINUX ------------------------------//
+// Get cache size, return 0 on error
+int64_t LinuxGetCacheSize(int level) {
+ const struct {
+ int sysconf_name;
+ const char* sysfs_path;
+ } kCacheSizeEntries[] = {
+ {
+ _SC_LEVEL1_DCACHE_SIZE,
+ "/sys/devices/system/cpu/cpu0/cache/index0/size", // l1d (index1 is
l1i)
+ },
+ {
+ _SC_LEVEL2_CACHE_SIZE,
+ "/sys/devices/system/cpu/cpu0/cache/index2/size", // l2
+ },
+ {
+ _SC_LEVEL3_CACHE_SIZE,
+ "/sys/devices/system/cpu/cpu0/cache/index3/size", // l3
+ },
+ };
+ static_assert(sizeof(kCacheSizeEntries) / sizeof(kCacheSizeEntries[0]) ==
kCacheLevels,
+ "");
+
+ // get cache size by sysconf()
+ errno = 0;
+ const int64_t cache_size = sysconf(kCacheSizeEntries[level].sysconf_name);
+ if (errno == 0 && cache_size > 0) {
+ return cache_size;
+ }
+
+ // get cache size from sysfs if sysconf() fails (it does happen on Arm)
+ std::ifstream cacheinfo(kCacheSizeEntries[level].sysfs_path, std::ios::in);
+ if (!cacheinfo) {
+ return 0;
+ }
+ // cacheinfo is one line like: 65536, 64K, 1M, etc.
+ uint64_t size = 0;
+ char unit = '\0';
+ cacheinfo >> size >> unit;
+ if (unit == 'K') {
+ size <<= 10;
+ } else if (unit == 'M') {
+ size <<= 20;
+ } else if (unit == 'G') {
+ size <<= 30;
+ } else if (unit != '\0') {
+ return 0;
+ }
+ return static_cast<int64_t>(size);
+}
+
+// Helper function to parse for hardware flags from /proc/cpuinfo
+// values contains a list of space-separated flags. check to see if the flags
we
+// care about are present.
+// Returns a bitmap of flags.
+int64_t LinuxParseCpuFlags(const std::string& values) {
+ const struct {
+ std::string name;
+ int64_t flag;
+ } flag_mappings[] = {
+#if defined(CPUINFO_ARCH_X86)
+ {"ssse3", CpuInfo::SSSE3},
+ {"sse4_1", CpuInfo::SSE4_1},
+ {"sse4_2", CpuInfo::SSE4_2},
+ {"popcnt", CpuInfo::POPCNT},
+ {"avx", CpuInfo::AVX},
+ {"avx2", CpuInfo::AVX2},
+ {"avx512f", CpuInfo::AVX512F},
+ {"avx512cd", CpuInfo::AVX512CD},
+ {"avx512vl", CpuInfo::AVX512VL},
+ {"avx512dq", CpuInfo::AVX512DQ},
+ {"avx512bw", CpuInfo::AVX512BW},
+ {"bmi1", CpuInfo::BMI1},
+ {"bmi2", CpuInfo::BMI2},
+#elif defined(CPUINFO_ARCH_ARM)
+ {"asimd", CpuInfo::ASIMD},
+#endif
+ };
+ const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
+
+ int64_t flags = 0;
+ for (int i = 0; i < num_flags; ++i) {
+ if (values.find(flag_mappings[i].name) != std::string::npos) {
+ flags |= flag_mappings[i].flag;
+ }
+ }
+ return flags;
+}
+
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+ for (int i = 0; i < kCacheLevels; ++i) {
+ const int64_t cache_size = LinuxGetCacheSize(i);
+ if (cache_size > 0) {
+ (*cache_sizes)[i] = cache_size;
+ }
+ }
+}
+
+// Read from /proc/cpuinfo
+// TODO: vendor, model_name for Arm
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
while (cpuinfo) {
+ std::string line;
std::getline(cpuinfo, line);
- size_t colon = line.find(':');
+ const size_t colon = line.find(':');
if (colon != std::string::npos) {
- name = TrimString(line.substr(0, colon - 1));
- value = TrimString(line.substr(colon + 1, std::string::npos));
+ const std::string name = TrimString(line.substr(0, colon - 1));
+ const std::string value = TrimString(line.substr(colon + 1,
std::string::npos));
if (name.compare("flags") == 0 || name.compare("Features") == 0) {
- hardware_flags_ |= ParseCPUFlags(value);
- } else if (name.compare("cpu MHz") == 0) {
- // Every core will report a different speed. We'll take the max,
assuming
- // that when impala is running, the core will not be in a lower power
state.
- // TODO: is there a more robust way to do this, such as
- // Window's QueryPerformanceFrequency()
- float mhz = static_cast<float>(atof(value.c_str()));
- max_mhz = max(mhz, max_mhz);
- } else if (name.compare("processor") == 0) {
- ++num_cores;
+ *hardware_flags |= LinuxParseCpuFlags(value);
} else if (name.compare("model name") == 0) {
- model_name_ = value;
+ *model_name = value;
} else if (name.compare("vendor_id") == 0) {
if (value.compare("GenuineIntel") == 0) {
- vendor_ = Vendor::Intel;
+ *vendor = CpuInfo::Vendor::Intel;
} else if (value.compare("AuthenticAMD") == 0) {
- vendor_ = Vendor::AMD;
+ *vendor = CpuInfo::Vendor::AMD;
}
}
}
}
- if (cpuinfo.is_open()) cpuinfo.close();
-#endif
+}
+#endif // WINDOWS, MACOS, LINUX
+
+//============================== Arch Dependent
==============================//
+
+#if defined(CPUINFO_ARCH_X86)
+//------------------------------ X86_64 ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
+ enum {
+ USER_SIMD_NONE,
+ USER_SIMD_SSE4_2,
+ USER_SIMD_AVX,
+ USER_SIMD_AVX2,
+ USER_SIMD_AVX512,
+ USER_SIMD_MAX,
+ };
-#ifdef __APPLE__
- // On macOS, get cache size from system information base
- SetDefaultCacheSize();
- auto c = IntegerSysCtlByName("hw.l1dcachesize");
- if (c.has_value()) {
- cache_sizes_[0] = *c;
+ int level = USER_SIMD_MAX;
+ // Parse the level
+ if (simd_level == "AVX512") {
+ level = USER_SIMD_AVX512;
+ } else if (simd_level == "AVX2") {
+ level = USER_SIMD_AVX2;
+ } else if (simd_level == "AVX") {
+ level = USER_SIMD_AVX;
+ } else if (simd_level == "SSE4_2") {
+ level = USER_SIMD_SSE4_2;
+ } else if (simd_level == "NONE") {
+ level = USER_SIMD_NONE;
+ } else {
+ return false;
}
- c = IntegerSysCtlByName("hw.l2cachesize");
- if (c.has_value()) {
- cache_sizes_[1] = *c;
+
+ // Disable feature as the level
+ if (level < USER_SIMD_AVX512) {
+ *hardware_flags &= ~CpuInfo::AVX512;
}
- c = IntegerSysCtlByName("hw.l3cachesize");
- if (c.has_value()) {
- cache_sizes_[2] = *c;
+ if (level < USER_SIMD_AVX2) {
+ *hardware_flags &= ~(CpuInfo::AVX2 | CpuInfo::BMI2);
}
-#elif _WIN32
- if (!RetrieveCacheSize(cache_sizes_)) {
- SetDefaultCacheSize();
+ if (level < USER_SIMD_AVX) {
+ *hardware_flags &= ~CpuInfo::AVX;
}
-#ifndef _M_ARM64
- RetrieveCPUInfo(&hardware_flags_, &model_name_, &vendor_);
-#endif
-#else
- SetDefaultCacheSize();
-#endif
-
- if (max_mhz != 0) {
- cycles_per_ms_ = static_cast<int64_t>(max_mhz);
-#ifndef _WIN32
- cycles_per_ms_ *= 1000;
-#endif
- } else {
- cycles_per_ms_ = 1000000;
+ if (level < USER_SIMD_SSE4_2) {
+ *hardware_flags &= ~(CpuInfo::SSE4_2 | CpuInfo::BMI1);
}
- original_hardware_flags_ = hardware_flags_;
+ return true;
+}
- if (num_cores > 0) {
- num_cores_ = num_cores;
- } else {
- num_cores_ = 1;
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+#if defined(ARROW_HAVE_SSE4_2)
+ if (!ci->IsDetected(CpuInfo::SSE4_2)) {
+ DCHECK(false) << "CPU does not support the Supplemental SSE4_2 instruction
set";
}
-
- // Parse the user simd level
- ParseUserSimdLevel();
+#endif
}
-void CpuInfo::VerifyCpuRequirements() {
-#ifdef ARROW_HAVE_SSE4_2
- if (!IsSupported(CpuInfo::SSSE3)) {
- DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction
set";
+#elif defined(CPUINFO_ARCH_ARM)
+//------------------------------ AARCH64 ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
+ if (simd_level == "NONE") {
+ *hardware_flags &= ~CpuInfo::ASIMD;
+ return true;
}
-#endif
-#if defined(ARROW_HAVE_NEON)
- if (!IsSupported(CpuInfo::ASIMD)) {
+ return false;
+}
+
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+ if (!ci->IsDetected(CpuInfo::ASIMD)) {
DCHECK(false) << "CPU does not support the Armv8 Neon instruction set";
}
-#endif
}
-bool CpuInfo::CanUseSSE4_2() const {
-#if defined(ARROW_HAVE_SSE4_2)
- return IsSupported(CpuInfo::SSE4_2);
#else
- return false;
-#endif
+//------------------------------ PPC, ... ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
+ return true;
}
-void CpuInfo::EnableFeature(int64_t flag, bool enable) {
- if (!enable) {
- hardware_flags_ &= ~flag;
- } else {
- // Can't turn something on that can't be supported
- DCHECK_NE(original_hardware_flags_ & flag, 0);
- hardware_flags_ |= flag;
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {}
+
+#endif // X86, ARM, PPC
+
+} // namespace
+
+struct CpuInfo::Impl {
+ int64_t hardware_flags = 0;
+ int num_cores = 0;
+ int64_t original_hardware_flags = 0;
+ Vendor vendor = Vendor::Unknown;
+ std::string model_name = "Unknown";
+ std::array<int64_t, kCacheLevels> cache_sizes{};
+
+ Impl() {
+ OsRetrieveCacheSize(&cache_sizes);
+ OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
+ original_hardware_flags = hardware_flags;
+ num_cores =
std::max(static_cast<int>(std::thread::hardware_concurrency()), 1);
+
+ // parse user simd level
+ auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
+ if (!maybe_env_var.ok()) {
+ return;
+ }
+ std::string s = *std::move(maybe_env_var);
+ std::transform(s.begin(), s.end(), s.begin(),
+ [](unsigned char c) { return std::toupper(c); });
+ if (!ArchParseUserSimdLevel(s, &hardware_flags)) {
+ ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
+ }
+ }
+
+ void EnableFeature(int64_t flag, bool enable) {
+ if (!enable) {
+ hardware_flags &= ~flag;
+ } else {
+ // Can't turn something on that can't be supported
+ DCHECK_EQ((~original_hardware_flags) & flag, 0);
+ hardware_flags |= (flag & original_hardware_flags);
+ }
}
+};
+
+CpuInfo::~CpuInfo() = default;
+
+CpuInfo::CpuInfo() : impl_(new Impl) {}
+
+const CpuInfo* CpuInfo::GetInstance() {
+ static CpuInfo cpu_info;
+ return &cpu_info;
}
-int64_t CpuInfo::hardware_flags() { return hardware_flags_; }
+int64_t CpuInfo::hardware_flags() const { return impl_->hardware_flags; }
-int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; }
+int CpuInfo::num_cores() const { return impl_->num_cores <= 0 ? 1 :
impl_->num_cores; }
-int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; }
+CpuInfo::Vendor CpuInfo::vendor() const { return impl_->vendor; }
-int CpuInfo::num_cores() { return num_cores_; }
+const std::string& CpuInfo::model_name() const { return impl_->model_name; }
-std::string CpuInfo::model_name() { return model_name_; }
+int64_t CpuInfo::CacheSize(CacheLevel level) const {
+ constexpr int64_t kDefaultCacheSizes[] = {
+ 32 * 1024, // Level 1: 32K
+ 256 * 1024, // Level 2: 256K
+ 3072 * 1024, // Level 3: 3M
+ };
+ static_assert(
+ sizeof(kDefaultCacheSizes) / sizeof(kDefaultCacheSizes[0]) ==
kCacheLevels, "");
+
+ static_assert(static_cast<int>(CacheLevel::L1) == 0, "");
+ const int i = static_cast<int>(level);
+ if (impl_->cache_sizes[i] > 0) return impl_->cache_sizes[i];
+ if (i == 0) return kDefaultCacheSizes[0];
+ // l3 may be not available, return maximum of l2 or default size
+ return std::max(kDefaultCacheSizes[i], impl_->cache_sizes[i - 1]);
+}
-void CpuInfo::SetDefaultCacheSize() {
-#if defined(_SC_LEVEL1_DCACHE_SIZE) && !defined(__aarch64__)
- // Call sysconf to query for the cache sizes
- cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
- cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE);
- cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE);
- ARROW_UNUSED(kDefaultL1CacheSize);
- ARROW_UNUSED(kDefaultL2CacheSize);
- ARROW_UNUSED(kDefaultL3CacheSize);
-#elif defined(__GNUC__) && defined(__linux__) && defined(__aarch64__)
- cache_sizes_[0] = GetArm64CacheSize(kL1CacheSizeFile, kDefaultL1CacheSize);
- cache_sizes_[1] = GetArm64CacheSize(kL2CacheSizeFile, kDefaultL2CacheSize);
- cache_sizes_[2] = GetArm64CacheSize(kL3CacheSizeFile, kDefaultL3CacheSize);
-#else
- // Provide reasonable default values if no info
- cache_sizes_[0] = kDefaultL1CacheSize;
- cache_sizes_[1] = kDefaultL2CacheSize;
- cache_sizes_[2] = kDefaultL3CacheSize;
-#endif
+bool CpuInfo::IsSupported(int64_t flags) const {
+ return (impl_->hardware_flags & flags) == flags;
}
-void CpuInfo::ParseUserSimdLevel() {
- auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
- if (!maybe_env_var.ok()) {
- // No user settings
- return;
- }
- std::string s = *std::move(maybe_env_var);
- std::transform(s.begin(), s.end(), s.begin(),
- [](unsigned char c) { return std::toupper(c); });
+bool CpuInfo::IsDetected(int64_t flags) const {
+ return (impl_->original_hardware_flags & flags) == flags;
+}
- int level = USER_SIMD_MAX;
- // Parse the level
- if (s == "AVX512") {
- level = USER_SIMD_AVX512;
- } else if (s == "AVX2") {
- level = USER_SIMD_AVX2;
- } else if (s == "AVX") {
- level = USER_SIMD_AVX;
- } else if (s == "SSE4_2") {
- level = USER_SIMD_SSE4_2;
- } else if (s == "NONE") {
- level = USER_SIMD_NONE;
- } else if (!s.empty()) {
- ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
- }
+void CpuInfo::VerifyCpuRequirements() const { return
ArchVerifyCpuRequirements(this); }
- // Disable feature as the level
- if (level < USER_SIMD_AVX512) { // Disable all AVX512 features
- EnableFeature(AVX512, false);
- }
- if (level < USER_SIMD_AVX2) { // Disable all AVX2 features
- EnableFeature(AVX2 | BMI2, false);
- }
- if (level < USER_SIMD_AVX) { // Disable all AVX features
- EnableFeature(AVX, false);
- }
- if (level < USER_SIMD_SSE4_2) { // Disable all SSE4_2 features
- EnableFeature(SSE4_2 | BMI1, false);
- }
+void CpuInfo::EnableFeature(int64_t flag, bool enable) {
+ impl_->EnableFeature(flag, enable);
}
} // namespace internal
} // namespace arrow
+
+#undef CPUINFO_ARCH_X86
+#undef CPUINFO_ARCH_ARM
+#undef CPUINFO_ARCH_PPC
diff --git a/cpp/src/arrow/util/cpu_info.h b/cpp/src/arrow/util/cpu_info.h
index 83819c2551..949719b97e 100644
--- a/cpp/src/arrow/util/cpu_info.h
+++ b/cpp/src/arrow/util/cpu_info.h
@@ -21,8 +21,10 @@
#pragma once
#include <cstdint>
+#include <memory>
#include <string>
+#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
@@ -34,77 +36,68 @@ namespace internal {
/// /sys/devices)
class ARROW_EXPORT CpuInfo {
public:
- static constexpr int64_t SSSE3 = (1 << 1);
- static constexpr int64_t SSE4_1 = (1 << 2);
- static constexpr int64_t SSE4_2 = (1 << 3);
- static constexpr int64_t POPCNT = (1 << 4);
- static constexpr int64_t ASIMD = (1 << 5);
- static constexpr int64_t AVX = (1 << 6);
- static constexpr int64_t AVX2 = (1 << 7);
- static constexpr int64_t AVX512F = (1 << 8);
- static constexpr int64_t AVX512CD = (1 << 9);
- static constexpr int64_t AVX512VL = (1 << 10);
- static constexpr int64_t AVX512DQ = (1 << 11);
- static constexpr int64_t AVX512BW = (1 << 12);
- static constexpr int64_t BMI1 = (1 << 13);
- static constexpr int64_t BMI2 = (1 << 14);
-
- /// Typical AVX512 subsets consists of AVX512F,AVX512BW,AVX512VL,AVX512CD,AVX512DQ
+ ~CpuInfo();
+
+ /// x86 features
+ static constexpr int64_t SSSE3 = (1LL << 0);
+ static constexpr int64_t SSE4_1 = (1LL << 1);
+ static constexpr int64_t SSE4_2 = (1LL << 2);
+ static constexpr int64_t POPCNT = (1LL << 3);
+ static constexpr int64_t AVX = (1LL << 4);
+ static constexpr int64_t AVX2 = (1LL << 5);
+ static constexpr int64_t AVX512F = (1LL << 6);
+ static constexpr int64_t AVX512CD = (1LL << 7);
+ static constexpr int64_t AVX512VL = (1LL << 8);
+ static constexpr int64_t AVX512DQ = (1LL << 9);
+ static constexpr int64_t AVX512BW = (1LL << 10);
static constexpr int64_t AVX512 = AVX512F | AVX512CD | AVX512VL | AVX512DQ |
AVX512BW;
+ static constexpr int64_t BMI1 = (1LL << 11);
+ static constexpr int64_t BMI2 = (1LL << 12);
- /// Cache enums for L1 (data), L2 and L3
- enum CacheLevel {
- L1_CACHE = 0,
- L2_CACHE = 1,
- L3_CACHE = 2,
- };
+ /// Arm features
+ static constexpr int64_t ASIMD = (1LL << 32);
- enum class Vendor : int { Unknown = 0, Intel, AMD };
+ /// Cache enums for L1 (data), L2 and L3
+ enum class CacheLevel { L1 = 0, L2, L3, Last = L3 };
- static CpuInfo* GetInstance();
+ /// CPU vendors
+ enum class Vendor { Unknown, Intel, AMD };
- /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
- /// and terminate.
- void VerifyCpuRequirements();
+ static const CpuInfo* GetInstance();
/// Returns all the flags for this cpu
- int64_t hardware_flags();
+ int64_t hardware_flags() const;
+
+ /// Returns the number of cores (including hyper-threaded) on this machine.
+ int num_cores() const;
+
+ /// Returns the vendor of the cpu.
+ Vendor vendor() const;
+
+ /// Returns the model name of the cpu (e.g. Intel i7-2600)
+ const std::string& model_name() const;
+
+ /// Returns the size of the cache in bytes at this cache level
+ int64_t CacheSize(CacheLevel level) const;
/// \brief Returns whether or not the given feature is enabled.
///
/// IsSupported() is true iff IsDetected() is also true and the feature
 /// wasn't disabled by the user (for example by setting the ARROW_USER_SIMD_LEVEL
/// environment variable).
- bool IsSupported(int64_t flags) const { return (hardware_flags_ & flags) ==
flags; }
+ bool IsSupported(int64_t flags) const;
/// Returns whether or not the given feature is available on the CPU.
- bool IsDetected(int64_t flags) const {
- return (original_hardware_flags_ & flags) == flags;
- }
+ bool IsDetected(int64_t flags) const;
- /// \brief The processor supports SSE4.2 and the Arrow libraries are built
- /// with support for it
- bool CanUseSSE4_2() const;
+ /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
+ /// and terminate.
+ void VerifyCpuRequirements() const;
 /// Toggle a hardware feature on and off. It is not valid to turn on a feature
/// that the underlying hardware cannot support. This is useful for testing.
void EnableFeature(int64_t flag, bool enable);
- /// Returns the size of the cache in KB at this cache level
- int64_t CacheSize(CacheLevel level);
-
- /// Returns the number of cpu cycles per millisecond
- int64_t cycles_per_ms();
-
- /// Returns the number of cores (including hyper-threaded) on this machine.
- int num_cores();
-
- /// Returns the model name of the cpu (e.g. Intel i7-2600)
- std::string model_name();
-
- /// Returns the vendor of the cpu.
- Vendor vendor() const { return vendor_; }
-
bool HasEfficientBmi2() const {
// BMI2 (pext, pdep) is only efficient on Intel X86 processors.
return vendor() == Vendor::Intel && IsSupported(BMI2);
@@ -113,30 +106,8 @@ class ARROW_EXPORT CpuInfo {
private:
CpuInfo();
- enum UserSimdLevel {
- USER_SIMD_NONE = 0,
- USER_SIMD_SSE4_2,
- USER_SIMD_AVX,
- USER_SIMD_AVX2,
- USER_SIMD_AVX512,
- USER_SIMD_MAX,
- };
-
- void Init();
-
- /// Inits CPU cache size variables with default values
- void SetDefaultCacheSize();
-
- /// Parse the SIMD level by ARROW_USER_SIMD_LEVEL env
- void ParseUserSimdLevel();
-
- int64_t hardware_flags_;
- int64_t original_hardware_flags_;
- int64_t cache_sizes_[L3_CACHE + 1];
- int64_t cycles_per_ms_;
- int num_cores_;
- std::string model_name_;
- Vendor vendor_;
+ struct Impl;
+ std::unique_ptr<Impl> impl_;
};
} // namespace internal
diff --git a/cpp/src/arrow/util/io_util_test.cc
b/cpp/src/arrow/util/io_util_test.cc
index efc4f2164b..a38699dfd8 100644
--- a/cpp/src/arrow/util/io_util_test.cc
+++ b/cpp/src/arrow/util/io_util_test.cc
@@ -35,6 +35,7 @@
#include "arrow/buffer.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/bit_util.h"
+#include "arrow/util/cpu_info.h"
#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/windows_compatibility.h"
@@ -731,5 +732,31 @@ TEST(Memory, GetRSS) {
#endif
}
+// Some loose tests to check if the cpuinfo makes sense
+TEST(CpuInfo, Basic) {
+ const CpuInfo* ci = CpuInfo::GetInstance();
+
+ const int ncores = ci->num_cores();
+ ASSERT_TRUE(ncores >= 1 && ncores <= 1000) << "invalid number of cores " <<
ncores;
+
+ const auto l1 = ci->CacheSize(CpuInfo::CacheLevel::L1);
+ const auto l2 = ci->CacheSize(CpuInfo::CacheLevel::L2);
+ const auto l3 = ci->CacheSize(CpuInfo::CacheLevel::L3);
+ ASSERT_TRUE(l1 >= 4 * 1024 && l1 <= 512 * 1024) << "unexpected L1 size: " <<
l1;
+ ASSERT_TRUE(l2 >= 32 * 1024 && l2 <= 8 * 1024 * 1024) << "unexpected L2
size: " << l2;
+ ASSERT_TRUE(l3 >= 256 * 1024 && l3 <= 1024 * 1024 * 1024)
+ << "unexpected L3 size: " << l3;
+ ASSERT_LE(l1, l2) << "L1 cache size " << l1 << " larger than L2 " << l2;
+ ASSERT_LE(l2, l3) << "L2 cache size " << l2 << " larger than L3 " << l3;
+
+ // Toggle hardware flags
+ CpuInfo* ci_rw = const_cast<CpuInfo*>(ci);
+ const int64_t original_hardware_flags = ci->hardware_flags();
+ ci_rw->EnableFeature(original_hardware_flags, false);
+ ASSERT_EQ(ci->hardware_flags(), 0);
+ ci_rw->EnableFeature(original_hardware_flags, true);
+ ASSERT_EQ(ci->hardware_flags(), original_hardware_flags);
+}
+
} // namespace internal
} // namespace arrow