cyb70289 commented on code in PR #13112:
URL: https://github.com/apache/arrow/pull/13112#discussion_r873588423
##########
cpp/src/arrow/util/cpu_info.cc:
##########
@@ -308,262 +218,431 @@ bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name,
if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
}
}
-
- return true;
+}
+#elif defined(CPUINFO_ARCH_ARM)
+// Windows on Arm
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
+ *hardware_flags |= CpuInfo::ASIMD;
+ // TODO: vendor, model_name
}
#endif
-#endif
-
-} // namespace
-CpuInfo::CpuInfo()
- : hardware_flags_(0),
- num_cores_(1),
- model_name_("unknown"),
- vendor_(Vendor::Unknown) {}
-
-std::unique_ptr<CpuInfo> g_cpu_info;
-static std::once_flag cpuinfo_initialized;
-
-CpuInfo* CpuInfo::GetInstance() {
- std::call_once(cpuinfo_initialized, []() {
- g_cpu_info.reset(new CpuInfo);
- g_cpu_info->Init();
- });
- return g_cpu_info.get();
+#elif defined(__APPLE__)
+//------------------------------ MACOS ------------------------------//
+util::optional<int64_t> IntegerSysCtlByName(const char* name) {
+ size_t len = sizeof(int64_t);
+ int64_t data = 0;
+ if (sysctlbyname(name, &data, &len, nullptr, 0) == 0) {
+ return data;
+ }
+ // ENOENT is the official errno value for non-existing sysctl's,
+ // but EINVAL and ENOTSUP have been seen in the wild.
+ if (errno != ENOENT && errno != EINVAL && errno != ENOTSUP) {
+ auto st = IOErrorFromErrno(errno, "sysctlbyname failed for '", name, "'");
+ ARROW_LOG(WARNING) << st.ToString();
+ }
+ return util::nullopt;
}
-void CpuInfo::Init() {
- std::string line;
- std::string name;
- std::string value;
-
- float max_mhz = 0;
- int num_cores = 0;
-
- memset(&cache_sizes_, 0, sizeof(cache_sizes_));
-
-#ifdef _WIN32
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
- num_cores = system_info.dwNumberOfProcessors;
-
- LARGE_INTEGER performance_frequency;
- if (QueryPerformanceFrequency(&performance_frequency)) {
- max_mhz = static_cast<float>(performance_frequency.QuadPart);
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+ static_assert(kCacheLevels >= 3, "");
+ auto c = IntegerSysCtlByName("hw.l1dcachesize");
+ if (c.has_value()) {
+ (*cache_sizes)[0] = *c;
}
-#elif defined(__APPLE__)
- // On macOS, get CPU information from system information base
+ c = IntegerSysCtlByName("hw.l2cachesize");
+ if (c.has_value()) {
+ (*cache_sizes)[1] = *c;
+ }
+ c = IntegerSysCtlByName("hw.l3cachesize");
+ if (c.has_value()) {
+ (*cache_sizes)[2] = *c;
+ }
+}
+
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
+ // hardware_flags
struct SysCtlCpuFeature {
const char* name;
int64_t flag;
};
std::vector<SysCtlCpuFeature> features = {
-#if defined(__aarch64__)
+#if defined(CPUINFO_ARCH_X86)
+ {"hw.optional.sse4_2",
+ CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
+ {"hw.optional.avx1_0", CpuInfo::AVX},
+ {"hw.optional.avx2_0", CpuInfo::AVX2},
+ {"hw.optional.bmi1", CpuInfo::BMI1},
+ {"hw.optional.bmi2", CpuInfo::BMI2},
+ {"hw.optional.avx512f", CpuInfo::AVX512F},
+ {"hw.optional.avx512cd", CpuInfo::AVX512CD},
+ {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
+ {"hw.optional.avx512bw", CpuInfo::AVX512BW},
+ {"hw.optional.avx512vl", CpuInfo::AVX512VL},
+#elif defined(CPUINFO_ARCH_ARM)
// ARM64 (note that this is exposed under Rosetta as well)
- {"hw.optional.neon", ASIMD},
-#else
- // x86
- {"hw.optional.sse4_2", SSSE3 | SSE4_1 | SSE4_2 | POPCNT},
- {"hw.optional.avx1_0", AVX},
- {"hw.optional.avx2_0", AVX2},
- {"hw.optional.bmi1", BMI1},
- {"hw.optional.bmi2", BMI2},
- {"hw.optional.avx512f", AVX512F},
- {"hw.optional.avx512cd", AVX512CD},
- {"hw.optional.avx512dq", AVX512DQ},
- {"hw.optional.avx512bw", AVX512BW},
- {"hw.optional.avx512vl", AVX512VL},
+ {"hw.optional.neon", CpuInfo::ASIMD},
#endif
};
for (const auto& feature : features) {
auto v = IntegerSysCtlByName(feature.name);
if (v.value_or(0)) {
- hardware_flags_ |= feature.flag;
+ *hardware_flags |= feature.flag;
}
}
+
+ // TODO: vendor, model_name
+}
+
#else
- // Read from /proc/cpuinfo
+//------------------------------ LINUX ------------------------------//
+// Get cache size, return 0 on error
+int64_t LinuxGetCacheSize(int level) {
+ const struct {
+ int sysconf_name;
+ const char* sysfs_path;
+ } kCacheSizeEntries[] = {
+ {
+ _SC_LEVEL1_DCACHE_SIZE,
+ "/sys/devices/system/cpu/cpu0/cache/index0/size", // l1d (index1 is l1i)
+ },
+ {
+ _SC_LEVEL2_CACHE_SIZE,
+ "/sys/devices/system/cpu/cpu0/cache/index2/size", // l2
+ },
+ {
+ _SC_LEVEL3_CACHE_SIZE,
+ "/sys/devices/system/cpu/cpu0/cache/index3/size", // l3
+ },
+ };
+ static_assert(sizeof(kCacheSizeEntries) / sizeof(kCacheSizeEntries[0]) == kCacheLevels,
+ "");
+
+ // get cache size by sysconf()
+ errno = 0;
+ const int64_t cache_size = sysconf(kCacheSizeEntries[level].sysconf_name);
+ if (errno == 0 && cache_size > 0) {
+ return cache_size;
+ }
+
+ // get cache size from sysfs if sysconf() fails (it does happen on Arm)
+ std::ifstream cacheinfo(kCacheSizeEntries[level].sysfs_path, std::ios::in);
+ if (!cacheinfo) {
+ return 0;
+ }
+ std::string line;
+ std::getline(cacheinfo, line);
+ if (line.empty()) {
+ return 0;
+ }
+ // line: 65536, 64K, 1M, etc.
+ char* last_char;
+ errno = 0;
+ auto size = std::strtoull(line.c_str(), &last_char, 0);
+ if (errno != 0) {
+ return 0;
+ }
+ const int unit = std::toupper(static_cast<unsigned char>(*last_char));
+ if (unit == 'K') {
+ size <<= 10;
+ } else if (unit == 'M') {
+ size <<= 20;
+ } else if (unit == 'G') {
+ size <<= 30;
+ } else {
+ return 0;
+ }
+ return static_cast<int64_t>(size);
+}
+
+// Helper function to parse for hardware flags from /proc/cpuinfo
+// values contains a list of space-separated flags. check to see if the flags we
+// care about are present.
+// Returns a bitmap of flags.
+int64_t LinuxParseCpuFlags(const std::string& values) {
+ const struct {
+ std::string name;
+ int64_t flag;
+ } flag_mappings[] = {
+#if defined(CPUINFO_ARCH_X86)
+ {"ssse3", CpuInfo::SSSE3},
+ {"sse4_1", CpuInfo::SSE4_1},
+ {"sse4_2", CpuInfo::SSE4_2},
+ {"popcnt", CpuInfo::POPCNT},
+ {"avx", CpuInfo::AVX},
+ {"avx2", CpuInfo::AVX2},
+ {"avx512f", CpuInfo::AVX512F},
+ {"avx512cd", CpuInfo::AVX512CD},
+ {"avx512vl", CpuInfo::AVX512VL},
+ {"avx512dq", CpuInfo::AVX512DQ},
+ {"avx512bw", CpuInfo::AVX512BW},
+ {"bmi1", CpuInfo::BMI1},
+ {"bmi2", CpuInfo::BMI2},
+#elif defined(CPUINFO_ARCH_ARM)
+ {"asimd", CpuInfo::ASIMD},
+#endif
+ };
+ const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
+
+ int64_t flags = 0;
+ for (int i = 0; i < num_flags; ++i) {
+ if (values.find(flag_mappings[i].name) != std::string::npos) {
+ flags |= flag_mappings[i].flag;
+ }
+ }
+ return flags;
+}
+
+void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+ for (int i = 0; i < kCacheLevels; ++i) {
+ const int64_t cache_size = LinuxGetCacheSize(i);
+ if (cache_size > 0) {
+ (*cache_sizes)[i] = cache_size;
+ }
+ }
+}
+
+// Read from /proc/cpuinfo
+// TODO: vendor, model_name for Arm
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
+ std::string* model_name) {
std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
while (cpuinfo) {
+ std::string line;
std::getline(cpuinfo, line);
- size_t colon = line.find(':');
+ const size_t colon = line.find(':');
if (colon != std::string::npos) {
- name = TrimString(line.substr(0, colon - 1));
- value = TrimString(line.substr(colon + 1, std::string::npos));
+ const std::string name = TrimString(line.substr(0, colon - 1));
+ const std::string value = TrimString(line.substr(colon + 1, std::string::npos));
if (name.compare("flags") == 0 || name.compare("Features") == 0) {
- hardware_flags_ |= ParseCPUFlags(value);
- } else if (name.compare("cpu MHz") == 0) {
- // Every core will report a different speed. We'll take the max, assuming
- // that when impala is running, the core will not be in a lower power state.
- // TODO: is there a more robust way to do this, such as
- // Window's QueryPerformanceFrequency()
- float mhz = static_cast<float>(atof(value.c_str()));
- max_mhz = max(mhz, max_mhz);
- } else if (name.compare("processor") == 0) {
- ++num_cores;
+ *hardware_flags |= LinuxParseCpuFlags(value);
} else if (name.compare("model name") == 0) {
- model_name_ = value;
+ *model_name = value;
} else if (name.compare("vendor_id") == 0) {
if (value.compare("GenuineIntel") == 0) {
- vendor_ = Vendor::Intel;
+ *vendor = CpuInfo::Vendor::Intel;
} else if (value.compare("AuthenticAMD") == 0) {
- vendor_ = Vendor::AMD;
+ *vendor = CpuInfo::Vendor::AMD;
}
}
}
}
- if (cpuinfo.is_open()) cpuinfo.close();
-#endif
+}
+#endif // WINDOWS, MACOS, LINUX
+
+//============================== Arch Dependent ==============================//
+
+#if defined(CPUINFO_ARCH_X86)
+//------------------------------ X86_64 ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) {
+ enum {
+ USER_SIMD_NONE,
+ USER_SIMD_SSE4_2,
+ USER_SIMD_AVX,
+ USER_SIMD_AVX2,
+ USER_SIMD_AVX512,
+ USER_SIMD_MAX,
+ };
-#ifdef __APPLE__
- // On macOS, get cache size from system information base
- SetDefaultCacheSize();
- auto c = IntegerSysCtlByName("hw.l1dcachesize");
- if (c.has_value()) {
- cache_sizes_[0] = *c;
+ int level = USER_SIMD_MAX;
+ // Parse the level
+ if (simd_level == "AVX512") {
+ level = USER_SIMD_AVX512;
+ } else if (simd_level == "AVX2") {
+ level = USER_SIMD_AVX2;
+ } else if (simd_level == "AVX") {
+ level = USER_SIMD_AVX;
+ } else if (simd_level == "SSE4_2") {
+ level = USER_SIMD_SSE4_2;
+ } else if (simd_level == "NONE") {
+ level = USER_SIMD_NONE;
+ } else {
+ return false;
}
- c = IntegerSysCtlByName("hw.l2cachesize");
- if (c.has_value()) {
- cache_sizes_[1] = *c;
+
+ // Disable feature as the level
+ if (level < USER_SIMD_AVX512) {
+ *hardware_flags &= ~CpuInfo::AVX512;
}
- c = IntegerSysCtlByName("hw.l3cachesize");
- if (c.has_value()) {
- cache_sizes_[2] = *c;
+ if (level < USER_SIMD_AVX2) {
+ *hardware_flags &= ~(CpuInfo::AVX2 | CpuInfo::BMI2);
}
-#elif _WIN32
- if (!RetrieveCacheSize(cache_sizes_)) {
- SetDefaultCacheSize();
+ if (level < USER_SIMD_AVX) {
+ *hardware_flags &= ~CpuInfo::AVX;
}
-#ifndef _M_ARM64
- RetrieveCPUInfo(&hardware_flags_, &model_name_, &vendor_);
-#endif
-#else
- SetDefaultCacheSize();
-#endif
-
- if (max_mhz != 0) {
- cycles_per_ms_ = static_cast<int64_t>(max_mhz);
-#ifndef _WIN32
- cycles_per_ms_ *= 1000;
-#endif
- } else {
- cycles_per_ms_ = 1000000;
+ if (level < USER_SIMD_SSE4_2) {
+ *hardware_flags &= ~(CpuInfo::SSE4_2 | CpuInfo::BMI1);
}
- original_hardware_flags_ = hardware_flags_;
+ return true;
+}
- if (num_cores > 0) {
- num_cores_ = num_cores;
- } else {
- num_cores_ = 1;
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+#if defined(ARROW_HAVE_SSE4_2)
+ if (!ci->IsDetected(CpuInfo::SSE4_2)) {
+ DCHECK(false) << "CPU does not support the Supplemental SSE4_2 instruction set";
}
-
- // Parse the user simd level
- ParseUserSimdLevel();
+#endif
}
-void CpuInfo::VerifyCpuRequirements() {
-#ifdef ARROW_HAVE_SSE4_2
- if (!IsSupported(CpuInfo::SSSE3)) {
- DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction set";
+#elif defined(CPUINFO_ARCH_ARM)
+//------------------------------ AARCH64 ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) {
+ if (simd_level == "NONE") {
+ *hardware_flags &= ~CpuInfo::ASIMD;
+ return true;
}
-#endif
-#if defined(ARROW_HAVE_NEON)
- if (!IsSupported(CpuInfo::ASIMD)) {
+ return false;
+}
+
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {
+ if (!ci->IsDetected(CpuInfo::ASIMD)) {
DCHECK(false) << "CPU does not support the Armv8 Neon instruction set";
}
-#endif
}
-bool CpuInfo::CanUseSSE4_2() const {
-#if defined(ARROW_HAVE_SSE4_2)
- return IsSupported(CpuInfo::SSE4_2);
#else
- return false;
-#endif
+//------------------------------ PPC, ... ------------------------------//
+bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) {
+ return true;
}
-void CpuInfo::EnableFeature(int64_t flag, bool enable) {
- if (!enable) {
- hardware_flags_ &= ~flag;
- } else {
- // Can't turn something on that can't be supported
- DCHECK_NE(original_hardware_flags_ & flag, 0);
- hardware_flags_ |= flag;
+void ArchVerifyCpuRequirements(const CpuInfo* ci) {}
+
+#endif // X86, ARM, PPC
+
+} // namespace
+
+struct CpuInfo::Impl {
+ int64_t hardware_flags = 0;
+ int num_cores = 0;
+ int64_t original_hardware_flags = 0;
+ Vendor vendor = Vendor::Unknown;
+ std::string model_name = "Unknown";
+ std::array<int64_t, kCacheLevels> cache_sizes{};
+
+ Impl() {
+ OsRetrieveCacheSize(&cache_sizes);
+ OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
+ original_hardware_flags = hardware_flags;
+ num_cores = std::max(static_cast<int>(std::thread::hardware_concurrency()), 1);
+
+ // parse user simd level
+ auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
+ if (!maybe_env_var.ok()) {
+ return;
+ }
+ std::string& s = maybe_env_var.ValueUnsafe();
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]