IMPALA-5077: add NUMA and current cpu to CpuInfo. NUMA info is found using the /sys filesystem.
The current CPU can be found using sched_getcpu(), which is supported on all recent Linux kernels (unfortunately CentOS 5 shipped with an older kernel, so we need a fallback). Testing: Confirmed that this built on a range of different Linux distros, including CentOS 5, which is missing support for features like sched_getcpu(). Change-Id: I0525228a56bcf20c45f78ee1ba1d300c74cf4d05 Reviewed-on: http://gerrit.cloudera.org:8080/6402 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0ff1e6e8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0ff1e6e8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0ff1e6e8 Branch: refs/heads/master Commit: 0ff1e6e8d7e51fca6cd330527329008a3eee4e06 Parents: 529a5f9 Author: Tim Armstrong <[email protected]> Authored: Tue Mar 14 15:13:04 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Mar 16 09:35:57 2017 +0000 ---------------------------------------------------------------------- CMakeLists.txt | 2 +- be/CMakeLists.txt | 4 ++ be/src/common/.gitignore | 2 + be/src/common/CMakeLists.txt | 4 ++ be/src/common/config.h.in | 26 ++++++++++ be/src/util/cpu-info.cc | 100 +++++++++++++++++++++++++++++++++++--- be/src/util/cpu-info.h | 40 +++++++++++++-- 7 files changed, 168 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index cc21604..1aff6b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,7 +118,7 @@ if (CMAKE_DEBUG) set(Boost_DEBUG TRUE) endif() -find_package(Boost REQUIRED COMPONENTS thread regex system filesystem date_time) +find_package(Boost REQUIRED 
COMPONENTS thread regex filesystem system date_time) include_directories(${Boost_INCLUDE_DIRS}) set(LIBS ${LIBS} ${Boost_LIBRARIES}) message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS}) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/be/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 1e094a0..03c3663 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -278,6 +278,10 @@ endif() EXECUTE_PROCESS(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY} ${CMAKE_CURRENT_SOURCE_DIR}/build/latest) +# Determine what functions are available on the current platform. +INCLUDE(CheckFunctionExists) +CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) + # This is a list of impala library dependencies. Individual libraries # must not specify library dependencies in their own CMakeLists.txt file. # Enclose the impala libraries in -Wl,--start-group and -Wl,--end-group http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/be/src/common/.gitignore ---------------------------------------------------------------------- diff --git a/be/src/common/.gitignore b/be/src/common/.gitignore new file mode 100644 index 0000000..d53d6f3 --- /dev/null +++ b/be/src/common/.gitignore @@ -0,0 +1,2 @@ +# Generated files +config.h http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/be/src/common/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/common/CMakeLists.txt b/be/src/common/CMakeLists.txt index fb799d9..65edfc0 100644 --- a/be/src/common/CMakeLists.txt +++ b/be/src/common/CMakeLists.txt @@ -48,3 +48,7 @@ add_library(GlobalFlags add_dependencies(GlobalFlags thrift-deps) ADD_BE_TEST(atomic-test) + +# Generate config.h from config.h.in, filling in variables from CMake +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in + ${CMAKE_CURRENT_BINARY_DIR}/config.h) 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/be/src/common/config.h.in ---------------------------------------------------------------------- diff --git a/be/src/common/config.h.in b/be/src/common/config.h.in new file mode 100644 index 0000000..fcae626 --- /dev/null +++ b/be/src/common/config.h.in @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + +#ifndef IMPALA_COMMON_CONFIG_H +#define IMPALA_COMMON_CONFIG_H + +/// This is a template that is populated by CMake with config information + +#cmakedefine HAVE_SCHED_GETCPU + +#endif http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/be/src/util/cpu-info.cc ---------------------------------------------------------------------- diff --git a/be/src/util/cpu-info.cc b/be/src/util/cpu-info.cc index 6329ca8..e5d2a83 100644 --- a/be/src/util/cpu-info.cc +++ b/be/src/util/cpu-info.cc @@ -21,22 +21,29 @@ #include <sys/sysctl.h> #endif -#include <boost/algorithm/string.hpp> -#include <iostream> -#include <fstream> -#include <gutil/strings/substitute.h> #include <mmintrin.h> -#include <sstream> +#include <sched.h> #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <algorithm> +#include <fstream> +#include <iostream> +#include <sstream> +#include <boost/algorithm/string.hpp> +#include <boost/filesystem.hpp> +#include <sys/sysinfo.h> +#include "common/config.h" +#include "gutil/strings/substitute.h" #include "util/pretty-printer.h" +#include "util/string-parser.h" #include "common/names.h" using boost::algorithm::contains; using boost::algorithm::trim; +namespace fs = boost::filesystem; using std::max; DECLARE_bool(abort_on_config_error); @@ -66,7 +73,10 @@ int64_t CpuInfo::hardware_flags_ = 0; int64_t CpuInfo::original_hardware_flags_; int64_t CpuInfo::cycles_per_ms_; int CpuInfo::num_cores_ = 1; +int CpuInfo::max_num_cores_; string CpuInfo::model_name_ = "unknown"; +int CpuInfo::max_num_numa_nodes_; +unique_ptr<int[]> CpuInfo::core_to_numa_node_; static struct { string name; @@ -143,12 +153,70 @@ void CpuInfo::Init() { } else { num_cores_ = 1; } - if (FLAGS_num_cores > 0) num_cores_ = FLAGS_num_cores; + max_num_cores_ = get_nprocs_conf(); + // Print a warning if something is wrong with sched_getcpu(). +#ifdef HAVE_SCHED_GETCPU + if (sched_getcpu() == -1) { + LOG(WARNING) << "Kernel does not support getcpu(). 
Performance may be impacted."; + } +#else + LOG(WARNING) << "Built on a system without sched_getcpu() support. Performance may" + << " be impacted."; +#endif + + InitNuma(); initialized_ = true; } +void CpuInfo::InitNuma() { + // Use the NUMA info in the /sys filesystem. which is part of the Linux ABI: + // see https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node and + // https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu + // The filesystem entries are only present if the kernel was compiled with NUMA support. + core_to_numa_node_.reset(new int[max_num_cores_]); + + if (!fs::is_directory("/sys/devices/system/node")) { + LOG(WARNING) << "/sys/devices/system/node is not present - no NUMA support"; + // Assume a single NUMA node. + max_num_numa_nodes_ = 1; + std::fill_n(core_to_numa_node_.get(), max_num_cores_, 0); + return; + } + + // Search for node subdirectories - node0, node1, node2, etc to determine possible + // NUMA nodes. + fs::directory_iterator dir_it("/sys/devices/system/node"); + max_num_numa_nodes_ = 0; + for (; dir_it != fs::directory_iterator(); ++dir_it) { + const string filename = dir_it->path().filename().string(); + if (filename.find("node") == 0) ++max_num_numa_nodes_; + } + if (max_num_numa_nodes_ == 0) { + LOG(WARNING) << "Could not find nodes in /sys/devices/system/node"; + max_num_numa_nodes_ = 1; + } + + // Check which NUMA node each core belongs to based on the existence of a symlink + // to the node subdirectory. 
+ for (int core = 0; core < max_num_cores_; ++core) { + bool found_numa_node = false; + for (int node = 0; node < max_num_numa_nodes_; ++node) { + if (fs::exists(Substitute("/sys/devices/system/cpu/cpu$0/node$1", core, node))) { + core_to_numa_node_[core] = node; + found_numa_node = true; + break; + } + } + if (!found_numa_node) { + LOG(WARNING) << "Could not determine NUMA node for core " << core + << " from /sys/devices/system/cpu/"; + core_to_numa_node_[core] = 0; + } + } +} + void CpuInfo::VerifyCpuRequirements() { if (!CpuInfo::IsSupported(CpuInfo::SSSE3)) { LOG(ERROR) << "CPU does not support the Supplemental SSE3 (SSSE3) instruction set. " @@ -188,6 +256,19 @@ void CpuInfo::EnableFeature(long flag, bool enable) { } } +int CpuInfo::GetCurrentCore() { + // sched_getcpu() is not supported on some old kernels/glibcs (like the versions that + // shipped with CentOS 5). In that case just pretend we're always running on CPU 0 + // so that we can build and run with degraded perf. +#ifdef HAVE_SCHED_GETCPU + int cpu = sched_getcpu(); + // The syscall may not be supported even if the function exists. + return cpu == -1 ? 
0 : cpu; +#else + return 0; +#endif +} + void CpuInfo::GetCacheInfo(long cache_sizes[NUM_CACHE_LEVELS], long cache_line_sizes[NUM_CACHE_LEVELS]) { #ifdef __APPLE__ @@ -237,6 +318,7 @@ string CpuInfo::DebugString() { stream << "Cpu Info:" << endl << " Model: " << model_name_ << endl << " Cores: " << num_cores_ << endl + << " Max Possible Cores: " << max_num_cores_ << endl << " " << L1 << endl << " " << L2 << endl << " " << L3 << endl @@ -246,6 +328,12 @@ string CpuInfo::DebugString() { stream << " " << flag_mappings[i].name << endl; } } + stream << " Numa Nodes: " << max_num_numa_nodes_ << endl; + stream << " Numa Nodes of Cores:"; + for (int core = 0; core < max_num_cores_; ++core) { + stream << " " << core << "->" << core_to_numa_node_[core] << " |"; + } + stream << endl; return stream.str(); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0ff1e6e8/be/src/util/cpu-info.h ---------------------------------------------------------------------- diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h index 868d2dd..28af3e5 100644 --- a/be/src/util/cpu-info.h +++ b/be/src/util/cpu-info.h @@ -19,6 +19,7 @@ #ifndef IMPALA_UTIL_CPU_INFO_H #define IMPALA_UTIL_CPU_INFO_H +#include <memory> #include <string> #include <boost/cstdint.hpp> @@ -50,8 +51,7 @@ class CpuInfo { /// Initialize CpuInfo. static void Init(); - /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error - /// and terminate. + /// Determine if the CPU meets the minimum CPU requirements and if not, log an error. static void VerifyCpuRequirements(); /// Determine if the CPU scaling governor is set to 'performance' and if not, issue an @@ -83,12 +83,36 @@ class CpuInfo { return cycles_per_ms_; } - /// Returns the number of cores (including hyper-threaded) on this machine. 
+ /// Returns the number of cores (including hyper-threaded) on this machine that are + /// available for use by Impala (either the number of online cores or the value of + /// the --num_cores command-line flag). static int num_cores() { DCHECK(initialized_); return num_cores_; } + /// Returns the maximum number of cores that will be online in the system, including + /// any offline cores or cores that could be added via hot-plugging. + static int GetMaxNumCores() { return max_num_cores_; } + + /// Returns the core that the current thread is running on. Always in range + /// [0, GetMaxNumCores()). Note that the thread may be migrated to a different core + /// at any time by the scheduler, so the caller should not assume the answer will + /// remain stable. + static int GetCurrentCore(); + + /// Returns the maximum number of NUMA nodes that will be online in the system, + /// including any that may be offline or disabled. + static int GetMaxNumNumaNodes() { return max_num_numa_nodes_; } + + /// Returns the NUMA node of the core provided. 'core' must be in the range + /// [0, GetMaxNumCores()). + static int GetNumaNodeOfCore(int core) { + DCHECK_LE(0, core); + DCHECK_LT(core, max_num_cores_); + return core_to_numa_node_[core]; + } + /// Returns the model name of the cpu (e.g. Intel i7-2600) static std::string model_name() { DCHECK(initialized_); @@ -127,6 +151,9 @@ class CpuInfo { }; private: + /// Initialize NUMA-related state - called from Init(); + static void InitNuma(); + /// Populates the arguments with information about this machine's caches. /// The values returned are not reliable in some environments, e.g. RHEL5 on EC2, so /// so we will keep this as a private method. @@ -138,7 +165,14 @@ class CpuInfo { static int64_t original_hardware_flags_; static int64_t cycles_per_ms_; static int num_cores_; + static int max_num_cores_; static std::string model_name_; + + /// Maximum possible number of NUMA nodes.
+ static int max_num_numa_nodes_; + + /// Array with 'max_num_cores_' entries, each of which is the NUMA node of that core. + static std::unique_ptr<int[]> core_to_numa_node_; }; }
