Kudu: Exclude non-Kudu symbols during stub client generation Previously boost related symbols (and others) would get defined in the Kudu client stub with a non-functional implementation. If these implementations were used at runtime they would crash Impala.
Change-Id: I54292095692ce38c255a8df48cf8f3f655d797b0 Reviewed-on: http://gerrit.cloudera.org:8080/2864 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/cd193f06 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/cd193f06 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/cd193f06 Branch: refs/heads/master Commit: cd193f063f02bf7921c2d0f5b1231f91deed6694 Parents: 11ea79c Author: Casey Ching <[email protected]> Authored: Mon Apr 25 13:54:57 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Sat May 14 01:30:01 2016 -0700 ---------------------------------------------------------------------- bin/bootstrap_toolchain.py | 174 +++++++++++++++++++++------------------- 1 file changed, 91 insertions(+), 83 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cd193f06/bin/bootstrap_toolchain.py ---------------------------------------------------------------------- diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py index 3009332..2589d1f 100755 --- a/bin/bootstrap_toolchain.py +++ b/bin/bootstrap_toolchain.py @@ -124,6 +124,80 @@ def bootstrap(packages): write_version_file(toolchain_root, pkg_name, pkg_version, compiler, get_platform_release_label()) +def check_output(cmd_args): + """Run the command and return the output. Raise an exception if the command returns + a non-zero return code. Similar to subprocess.check_output() which is only provided + in python 2.7. + """ + process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + stdout, _ = process.communicate() + if process.wait() != 0: + raise Exception("Command with args '%s' failed with exit code %s:\n%s" + % (cmd_args, process.returncode, stdout)) + return stdout + +def package_directory(toolchain_root, pkg_name, pkg_version): + dir_name = "{0}-{1}".format(pkg_name, pkg_version) + return os.path.join(toolchain_root, dir_name) + +def version_file_path(toolchain_root, pkg_name, pkg_version): + return os.path.join(package_directory(toolchain_root, pkg_name, pkg_version), + "toolchain_package_version.txt") + +def check_custom_toolchain(toolchain_root, packages): + missing = [] + for p in packages: + pkg_name, pkg_version = unpack_name_and_version(p) + pkg_dir = package_directory(toolchain_root, pkg_name, pkg_version) + if not os.path.isdir(pkg_dir): + missing.append((p, pkg_dir)) + + if missing: + print("The following packages are not in their expected locations.") + for p, pkg_dir in missing: + print(" %s (expected directory %s to exist)" % (p, pkg_dir)) + print("Pre-built toolchain archives not available for your platform.") + print("Clone and build native toolchain from source using this repository:") + print(" https://github.com/cloudera/native-toolchain") + raise Exception("Toolchain bootstrap failed: required packages were missing") + +def check_for_existing_package(toolchain_root, pkg_name, pkg_version, compiler): + """Return true if toolchain_root already contains the package with the correct + version and compiler. + """ + version_file = version_file_path(toolchain_root, pkg_name, pkg_version) + if not os.path.exists(version_file): + return False + + label = get_platform_release_label() + pkg_version_string = "{0}-{1}-{2}-{3}".format(pkg_name, pkg_version, compiler, label) + with open(version_file) as f: + return f.read().strip() == pkg_version_string + +def write_version_file(toolchain_root, pkg_name, pkg_version, compiler, label): + with open(version_file_path(toolchain_root, pkg_name, pkg_version), 'w') as f: + f.write("{0}-{1}-{2}-{3}".format(pkg_name, pkg_version, compiler, label)) + +def remove_existing_package(toolchain_root, pkg_name, pkg_version): + dir_path = package_directory(toolchain_root, pkg_name, pkg_version) + if os.path.exists(dir_path): + print "Removing existing package directory {0}".format(dir_path) + shutil.rmtree(dir_path) + +def unpack_name_and_version(package): + """A package definition is either a string where the version is fetched from the + environment or a tuple where the package name and the package version are fully + specified. + """ + if isinstance(package, basestring): + env_var = "IMPALA_{0}_VERSION".format(package).replace("-", "_").upper() + try: + return package, os.environ[env_var] + except KeyError: + raise Exception("Could not find version for {0} in environment var {1}".format( + package, env_var)) + return package[0], package[1] + def build_kudu_stub(toolchain_root, kudu_version, compiler): # When Kudu isn't supported, the CentOS 7 package will be downloaded and the client # lib will be replaced with a stubbed client. @@ -159,7 +233,7 @@ def build_kudu_stub(toolchain_root, kudu_version, compiler): # Extract the symbols and write the stubbed client source. There is a special method # kudu::client::GetShortVersionString() that is overridden so that the stub can be # identified by the caller. - get_short_version_symbol = "_ZN4kudu6client21GetShortVersionStringEv" + get_short_version_sig = "kudu::client::GetShortVersionString()" nm_out = check_output([nm_path, "--defined-only", "-D", client_lib_path]) stub_build_dir = tempfile.mkdtemp() stub_client_src_file = open(os.path.join(stub_build_dir, "kudu_client.cc"), "w") @@ -178,22 +252,30 @@ std::string GetShortVersionString() { return kFakeKuduVersion; } }} """) found_start_version_symbol = False + cpp_filt_path = os.path.join(binutils_dir, "bin", "c++filt") for line in nm_out.splitlines(): - addr, sym_type, name = line.split(" ") - if name in ["_init", "_fini"]: + addr, sym_type, mangled_name = line.split(" ") + # Skip special functions an anything that isn't a strong symbol. Any symbols that + # get passed this check must be related to Kudu. If a symbol unrelated to Kudu + # (ex: a boost symbol) gets defined in the stub, there's a chance the symbol could + # get used and crash Impala. + if mangled_name in ["_init", "_fini"] or sym_type not in "Tt": continue - if name == get_short_version_symbol: + demangled_name = check_output([cpp_filt_path, mangled_name]).strip() + assert "kudu" in demangled_name, \ + "Symbol doesn't appear to be related to Kudu: " + demangled_name + if demangled_name == get_short_version_sig: found_start_version_symbol = True continue - if sym_type.upper() in "TW": - stub_client_src_file.write(""" + stub_client_src_file.write(""" extern "C" void %s() { KuduNotSupported(); } -""" % name) +""" % mangled_name) + if not found_start_version_symbol: - raise Exception("Expected to find symbol " + get_short_version_symbol + - " corresponding to kudu::client::GetShortVersionString() but it was not found.") + raise Exception("Expected to find symbol a corresponding to" + " %s but it was not found." % get_short_version_sig) stub_client_src_file.flush() # The soname is needed to avoid problem in packaging builds. Without the soname, @@ -221,80 +303,6 @@ extern "C" void %s() { finally: shutil.rmtree(stub_build_dir) -def check_output(cmd_args): - """Run the command and return the output. Raise an exception if the command returns - a non-zero return code. Similar to subprocess.check_output() which is only provided - in python 2.7. - """ - process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - stdout, _ = process.communicate() - if process.wait() != 0: - raise Exception("Command with args '%s' failed with exit code %s:\n%s" - % (cmd_args, process.returncode, stdout)) - return stdout - -def package_directory(toolchain_root, pkg_name, pkg_version): - dir_name = "{0}-{1}".format(pkg_name, pkg_version) - return os.path.join(toolchain_root, dir_name) - -def version_file_path(toolchain_root, pkg_name, pkg_version): - return os.path.join(package_directory(toolchain_root, pkg_name, pkg_version), - "toolchain_package_version.txt") - -def check_custom_toolchain(toolchain_root, packages): - missing = [] - for p in packages: - pkg_name, pkg_version = unpack_name_and_version(p) - pkg_dir = package_directory(toolchain_root, pkg_name, pkg_version) - if not os.path.isdir(pkg_dir): - missing.append((p, pkg_dir)) - - if missing: - print("The following packages are not in their expected locations.") - for p, pkg_dir in missing: - print(" %s (expected directory %s to exist)" % (p, pkg_dir)) - print("Pre-built toolchain archives not available for your platform.") - print("Clone and build native toolchain from source using this repository:") - print(" https://github.com/cloudera/native-toolchain") - raise Exception("Toolchain bootstrap failed: required packages were missing") - -def check_for_existing_package(toolchain_root, pkg_name, pkg_version, compiler): - """Return true if toolchain_root already contains the package with the correct - version and compiler. - """ - version_file = version_file_path(toolchain_root, pkg_name, pkg_version) - if not os.path.exists(version_file): - return False - - label = get_platform_release_label() - pkg_version_string = "{0}-{1}-{2}-{3}".format(pkg_name, pkg_version, compiler, label) - with open(version_file) as f: - return f.read().strip() == pkg_version_string - -def write_version_file(toolchain_root, pkg_name, pkg_version, compiler, label): - with open(version_file_path(toolchain_root, pkg_name, pkg_version), 'w') as f: - f.write("{0}-{1}-{2}-{3}".format(pkg_name, pkg_version, compiler, label)) - -def remove_existing_package(toolchain_root, pkg_name, pkg_version): - dir_path = package_directory(toolchain_root, pkg_name, pkg_version) - if os.path.exists(dir_path): - print "Removing existing package directory {0}".format(dir_path) - shutil.rmtree(dir_path) - -def unpack_name_and_version(package): - """A package definition is either a string where the version is fetched from the - environment or a tuple where the package name and the package version are fully - specified. - """ - if isinstance(package, basestring): - env_var = "IMPALA_{0}_VERSION".format(package).replace("-", "_").upper() - try: - return package, os.environ[env_var] - except KeyError: - raise Exception("Could not find version for {0} in environment var {1}".format( - package, env_var)) - return package[0], package[1] - if __name__ == "__main__": packages = ["avro", "binutils", "boost", "breakpad", "bzip2", "gcc", "gflags", "glog", "gperftools", "gtest", "kudu", "llvm", ("llvm", "3.8.0-asserts-p1"), "lz4",
