IMPALA-6008: Creating a UDF from a shared library with a .ll extenion crashes impala
Impala crashes on creating a UDF from a shared library (.so file) which was renamed to have .ll extension. CreateFile() call in GetSymbols() fails and returns on error and does not close the codegen object. This patch closes the codegen object on failure. This avoids hitting a DCHECK later up in the stack. The chain of failures also invokes the DiagnosticHandlerFn. RuntimeState object is NULL when the DiagnosticHandlerFn gets called in this case. This change also adds a check before accessing it for logging. [localhost:21000] > create function foo4 (string, string) returns string location '/tmp/bad_udf.ll' symbol='MyAwesomeUdf'; Query: create function foo4 (string, string) returns string location '/tmp/bad_udf.ll' symbol='MyAwesomeUdf' ERROR: AnalysisException: Could not load binary: /tmp/bad_udf.ll LLVM diagnostic error: Invalid bitcode signature Change-Id: Id060668802ca9c80367cdc0e8a823b968d549bbb Reviewed-on: http://gerrit.cloudera.org:8080/9154 Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com> Tested-by: Impala Public Jenkins Reviewed-on: http://gerrit.cloudera.org:8080/9424 Tested-by: Tim Armstrong <tarmstr...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7c4689c8 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7c4689c8 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7c4689c8 Branch: refs/heads/2.x Commit: 7c4689c81f10f50d07aee6ac3057bda3d64d2403 Parents: 84fffd4 Author: aphadke <apha...@cloudera.com> Authored: Mon Jan 29 11:50:33 2018 -0800 Committer: Tim Armstrong <tarmstr...@cloudera.com> Committed: Fri Feb 23 22:51:01 2018 +0000 ---------------------------------------------------------------------- be/src/codegen/llvm-codegen.cc | 34 +++++++++++++------- .../queries/QueryTest/udf-errors.test | 6 ++++ tests/query_test/test_udfs.py | 22 +++++++++++-- 3 files changed, 48 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/7c4689c8/be/src/codegen/llvm-codegen.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc index 892b395..3796d76 100644 --- a/be/src/codegen/llvm-codegen.cc +++ b/be/src/codegen/llvm-codegen.cc @@ -215,9 +215,14 @@ Status LlvmCodeGen::CreateFromFile(RuntimeState* state, ObjectPool* pool, SCOPED_TIMER((*codegen)->profile_->total_time_counter()); unique_ptr<llvm::Module> loaded_module; - RETURN_IF_ERROR((*codegen)->LoadModuleFromFile(file, &loaded_module)); - - return (*codegen)->Init(std::move(loaded_module)); + Status status = (*codegen)->LoadModuleFromFile(file, &loaded_module); + if (!status.ok()) goto error; + status = (*codegen)->Init(std::move(loaded_module)); + if (!status.ok()) goto error; + return Status::OK(); +error: + (*codegen)->Close(); + return status; } Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, ObjectPool* pool, @@ -242,9 +247,15 @@ Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, ObjectPool* pool, unique_ptr<llvm::MemoryBuffer> module_ir_buf( llvm::MemoryBuffer::getMemBuffer(module_ir, "", false)); unique_ptr<llvm::Module> loaded_module; - RETURN_IF_ERROR((*codegen)->LoadModuleFromMemory(std::move(module_ir_buf), - module_name, &loaded_module)); - return (*codegen)->Init(std::move(loaded_module)); + Status status = (*codegen)->LoadModuleFromMemory(std::move(module_ir_buf), + module_name, &loaded_module); + if (!status.ok()) goto error; + status = (*codegen)->Init(std::move(loaded_module)); + if (!status.ok()) goto error; + return Status::OK(); +error: + (*codegen)->Close(); + return status; } Status LlvmCodeGen::LoadModuleFromFile( @@ -276,9 +287,8 @@ Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_i llvm::ErrorOr<unique_ptr<llvm::Module>> tmp_module = getLazyBitcodeModule(std::move(module_ir_buf), context(), false); if (!tmp_module) { - stringstream ss; - ss << "Could not parse module " << module_name << ": " << tmp_module.getError(); - return Status(ss.str()); + string diagnostic_err = diagnostic_handler_.GetErrorString(); + return Status(diagnostic_err); } *module = std::move(tmp_module.get()); @@ -1690,8 +1700,10 @@ void LlvmCodeGen::DiagnosticHandler::DiagnosticHandlerFn( diagnostic_printer << "LLVM diagnostic error: "; info.print(diagnostic_printer); error_msg.flush(); - LOG(INFO) << "Query " << codegen->state_->query_id() << " encountered a " - << codegen->diagnostic_handler_.error_str_; + if (codegen->state_) { + LOG(INFO) << "Query " << codegen->state_->query_id() << " encountered a " + << codegen->diagnostic_handler_.error_str_; + } } } http://git-wip-us.apache.org/repos/asf/impala/blob/7c4689c8/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test index 9698717..81e459a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test +++ b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test @@ -20,6 +20,12 @@ symbol='FnDoesNotExist'; Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so ==== ---- QUERY +create function if not exists foo (string, string) returns string location +'$FILESYSTEM_PREFIX/test-warehouse/$DATABASE_bad_udf.ll' symbol='MyAwesomeUdf'; +---- CATCH +Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/$DATABASE_bad_udf.ll +==== +---- QUERY # This test is run with codegen disabled. Interpretation only handles up to 20 arguments. create function if not exists twenty_args(int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int http://git-wip-us.apache.org/repos/asf/impala/blob/7c4689c8/tests/query_test/test_udfs.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py index 1ff716a..1be25e5 100644 --- a/tests/query_test/test_udfs.py +++ b/tests/query_test/test_udfs.py @@ -18,7 +18,11 @@ from copy import copy import os import pytest -from subprocess import check_call +import random +import threading +import time +import tempfile +from subprocess import call, check_call from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_cluster import ImpalaCluster @@ -327,8 +331,20 @@ class TestUdfExecution(TestUdfBase): # Aim to exercise two failure cases: # 1. too many arguments # 2. IR UDF - if vector.get_value('exec_option')['disable_codegen']: - self.run_test_case('QueryTest/udf-errors', vector, use_db=unique_database) + fd, dir_name = tempfile.mkstemp() + try: + with open(dir_name, "w") as f: + f.write("Hello World") + check_call(["hadoop", "fs", "-put", "-f", f.name, "/test-warehouse/" + + unique_database + "_bad_udf.ll"]) + if vector.get_value('exec_option')['disable_codegen']: + self.run_test_case('QueryTest/udf-errors', vector, use_db=unique_database) + finally: + if os.path.exists(f.name): + os.remove(f.name) + call(["hadoop", "fs", "-rm", "-f", "/test-warehouse/" + + unique_database + "_bad_udf.ll"]) + os.close(fd) # Run serially because this will blow the process limit, potentially causing other # queries to fail