IMPALA-6008: Creating a UDF from a shared library with a .ll extenion
crashes impala

Impala crashes on creating a UDF from a shared library (.so file) which
was renamed to have .ll extension. CreateFile() call in GetSymbols()
fails and returns on error and does not close the codegen object. This
patch closes the codegen object on failure. This avoids hitting a DCHECK
later up in the stack.
The chain of failures also invokes the DiagnosticHandlerFn. RuntimeState
object is NULL when the DiagnosticHandlerFn gets called in this case.
This change also adds a check before accessing it for logging.

[localhost:21000] > create function foo4 (string, string) returns string
location '/tmp/bad_udf.ll' symbol='MyAwesomeUdf';
Query: create function foo4 (string, string) returns string location
'/tmp/bad_udf.ll' symbol='MyAwesomeUdf'
ERROR: AnalysisException: Could not load binary: /tmp/bad_udf.ll
LLVM diagnostic error: Invalid bitcode signature

Change-Id: Id060668802ca9c80367cdc0e8a823b968d549bbb
Reviewed-on: http://gerrit.cloudera.org:8080/9154
Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com>
Tested-by: Impala Public Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/9424
Tested-by: Tim Armstrong <tarmstr...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7c4689c8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7c4689c8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7c4689c8

Branch: refs/heads/2.x
Commit: 7c4689c81f10f50d07aee6ac3057bda3d64d2403
Parents: 84fffd4
Author: aphadke <apha...@cloudera.com>
Authored: Mon Jan 29 11:50:33 2018 -0800
Committer: Tim Armstrong <tarmstr...@cloudera.com>
Committed: Fri Feb 23 22:51:01 2018 +0000

----------------------------------------------------------------------
 be/src/codegen/llvm-codegen.cc                  | 34 +++++++++++++-------
 .../queries/QueryTest/udf-errors.test           |  6 ++++
 tests/query_test/test_udfs.py                   | 22 +++++++++++--
 3 files changed, 48 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7c4689c8/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 892b395..3796d76 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -215,9 +215,14 @@ Status LlvmCodeGen::CreateFromFile(RuntimeState* state, 
ObjectPool* pool,
   SCOPED_TIMER((*codegen)->profile_->total_time_counter());
 
   unique_ptr<llvm::Module> loaded_module;
-  RETURN_IF_ERROR((*codegen)->LoadModuleFromFile(file, &loaded_module));
-
-  return (*codegen)->Init(std::move(loaded_module));
+  Status status = (*codegen)->LoadModuleFromFile(file, &loaded_module);
+  if (!status.ok()) goto error;
+  status = (*codegen)->Init(std::move(loaded_module));
+  if (!status.ok()) goto error;
+  return Status::OK();
+error:
+  (*codegen)->Close();
+  return status;
 }
 
 Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, ObjectPool* pool,
@@ -242,9 +247,15 @@ Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, 
ObjectPool* pool,
   unique_ptr<llvm::MemoryBuffer> module_ir_buf(
       llvm::MemoryBuffer::getMemBuffer(module_ir, "", false));
   unique_ptr<llvm::Module> loaded_module;
-  RETURN_IF_ERROR((*codegen)->LoadModuleFromMemory(std::move(module_ir_buf),
-      module_name, &loaded_module));
-  return (*codegen)->Init(std::move(loaded_module));
+  Status status = (*codegen)->LoadModuleFromMemory(std::move(module_ir_buf),
+      module_name, &loaded_module);
+  if (!status.ok()) goto error;
+  status = (*codegen)->Init(std::move(loaded_module));
+  if (!status.ok()) goto error;
+  return Status::OK();
+error:
+  (*codegen)->Close();
+  return status;
 }
 
 Status LlvmCodeGen::LoadModuleFromFile(
@@ -276,9 +287,8 @@ Status 
LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_i
   llvm::ErrorOr<unique_ptr<llvm::Module>> tmp_module =
       getLazyBitcodeModule(std::move(module_ir_buf), context(), false);
   if (!tmp_module) {
-    stringstream ss;
-    ss << "Could not parse module " << module_name << ": " << 
tmp_module.getError();
-    return Status(ss.str());
+    string diagnostic_err = diagnostic_handler_.GetErrorString();
+    return Status(diagnostic_err);
   }
 
   *module = std::move(tmp_module.get());
@@ -1690,8 +1700,10 @@ void LlvmCodeGen::DiagnosticHandler::DiagnosticHandlerFn(
     diagnostic_printer << "LLVM diagnostic error: ";
     info.print(diagnostic_printer);
     error_msg.flush();
-    LOG(INFO) << "Query " << codegen->state_->query_id() << " encountered a "
-        << codegen->diagnostic_handler_.error_str_;
+    if (codegen->state_) {
+      LOG(INFO) << "Query " << codegen->state_->query_id() << " encountered a "
+          << codegen->diagnostic_handler_.error_str_;
+    }
   }
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/7c4689c8/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test 
b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
index 9698717..81e459a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
@@ -20,6 +20,12 @@ symbol='FnDoesNotExist';
 Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so
 ====
 ---- QUERY
+create function if not exists foo (string, string) returns string location
+'$FILESYSTEM_PREFIX/test-warehouse/$DATABASE_bad_udf.ll' symbol='MyAwesomeUdf';
+---- CATCH
+Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/$DATABASE_bad_udf.ll
+====
+---- QUERY
 # This test is run with codegen disabled. Interpretation only handles up to 20 
arguments.
 create function if not exists twenty_args(int, int, int, int, int, int,
     int, int, int, int, int, int, int, int, int, int, int, int, int, int) 
returns int

http://git-wip-us.apache.org/repos/asf/impala/blob/7c4689c8/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 1ff716a..1be25e5 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -18,7 +18,11 @@
 from copy import copy
 import os
 import pytest
-from subprocess import check_call
+import random
+import threading
+import time
+import tempfile
+from subprocess import call, check_call
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_cluster import ImpalaCluster
@@ -327,8 +331,20 @@ class TestUdfExecution(TestUdfBase):
     # Aim to exercise two failure cases:
     # 1. too many arguments
     # 2. IR UDF
-    if vector.get_value('exec_option')['disable_codegen']:
-      self.run_test_case('QueryTest/udf-errors', vector, 
use_db=unique_database)
+    fd, dir_name = tempfile.mkstemp()
+    try:
+      with open(dir_name, "w") as f:
+        f.write("Hello World")
+      check_call(["hadoop", "fs", "-put", "-f", f.name, "/test-warehouse/" +
+                unique_database + "_bad_udf.ll"])
+      if vector.get_value('exec_option')['disable_codegen']:
+        self.run_test_case('QueryTest/udf-errors', vector, 
use_db=unique_database)
+    finally:
+      if os.path.exists(f.name):
+        os.remove(f.name)
+      call(["hadoop", "fs", "-rm", "-f", "/test-warehouse/" +
+                  unique_database + "_bad_udf.ll"])
+      os.close(fd)
 
   # Run serially because this will blow the process limit, potentially causing 
other
   # queries to fail

Reply via email to