Repository: impala
Updated Branches:
  refs/heads/master 0c8eba076 -> 24b4ed0b2


IMPALA-6008: Creating a UDF from a shared library with a .ll extenion
crashes impala

Impala crashes on creating a UDF from a shared library (.so file) which
was renamed to have .ll extension. CreateFile() call in GetSymbols()
fails and returns on error and does not close the codegen object. This
patch closes the codegen object on failure. This avoids hitting a DCHECK
later up in the stack.
The chain of failures also invokes the DiagnosticHandlerFn. RuntimeState
object is NULL when the DiagnosticHandlerFn gets called in this case.
This change also adds a check before accessing it for logging.

[localhost:21000] > create function foo4 (string, string) returns string
location '/tmp/bad_udf.ll' symbol='MyAwesomeUdf';
Query: create function foo4 (string, string) returns string location
'/tmp/bad_udf.ll' symbol='MyAwesomeUdf'
ERROR: AnalysisException: Could not load binary: /tmp/bad_udf.ll
LLVM diagnostic error: Invalid bitcode signature

Change-Id: Id060668802ca9c80367cdc0e8a823b968d549bbb
Reviewed-on: http://gerrit.cloudera.org:8080/9154
Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7ce519f9
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7ce519f9
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7ce519f9

Branch: refs/heads/master
Commit: 7ce519f92b7497ea5c9b1519348f3509aa57ad7d
Parents: 0c8eba0
Author: aphadke <apha...@cloudera.com>
Authored: Mon Jan 29 11:50:33 2018 -0800
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Fri Feb 23 03:50:39 2018 +0000

----------------------------------------------------------------------
 be/src/codegen/llvm-codegen.cc                  | 34 +++++++++++++-------
 .../queries/QueryTest/udf-errors.test           |  6 ++++
 tests/query_test/test_udfs.py                   | 22 +++++++++++--
 3 files changed, 48 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7ce519f9/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 892b395..3796d76 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -215,9 +215,14 @@ Status LlvmCodeGen::CreateFromFile(RuntimeState* state, 
ObjectPool* pool,
   SCOPED_TIMER((*codegen)->profile_->total_time_counter());
 
   unique_ptr<llvm::Module> loaded_module;
-  RETURN_IF_ERROR((*codegen)->LoadModuleFromFile(file, &loaded_module));
-
-  return (*codegen)->Init(std::move(loaded_module));
+  Status status = (*codegen)->LoadModuleFromFile(file, &loaded_module);
+  if (!status.ok()) goto error;
+  status = (*codegen)->Init(std::move(loaded_module));
+  if (!status.ok()) goto error;
+  return Status::OK();
+error:
+  (*codegen)->Close();
+  return status;
 }
 
 Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, ObjectPool* pool,
@@ -242,9 +247,15 @@ Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, 
ObjectPool* pool,
   unique_ptr<llvm::MemoryBuffer> module_ir_buf(
       llvm::MemoryBuffer::getMemBuffer(module_ir, "", false));
   unique_ptr<llvm::Module> loaded_module;
-  RETURN_IF_ERROR((*codegen)->LoadModuleFromMemory(std::move(module_ir_buf),
-      module_name, &loaded_module));
-  return (*codegen)->Init(std::move(loaded_module));
+  Status status = (*codegen)->LoadModuleFromMemory(std::move(module_ir_buf),
+      module_name, &loaded_module);
+  if (!status.ok()) goto error;
+  status = (*codegen)->Init(std::move(loaded_module));
+  if (!status.ok()) goto error;
+  return Status::OK();
+error:
+  (*codegen)->Close();
+  return status;
 }
 
 Status LlvmCodeGen::LoadModuleFromFile(
@@ -276,9 +287,8 @@ Status 
LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_i
   llvm::ErrorOr<unique_ptr<llvm::Module>> tmp_module =
       getLazyBitcodeModule(std::move(module_ir_buf), context(), false);
   if (!tmp_module) {
-    stringstream ss;
-    ss << "Could not parse module " << module_name << ": " << 
tmp_module.getError();
-    return Status(ss.str());
+    string diagnostic_err = diagnostic_handler_.GetErrorString();
+    return Status(diagnostic_err);
   }
 
   *module = std::move(tmp_module.get());
@@ -1690,8 +1700,10 @@ void LlvmCodeGen::DiagnosticHandler::DiagnosticHandlerFn(
     diagnostic_printer << "LLVM diagnostic error: ";
     info.print(diagnostic_printer);
     error_msg.flush();
-    LOG(INFO) << "Query " << codegen->state_->query_id() << " encountered a "
-        << codegen->diagnostic_handler_.error_str_;
+    if (codegen->state_) {
+      LOG(INFO) << "Query " << codegen->state_->query_id() << " encountered a "
+          << codegen->diagnostic_handler_.error_str_;
+    }
   }
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/7ce519f9/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test 
b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
index 9698717..81e459a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
@@ -20,6 +20,12 @@ symbol='FnDoesNotExist';
 Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so
 ====
 ---- QUERY
+create function if not exists foo (string, string) returns string location
+'$FILESYSTEM_PREFIX/test-warehouse/$DATABASE_bad_udf.ll' symbol='MyAwesomeUdf';
+---- CATCH
+Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/$DATABASE_bad_udf.ll
+====
+---- QUERY
 # This test is run with codegen disabled. Interpretation only handles up to 20 
arguments.
 create function if not exists twenty_args(int, int, int, int, int, int,
     int, int, int, int, int, int, int, int, int, int, int, int, int, int) 
returns int

http://git-wip-us.apache.org/repos/asf/impala/blob/7ce519f9/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 1ff716a..1be25e5 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -18,7 +18,11 @@
 from copy import copy
 import os
 import pytest
-from subprocess import check_call
+import random
+import threading
+import time
+import tempfile
+from subprocess import call, check_call
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_cluster import ImpalaCluster
@@ -327,8 +331,20 @@ class TestUdfExecution(TestUdfBase):
     # Aim to exercise two failure cases:
     # 1. too many arguments
     # 2. IR UDF
-    if vector.get_value('exec_option')['disable_codegen']:
-      self.run_test_case('QueryTest/udf-errors', vector, 
use_db=unique_database)
+    fd, dir_name = tempfile.mkstemp()
+    try:
+      with open(dir_name, "w") as f:
+        f.write("Hello World")
+      check_call(["hadoop", "fs", "-put", "-f", f.name, "/test-warehouse/" +
+                unique_database + "_bad_udf.ll"])
+      if vector.get_value('exec_option')['disable_codegen']:
+        self.run_test_case('QueryTest/udf-errors', vector, 
use_db=unique_database)
+    finally:
+      if os.path.exists(f.name):
+        os.remove(f.name)
+      call(["hadoop", "fs", "-rm", "-f", "/test-warehouse/" +
+                  unique_database + "_bad_udf.ll"])
+      os.close(fd)
 
   # Run serially because this will blow the process limit, potentially causing 
other
   # queries to fail

Reply via email to