Repository: incubator-impala
Updated Branches:
  refs/heads/master 2e6375285 -> ac2217b69


IMPALA-5252: Fix crash in HiveUdfCall::GetStringVal() when mem_limit exceeded

We need to check for AllocateLocal() returning NULL. CopyFrom() takes
care of that for us.  Also adjust a few other places in the code base
that didn't have the check.

The new test reproduces the crash, but in order to get this test file to
execute, I had to move the xfail to be a function decorator. Apparently
xfail as a statement causes the test to not run at all. We should run
all of these queries even if they are non-determistic to at least verify
that impalad does not crash.

Change-Id: Iafefef24479164cc4d2b99191d2de28eb8b311b6
Reviewed-on: http://gerrit.cloudera.org:8080/6761
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/741421de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/741421de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/741421de

Branch: refs/heads/master
Commit: 741421de09f4236d9a45c00753501d6e6abe90ca
Parents: 2e63752
Author: Dan Hecht <[email protected]>
Authored: Thu Apr 27 16:03:31 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Sat Apr 29 02:23:51 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/aggregate-functions-ir.cc                   |  6 ++----
 be/src/exprs/hive-udf-call.cc                            |  6 +-----
 be/src/exprs/udf-builtins-ir.cc                          |  6 ++----
 be/src/udf/uda-test.cc                                   |  3 +--
 be/src/udf/udf-test.cc                                   |  1 +
 be/src/udf_samples/uda-sample.cc                         | 11 ++++++-----
 .../queries/QueryTest/alloc-fail-update.test             |  9 +++++++++
 tests/custom_cluster/test_alloc_fail.py                  | 10 +++++-----
 8 files changed, 27 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/exprs/aggregate-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/aggregate-functions-ir.cc 
b/be/src/exprs/aggregate-functions-ir.cc
index 72a78b9..611945a 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -1291,10 +1291,8 @@ StringVal 
AggregateFunctions::ReservoirSampleFinalize(FunctionContext* ctx,
     if (i < (src_state->num_samples() - 1)) out << ", ";
   }
   const string& out_str = out.str();
-  StringVal result_str(ctx, out_str.size());
-  if (LIKELY(!result_str.is_null)) {
-    memcpy(result_str.ptr, out_str.c_str(), result_str.len);
-  }
+  StringVal result_str = StringVal::CopyFrom(ctx,
+      reinterpret_cast<const uint8_t*>(out_str.c_str()), out_str.size());
   src_state->Delete(ctx);
   return result_str;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/exprs/hive-udf-call.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc
index cd27dab..169b603 100644
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -323,15 +323,11 @@ DoubleVal HiveUdfCall::GetDoubleVal(ExprContext* ctx, 
const TupleRow* row) {
 StringVal HiveUdfCall::GetStringVal(ExprContext* ctx, const TupleRow* row) {
   DCHECK_EQ(type_.type, TYPE_STRING);
   StringVal result = *reinterpret_cast<StringVal*>(Evaluate(ctx, row));
-
   // Copy the string into a local allocation with the usual lifetime for expr 
results.
   // Needed because the UDF output buffer is owned by the Java UDF executor 
and may be
   // freed or reused by the next call into the Java UDF executor.
   FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_);
-  uint8_t* local_alloc = fn_ctx->impl()->AllocateLocal(result.len);
-  memcpy(local_alloc, result.ptr, result.len);
-  result.ptr = local_alloc;
-  return result;
+  return StringVal::CopyFrom(fn_ctx, result.ptr, result.len);
 }
 
 TimestampVal HiveUdfCall::GetTimestampVal(ExprContext* ctx, const TupleRow* 
row) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/exprs/udf-builtins-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins-ir.cc b/be/src/exprs/udf-builtins-ir.cc
index 376a0d0..452c5b4 100644
--- a/be/src/exprs/udf-builtins-ir.cc
+++ b/be/src/exprs/udf-builtins-ir.cc
@@ -509,10 +509,8 @@ StringVal UdfBuiltins::PrintVector(FunctionContext* 
context, const StringVal& ar
   }
   ss << ">";
   const string& str = ss.str();
-  StringVal result(context, str.size());
-  if (UNLIKELY(result.is_null)) return StringVal::null();
-  memcpy(result.ptr, str.c_str(), str.size());
-  return result;
+  return StringVal::CopyFrom(context, reinterpret_cast<const 
uint8_t*>(str.c_str()),
+      str.size());
 }
 
 DoubleVal UdfBuiltins::VectorGet(FunctionContext* context, const BigIntVal& 
index,

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/udf/uda-test.cc
----------------------------------------------------------------------
diff --git a/be/src/udf/uda-test.cc b/be/src/udf/uda-test.cc
index 3ca145f..d589c75 100644
--- a/be/src/udf/uda-test.cc
+++ b/be/src/udf/uda-test.cc
@@ -139,8 +139,7 @@ void MinMerge(FunctionContext* context, const BufferVal& 
src, BufferVal* dst) {
 StringVal MinFinalize(FunctionContext* context, const BufferVal& val) {
   const MinState* state = reinterpret_cast<const MinState*>(val);
   if (state->value == NULL) return StringVal::null();
-  StringVal result = StringVal(context, state->len);
-  memcpy(result.ptr, state->value, state->len);
+  StringVal result = StringVal::CopyFrom(context, state->value, state->len);
   context->Free(state->value);
   return result;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/udf/udf-test.cc
----------------------------------------------------------------------
diff --git a/be/src/udf/udf-test.cc b/be/src/udf/udf-test.cc
index d35ca48..796bcb3 100644
--- a/be/src/udf/udf-test.cc
+++ b/be/src/udf/udf-test.cc
@@ -47,6 +47,7 @@ StringVal UpperUdf(FunctionContext* context, const StringVal& 
input) {
   if (input.is_null) return StringVal::null();
   // Create a new StringVal object that's the same length as the input
   StringVal result = StringVal(context, input.len);
+  if (result.is_null) return StringVal::null();
   for (int i = 0; i < input.len; ++i) {
     result.ptr[i] = toupper(input.ptr[i]);
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/udf_samples/uda-sample.cc
----------------------------------------------------------------------
diff --git a/be/src/udf_samples/uda-sample.cc b/be/src/udf_samples/uda-sample.cc
index 23ce807..e8ff4fa 100644
--- a/be/src/udf_samples/uda-sample.cc
+++ b/be/src/udf_samples/uda-sample.cc
@@ -87,14 +87,15 @@ void StringConcatUpdate(FunctionContext* context, const 
StringVal& arg1,
     const StringVal& arg2, StringVal* val) {
   if (val->is_null) {
     val->is_null = false;
-    *val = StringVal(context, arg1.len);
-    memcpy(val->ptr, arg1.ptr, arg1.len);
+    *val = StringVal::CopyFrom(context, arg1.ptr, arg1.len);
   } else {
     int new_len = val->len + arg1.len + arg2.len;
     StringVal new_val(context, new_len);
-    memcpy(new_val.ptr, val->ptr, val->len);
-    memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len);
-    memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len);
+    if (!new_val.is_null) {
+      memcpy(new_val.ptr, val->ptr, val->len);
+      memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len);
+      memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len);
+    }
     *val = new_val;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test 
b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test
index c107ee8..d2b6f6a 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test
@@ -50,3 +50,12 @@ select ndv(l_partkey), distinctpc(l_suppkey) from 
tpch.lineitem
 ---- CATCH
 failed to allocate
 ====
+---- QUERY
+# IMPALA-5252: Verify HiveUdfCall allocations are checked.
+create function replace_string(string) returns string
+location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
+symbol='org.apache.impala.ReplaceStringUdf';
+select replace_string(l_comment) from tpch.lineitem limit 10;
+---- CATCH
+failed to allocate
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/tests/custom_cluster/test_alloc_fail.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_alloc_fail.py 
b/tests/custom_cluster/test_alloc_fail.py
index 28cdc19..f1ae3dc 100644
--- a/tests/custom_cluster/test_alloc_fail.py
+++ b/tests/custom_cluster/test_alloc_fail.py
@@ -33,14 +33,14 @@ class TestAllocFail(CustomClusterTestSuite):
   def test_alloc_fail_init(self, vector):
     self.run_test_case('QueryTest/alloc-fail-init', vector)
 
+  # TODO: Rewrite or remove the non-deterministic test.
+  @pytest.mark.xfail(run=True, reason="IMPALA-2925: the execution is not 
deterministic "
+                     "so some tests sometimes don't fail as expected")
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--stress_free_pool_alloc=3")
-  def test_alloc_fail_update(self, vector):
-    # TODO: Rewrite or remove the non-deterministic test.
-    pytest.xfail("IMPALA-2925: the execution is not deterministic so some "
-                 "tests sometimes don't fail as expected")
+  def test_alloc_fail_update(self, vector, unique_database):
     # Note that this test relies on pre-aggregation to exercise the 
Serialize() path so
     # query option 'num_nodes' must not be 1. 
CustomClusterTestSuite.add_test_dimensions()
     # already filters out vectors with 'num_nodes' != 0 so just assert it here.
     assert vector.get_value('exec_option')['num_nodes'] == 0
-    self.run_test_case('QueryTest/alloc-fail-update', vector)
+    self.run_test_case('QueryTest/alloc-fail-update', vector, 
use_db=unique_database)

Reply via email to