Repository: incubator-impala Updated Branches: refs/heads/master 2e6375285 -> ac2217b69
IMPALA-5252: Fix crash in HiveUdfCall::GetStringVal() when mem_limit exceeded We need to check for AllocateLocal() returning NULL. CopyFrom() takes care of that for us. Also adjust a few other places in the code base that didn't have the check. The new test reproduces the crash, but in order to get this test file to execute, I had to move the xfail to be a function decorator. Apparently xfail as a statement causes the test to not run at all. We should run all of these queries even if they are non-determistic to at least verify that impalad does not crash. Change-Id: Iafefef24479164cc4d2b99191d2de28eb8b311b6 Reviewed-on: http://gerrit.cloudera.org:8080/6761 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/741421de Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/741421de Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/741421de Branch: refs/heads/master Commit: 741421de09f4236d9a45c00753501d6e6abe90ca Parents: 2e63752 Author: Dan Hecht <[email protected]> Authored: Thu Apr 27 16:03:31 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Apr 29 02:23:51 2017 +0000 ---------------------------------------------------------------------- be/src/exprs/aggregate-functions-ir.cc | 6 ++---- be/src/exprs/hive-udf-call.cc | 6 +----- be/src/exprs/udf-builtins-ir.cc | 6 ++---- be/src/udf/uda-test.cc | 3 +-- be/src/udf/udf-test.cc | 1 + be/src/udf_samples/uda-sample.cc | 11 ++++++----- .../queries/QueryTest/alloc-fail-update.test | 9 +++++++++ tests/custom_cluster/test_alloc_fail.py | 10 +++++----- 8 files changed, 27 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/exprs/aggregate-functions-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc index 72a78b9..611945a 100644 --- a/be/src/exprs/aggregate-functions-ir.cc +++ b/be/src/exprs/aggregate-functions-ir.cc @@ -1291,10 +1291,8 @@ StringVal AggregateFunctions::ReservoirSampleFinalize(FunctionContext* ctx, if (i < (src_state->num_samples() - 1)) out << ", "; } const string& out_str = out.str(); - StringVal result_str(ctx, out_str.size()); - if (LIKELY(!result_str.is_null)) { - memcpy(result_str.ptr, out_str.c_str(), result_str.len); - } + StringVal result_str = StringVal::CopyFrom(ctx, + reinterpret_cast<const uint8_t*>(out_str.c_str()), out_str.size()); src_state->Delete(ctx); return result_str; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/exprs/hive-udf-call.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc index cd27dab..169b603 100644 --- a/be/src/exprs/hive-udf-call.cc +++ b/be/src/exprs/hive-udf-call.cc @@ -323,15 +323,11 @@ DoubleVal HiveUdfCall::GetDoubleVal(ExprContext* ctx, const TupleRow* row) { StringVal HiveUdfCall::GetStringVal(ExprContext* ctx, const TupleRow* row) { DCHECK_EQ(type_.type, TYPE_STRING); StringVal result = *reinterpret_cast<StringVal*>(Evaluate(ctx, row)); - // Copy the string into a local allocation with the usual lifetime for expr results. // Needed because the UDF output buffer is owned by the Java UDF executor and may be // freed or reused by the next call into the Java UDF executor. FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); - uint8_t* local_alloc = fn_ctx->impl()->AllocateLocal(result.len); - memcpy(local_alloc, result.ptr, result.len); - result.ptr = local_alloc; - return result; + return StringVal::CopyFrom(fn_ctx, result.ptr, result.len); } TimestampVal HiveUdfCall::GetTimestampVal(ExprContext* ctx, const TupleRow* row) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/exprs/udf-builtins-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/udf-builtins-ir.cc b/be/src/exprs/udf-builtins-ir.cc index 376a0d0..452c5b4 100644 --- a/be/src/exprs/udf-builtins-ir.cc +++ b/be/src/exprs/udf-builtins-ir.cc @@ -509,10 +509,8 @@ StringVal UdfBuiltins::PrintVector(FunctionContext* context, const StringVal& ar } ss << ">"; const string& str = ss.str(); - StringVal result(context, str.size()); - if (UNLIKELY(result.is_null)) return StringVal::null(); - memcpy(result.ptr, str.c_str(), str.size()); - return result; + return StringVal::CopyFrom(context, reinterpret_cast<const uint8_t*>(str.c_str()), + str.size()); } DoubleVal UdfBuiltins::VectorGet(FunctionContext* context, const BigIntVal& index, http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/udf/uda-test.cc ---------------------------------------------------------------------- diff --git a/be/src/udf/uda-test.cc b/be/src/udf/uda-test.cc index 3ca145f..d589c75 100644 --- a/be/src/udf/uda-test.cc +++ b/be/src/udf/uda-test.cc @@ -139,8 +139,7 @@ void MinMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) { StringVal MinFinalize(FunctionContext* context, const BufferVal& val) { const MinState* state = reinterpret_cast<const MinState*>(val); if (state->value == NULL) return StringVal::null(); - StringVal result = StringVal(context, state->len); - memcpy(result.ptr, state->value, state->len); + StringVal result = StringVal::CopyFrom(context, state->value, state->len); context->Free(state->value); return result; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/udf/udf-test.cc ---------------------------------------------------------------------- diff --git a/be/src/udf/udf-test.cc b/be/src/udf/udf-test.cc index d35ca48..796bcb3 100644 --- a/be/src/udf/udf-test.cc +++ b/be/src/udf/udf-test.cc @@ -47,6 +47,7 @@ StringVal UpperUdf(FunctionContext* context, const StringVal& input) { if (input.is_null) return StringVal::null(); // Create a new StringVal object that's the same length as the input StringVal result = StringVal(context, input.len); + if (result.is_null) return StringVal::null(); for (int i = 0; i < input.len; ++i) { result.ptr[i] = toupper(input.ptr[i]); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/be/src/udf_samples/uda-sample.cc ---------------------------------------------------------------------- diff --git a/be/src/udf_samples/uda-sample.cc b/be/src/udf_samples/uda-sample.cc index 23ce807..e8ff4fa 100644 --- a/be/src/udf_samples/uda-sample.cc +++ b/be/src/udf_samples/uda-sample.cc @@ -87,14 +87,15 @@ void StringConcatUpdate(FunctionContext* context, const StringVal& arg1, const StringVal& arg2, StringVal* val) { if (val->is_null) { val->is_null = false; - *val = StringVal(context, arg1.len); - memcpy(val->ptr, arg1.ptr, arg1.len); + *val = StringVal::CopyFrom(context, arg1.ptr, arg1.len); } else { int new_len = val->len + arg1.len + arg2.len; StringVal new_val(context, new_len); - memcpy(new_val.ptr, val->ptr, val->len); - memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len); - memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len); + if (!new_val.is_null) { + memcpy(new_val.ptr, val->ptr, val->len); + memcpy(new_val.ptr + val->len, arg2.ptr, arg2.len); + memcpy(new_val.ptr + val->len + arg2.len, arg1.ptr, arg1.len); + } *val = new_val; } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test index c107ee8..d2b6f6a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test +++ b/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-update.test @@ -50,3 +50,12 @@ select ndv(l_partkey), distinctpc(l_suppkey) from tpch.lineitem ---- CATCH failed to allocate ==== +---- QUERY +# IMPALA-5252: Verify HiveUdfCall allocations are checked. +create function replace_string(string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.ReplaceStringUdf'; +select replace_string(l_comment) from tpch.lineitem limit 10; +---- CATCH +failed to allocate +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741421de/tests/custom_cluster/test_alloc_fail.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_alloc_fail.py b/tests/custom_cluster/test_alloc_fail.py index 28cdc19..f1ae3dc 100644 --- a/tests/custom_cluster/test_alloc_fail.py +++ b/tests/custom_cluster/test_alloc_fail.py @@ -33,14 +33,14 @@ class TestAllocFail(CustomClusterTestSuite): def test_alloc_fail_init(self, vector): self.run_test_case('QueryTest/alloc-fail-init', vector) + # TODO: Rewrite or remove the non-deterministic test. + @pytest.mark.xfail(run=True, reason="IMPALA-2925: the execution is not deterministic " + "so some tests sometimes don't fail as expected") @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("--stress_free_pool_alloc=3") - def test_alloc_fail_update(self, vector): - # TODO: Rewrite or remove the non-deterministic test. - pytest.xfail("IMPALA-2925: the execution is not deterministic so some " - "tests sometimes don't fail as expected") + def test_alloc_fail_update(self, vector, unique_database): # Note that this test relies on pre-aggregation to exercise the Serialize() path so # query option 'num_nodes' must not be 1. CustomClusterTestSuite.add_test_dimensions() # already filters out vectors with 'num_nodes' != 0 so just assert it here. assert vector.get_value('exec_option')['num_nodes'] == 0 - self.run_test_case('QueryTest/alloc-fail-update', vector) + self.run_test_case('QueryTest/alloc-fail-update', vector, use_db=unique_database)
