IMPALA-4266: Java udf returning string can give incorrect results The memory management of string results was wrong: strings returned from Exprs must live until the next time FreeLocalAllocations() is called. Otherwise the buffer holding the string is freed or reused by the next UDF call. The fix is to copy string values into a buffer with the right lifetime.
Testing: Added a regression test based on Bharath's example that reproduced the bug reliably. Change-Id: I705d271814cb1143f67d8a12f4fd87bab7a8e161 Reviewed-on: http://gerrit.cloudera.org:8080/4941 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/381e7190 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/381e7190 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/381e7190 Branch: refs/heads/hadoop-next Commit: 381e719065e9699fc2757295241594d548db6e1d Parents: 10fa472 Author: Tim Armstrong <[email protected]> Authored: Thu Nov 3 17:36:42 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Tue Nov 8 02:47:11 2016 +0000 ---------------------------------------------------------------------- be/src/exprs/hive-udf-call.cc | 11 ++++++- .../queries/QueryTest/java-udf.test | 23 +++++++++++++++ .../queries/QueryTest/load-java-udfs.test | 6 ++++ .../org/apache/impala/ReplaceStringUdf.java | 31 ++++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/be/src/exprs/hive-udf-call.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc index 7d82431..cd27dab 100644 --- a/be/src/exprs/hive-udf-call.cc +++ b/be/src/exprs/hive-udf-call.cc @@ -322,7 +322,16 @@ DoubleVal HiveUdfCall::GetDoubleVal(ExprContext* ctx, const TupleRow* row) { StringVal HiveUdfCall::GetStringVal(ExprContext* ctx, const TupleRow* row) { DCHECK_EQ(type_.type, TYPE_STRING); - return *reinterpret_cast<StringVal*>(Evaluate(ctx, row)); + StringVal result = *reinterpret_cast<StringVal*>(Evaluate(ctx, row)); + + // Copy the string into a local allocation with the usual lifetime for expr results. + // Needed because the UDF output buffer is owned by the Java UDF executor and may be + // freed or reused by the next call into the Java UDF executor. + FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_); + uint8_t* local_alloc = fn_ctx->impl()->AllocateLocal(result.len); + memcpy(local_alloc, result.ptr, result.len); + result.ptr = local_alloc; + return result; } TimestampVal HiveUdfCall::GetTimestampVal(ExprContext* ctx, const TupleRow* row) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/testdata/workloads/functional-query/queries/QueryTest/java-udf.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test index 1002e29..c473fdf 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test +++ b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test @@ -290,3 +290,26 @@ INT 999 999 ==== +---- QUERY +drop table if exists replace_string_input +==== +---- QUERY +create table replace_string_input as +values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff'); +==== +---- QUERY +# Regression test for IMPALA-4266: memory management bugs with output strings from +# Java UDFS, exposed by using the UDF as a grouping key in an aggregation. +# The UDF replaces "s" with "ss" in the strings. +select distinct udf_test.replace_string(_c0) as es +from replace_string_input +order by 1; +---- TYPES +string +---- RESULTS +'sscone' +'ssssssssss' +'sstuff' +'toasst' +'yess' +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test b/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test index 3f86a9e..ab0c8bb 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test @@ -29,6 +29,8 @@ drop function if exists udf_test.throws_exception(); drop function if exists java_udfs_test.identity; +drop function if exists udf_test.replace_string(string); + create function udf_test.hive_pi() returns double location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'; @@ -121,4 +123,8 @@ symbol='org.apache.impala.TestUdf'; create function udf_test.throws_exception() returns boolean location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' symbol='org.apache.impala.TestUdfException'; + +create function udf_test.replace_string(string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.ReplaceStringUdf'; ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java ---------------------------------------------------------------------- diff --git a/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java b/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java new file mode 100644 index 0000000..c33d846 --- /dev/null +++ b/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala; + +import java.text.ParseException; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.Text; + +public class ReplaceStringUdf extends UDF { + public Text evaluate(Text para) throws ParseException { + if ((null == para) || ("".equals(para.toString()))) { + return new Text(""); + } + return new Text(para.toString().replace("s", "ss")); + } +}
