IMPALA-4266: Java udf returning string can give incorrect results

The memory management of string results was wrong: strings returned from
Exprs must live until the next time FreeLocalAllocations() is called.
Otherwise the buffer holding the string is freed or reused by the next
UDF call. The fix is to copy string values into a buffer with the
right lifetime.

Testing:
Added a regression test based on Bharath's example that reproduced the
bug reliably.

Change-Id: I705d271814cb1143f67d8a12f4fd87bab7a8e161
Reviewed-on: http://gerrit.cloudera.org:8080/4941
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/381e7190
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/381e7190
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/381e7190

Branch: refs/heads/hadoop-next
Commit: 381e719065e9699fc2757295241594d548db6e1d
Parents: 10fa472
Author: Tim Armstrong <[email protected]>
Authored: Thu Nov 3 17:36:42 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Tue Nov 8 02:47:11 2016 +0000

----------------------------------------------------------------------
 be/src/exprs/hive-udf-call.cc                   | 11 ++++++-
 .../queries/QueryTest/java-udf.test             | 23 +++++++++++++++
 .../queries/QueryTest/load-java-udfs.test       |  6 ++++
 .../org/apache/impala/ReplaceStringUdf.java     | 31 ++++++++++++++++++++
 4 files changed, 70 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/be/src/exprs/hive-udf-call.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc
index 7d82431..cd27dab 100644
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -322,7 +322,16 @@ DoubleVal HiveUdfCall::GetDoubleVal(ExprContext* ctx, 
const TupleRow* row) {
 
 StringVal HiveUdfCall::GetStringVal(ExprContext* ctx, const TupleRow* row) {
   DCHECK_EQ(type_.type, TYPE_STRING);
-  return *reinterpret_cast<StringVal*>(Evaluate(ctx, row));
+  StringVal result = *reinterpret_cast<StringVal*>(Evaluate(ctx, row));
+
+  // Copy the string into a local allocation with the usual lifetime for expr 
results.
+  // Needed because the UDF output buffer is owned by the Java UDF executor 
and may be
+  // freed or reused by the next call into the Java UDF executor.
+  FunctionContext* fn_ctx = ctx->fn_context(fn_context_index_);
+  uint8_t* local_alloc = fn_ctx->impl()->AllocateLocal(result.len);
+  memcpy(local_alloc, result.ptr, result.len);
+  result.ptr = local_alloc;
+  return result;
 }
 
 TimestampVal HiveUdfCall::GetTimestampVal(ExprContext* ctx, const TupleRow* 
row) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test 
b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
index 1002e29..c473fdf 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test
@@ -290,3 +290,26 @@ INT
 999
 999
 ====
+---- QUERY
+drop table if exists replace_string_input
+====
+---- QUERY
+create table replace_string_input as
+values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), 
('stuff');
+====
+---- QUERY
+# Regression test for IMPALA-4266: memory management bugs with output strings 
from
+# Java UDFS, exposed by using the UDF as a grouping key in an aggregation.
+# The UDF replaces "s" with "ss" in the strings.
+select distinct udf_test.replace_string(_c0) as es
+from replace_string_input
+order by 1;
+---- TYPES
+string
+---- RESULTS
+'sscone'
+'ssssssssss'
+'sstuff'
+'toasst'
+'yess'
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test 
b/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test
index 3f86a9e..ab0c8bb 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/load-java-udfs.test
@@ -29,6 +29,8 @@ drop function if exists udf_test.throws_exception();
 
 drop function if exists java_udfs_test.identity;
 
+drop function if exists udf_test.replace_string(string);
+
 create function udf_test.hive_pi() returns double
 location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar'
 symbol='org.apache.hadoop.hive.ql.udf.UDFPI';
@@ -121,4 +123,8 @@ symbol='org.apache.impala.TestUdf';
 create function udf_test.throws_exception() returns boolean
 location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
 symbol='org.apache.impala.TestUdfException';
+
+create function udf_test.replace_string(string) returns string
+location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar'
+symbol='org.apache.impala.ReplaceStringUdf';
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/381e7190/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java
----------------------------------------------------------------------
diff --git 
a/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java 
b/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java
new file mode 100644
index 0000000..c33d846
--- /dev/null
+++ b/tests/test-hive-udfs/src/main/java/org/apache/impala/ReplaceStringUdf.java
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala;
+
+import java.text.ParseException;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+
+public class ReplaceStringUdf extends UDF {
+  public Text evaluate(Text para) throws ParseException {
+    if ((null == para) || ("".equals(para.toString()))) {
+      return new Text("");
+    }
+    return new Text(para.toString().replace("s", "ss"));
+  }
+}

Reply via email to