IMPALA-7822: handle overflows in repeat() builtin

We need to carefully check that the intermediate value fits in an
int64_t and the final size fits in an int. If they don't we
raise an error and fail the query.

Testing:
Added a couple of backend tests to exercise the
overflow check code paths.

Change-Id: I872ce77bc2cb29116881c27ca2a5216f722cdb2a
Reviewed-on: http://gerrit.cloudera.org:8080/11889
Reviewed-by: Thomas Marshall <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1760339a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1760339a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1760339a

Branch: refs/heads/branch-3.1.0
Commit: 1760339a5fb4375a22c3579e5eeb505335bcf8f6
Parents: ac2b7c9
Author: Tim Armstrong <[email protected]>
Authored: Tue Nov 6 13:31:38 2018 -0800
Committer: Zoltan Borok-Nagy <[email protected]>
Committed: Tue Nov 13 12:51:40 2018 +0100

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                           |  8 ++++++--
 be/src/exprs/string-functions-ir.cc                 | 16 +++++++++++++++-
 .../queries/QueryTest/large_strings.test            |  9 +++++++--
 3 files changed, 28 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1760339a/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index dd6f1b3..bab16ca 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -611,8 +611,8 @@ class ExprTest : public testing::Test {
     Status status = executor_->Exec(stmt, &result_types);
     status = executor_->FetchResult(&result_row);
     ASSERT_FALSE(status.ok());
-    ASSERT_TRUE(EndsWith(status.msg().msg(), error_string)) << "Actual: "
-        << status.msg().msg() << endl << "Expected: " << error_string;
+    ASSERT_TRUE(EndsWith(status.msg().msg(), error_string)) << "Actual: '"
+        << status.msg().msg() << "'" << endl << "Expected: '" << error_string 
<< "'";
   }
 
   template <typename T> void TestFixedPointComparisons(bool test_boundaries) {
@@ -4082,6 +4082,10 @@ TEST_F(ExprTest, StringFunctions) {
   TestIsNull("repeat(NULL, 6)", TYPE_STRING);
   TestIsNull("repeat('ab', NULL)", TYPE_STRING);
   TestIsNull("repeat(NULL, NULL)", TYPE_STRING);
+  TestErrorString("repeat('x', 1024 * 1024 * 1024 * 10)", "Number of repeats 
in "
+      "repeat() call is larger than allowed limit of 1 GB character data.\n");
+  TestErrorString("repeat('xx', 1024 * 1024 * 1024)", "repeat() result is 
larger than "
+      "allowed limit of 1 GB character data.\n");
 
   TestValue("ascii('')", TYPE_INT, 0);
   TestValue("ascii('abcde')", TYPE_INT, 'a');

http://git-wip-us.apache.org/repos/asf/impala/blob/1760339a/be/src/exprs/string-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/string-functions-ir.cc 
b/be/src/exprs/string-functions-ir.cc
index f5e8213..258f2cb 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -93,7 +93,21 @@ StringVal StringFunctions::Repeat(
     FunctionContext* context, const StringVal& str, const BigIntVal& n) {
   if (str.is_null || n.is_null) return StringVal::null();
   if (str.len == 0 || n.val <= 0) return StringVal();
-  StringVal result(context, str.len * n.val);
+  if (n.val > StringVal::MAX_LENGTH) {
+    context->SetError("Number of repeats in repeat() call is larger than 
allowed limit "
+        "of 1 GB character data.");
+    return StringVal::null();
+  }
+  static_assert(numeric_limits<int64_t>::max() / numeric_limits<int>::max()
+      >= StringVal::MAX_LENGTH,
+      "multiplying StringVal::len with positive int fits in int64_t");
+  int64_t out_len = str.len * n.val;
+  if (out_len > StringVal::MAX_LENGTH) {
+    context->SetError(
+        "repeat() result is larger than allowed limit of 1 GB character 
data.");
+    return StringVal::null();
+  }
+  StringVal result(context, static_cast<int>(out_len));
   if (UNLIKELY(result.is_null)) return StringVal::null();
   uint8_t* ptr = result.ptr;
   for (int64_t i = 0; i < n.val; ++i) {

http://git-wip-us.apache.org/repos/asf/impala/blob/1760339a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test 
b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
index 1d930db..0cca71b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
@@ -129,7 +129,7 @@ select length(repeat(s, 10)) from (
     select l_comment from tpch.lineitem) t1
   ) t2
 ---- CATCH
-String length larger than allowed limit of 1 GB character data
+repeat() result is larger than allowed limit of 1 GB character data
 =====
 ---- QUERY
 select length(lpad(s, 1073741830, '!')) from (
@@ -225,4 +225,9 @@ select cast(fnv_hash(concat(l_comment, 'e')) as string) as 
h from tpch_parquet.l
 INT,INT
 ---- RESULTS
 611468161,611468161
-=====
\ No newline at end of file
+=====
+---- QUERY
+select repeat('the quick brown fox', 1024 * 1024 * 100)
+---- CATCH
+repeat() result is larger than allowed limit of 1 GB character data
+=====

Reply via email to