IMPALA-7822: handle overflows in repeat() builtin We need to carefully check that the intermediate value fits in an int64_t and the final size fits in an int. If they don't we raise an error and fail the query.
Testing: Added a couple of backend tests to exercise the overflow check code paths. Change-Id: I872ce77bc2cb29116881c27ca2a5216f722cdb2a Reviewed-on: http://gerrit.cloudera.org:8080/11889 Reviewed-by: Thomas Marshall <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1760339a Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1760339a Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1760339a Branch: refs/heads/branch-3.1.0 Commit: 1760339a5fb4375a22c3579e5eeb505335bcf8f6 Parents: ac2b7c9 Author: Tim Armstrong <[email protected]> Authored: Tue Nov 6 13:31:38 2018 -0800 Committer: Zoltan Borok-Nagy <[email protected]> Committed: Tue Nov 13 12:51:40 2018 +0100 ---------------------------------------------------------------------- be/src/exprs/expr-test.cc | 8 ++++++-- be/src/exprs/string-functions-ir.cc | 16 +++++++++++++++- .../queries/QueryTest/large_strings.test | 9 +++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/1760339a/be/src/exprs/expr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index dd6f1b3..bab16ca 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -611,8 +611,8 @@ class ExprTest : public testing::Test { Status status = executor_->Exec(stmt, &result_types); status = executor_->FetchResult(&result_row); ASSERT_FALSE(status.ok()); - ASSERT_TRUE(EndsWith(status.msg().msg(), error_string)) << "Actual: " - << status.msg().msg() << endl << "Expected: " << error_string; + ASSERT_TRUE(EndsWith(status.msg().msg(), error_string)) << "Actual: '" + << status.msg().msg() << "'" << endl << "Expected: '" << error_string << "'"; } template <typename T> void TestFixedPointComparisons(bool test_boundaries) { @@ -4082,6 +4082,10 @@ TEST_F(ExprTest, StringFunctions) { TestIsNull("repeat(NULL, 6)", TYPE_STRING); TestIsNull("repeat('ab', NULL)", TYPE_STRING); TestIsNull("repeat(NULL, NULL)", TYPE_STRING); + TestErrorString("repeat('x', 1024 * 1024 * 1024 * 10)", "Number of repeats in " + "repeat() call is larger than allowed limit of 1 GB character data.\n"); + TestErrorString("repeat('xx', 1024 * 1024 * 1024)", "repeat() result is larger than " + "allowed limit of 1 GB character data.\n"); TestValue("ascii('')", TYPE_INT, 0); TestValue("ascii('abcde')", TYPE_INT, 'a'); http://git-wip-us.apache.org/repos/asf/impala/blob/1760339a/be/src/exprs/string-functions-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc index f5e8213..258f2cb 100644 --- a/be/src/exprs/string-functions-ir.cc +++ b/be/src/exprs/string-functions-ir.cc @@ -93,7 +93,21 @@ StringVal StringFunctions::Repeat( FunctionContext* context, const StringVal& str, const BigIntVal& n) { if (str.is_null || n.is_null) return StringVal::null(); if (str.len == 0 || n.val <= 0) return StringVal(); - StringVal result(context, str.len * n.val); + if (n.val > StringVal::MAX_LENGTH) { + context->SetError("Number of repeats in repeat() call is larger than allowed limit " + "of 1 GB character data."); + return StringVal::null(); + } + static_assert(numeric_limits<int64_t>::max() / numeric_limits<int>::max() + >= StringVal::MAX_LENGTH, + "multiplying StringVal::len with positive int fits in int64_t"); + int64_t out_len = str.len * n.val; + if (out_len > StringVal::MAX_LENGTH) { + context->SetError( + "repeat() result is larger than allowed limit of 1 GB character data."); + return StringVal::null(); + } + StringVal result(context, static_cast<int>(out_len)); if (UNLIKELY(result.is_null)) return StringVal::null(); uint8_t* ptr = result.ptr; for (int64_t i = 0; i < n.val; ++i) { http://git-wip-us.apache.org/repos/asf/impala/blob/1760339a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test index 1d930db..0cca71b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test +++ b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test @@ -129,7 +129,7 @@ select length(repeat(s, 10)) from ( select l_comment from tpch.lineitem) t1 ) t2 ---- CATCH -String length larger than allowed limit of 1 GB character data +repeat() result is larger than allowed limit of 1 GB character data ===== ---- QUERY select length(lpad(s, 1073741830, '!')) from ( @@ -225,4 +225,9 @@ select cast(fnv_hash(concat(l_comment, 'e')) as string) as h from tpch_parquet.l INT,INT ---- RESULTS 611468161,611468161 -===== \ No newline at end of file +===== +---- QUERY +select repeat('the quick brown fox', 1024 * 1024 * 100) +---- CATCH +repeat() result is larger than allowed limit of 1 GB character data +=====
