IMPALA-6771: Fix in-predicate setup bug. Fixes a bug that introduced default-initialized values into the set data structure used to check for set membership, which can cause wrong results.
Testing: Added a test case that checks for the same. Change-Id: I7e776dbcb7ee4a9b64e1295134a27d332f5415b6 Reviewed-on: http://gerrit.cloudera.org:8080/9891 Reviewed-by: Sailesh Mukil <sail...@cloudera.com> Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/bd63208b Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/bd63208b Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/bd63208b Branch: refs/heads/2.x Commit: bd63208bfcfcfa893b979e76358cd40f71114979 Parents: 4c6e1db Author: Bikramjeet Vig <bikramjeet....@cloudera.com> Authored: Mon Apr 2 13:55:48 2018 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Wed Apr 11 22:56:00 2018 +0000 ---------------------------------------------------------------------- be/src/exprs/in-predicate.h | 3 ++- .../functional-query/queries/QueryTest/exprs.test | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/bd63208b/be/src/exprs/in-predicate.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/in-predicate.h b/be/src/exprs/in-predicate.h index 2d439ba..87ae8b8 100644 --- a/be/src/exprs/in-predicate.h +++ b/be/src/exprs/in-predicate.h @@ -350,7 +350,8 @@ void InPredicate::SetLookupPrepare( state->contains_null = false; // Collect all values in a vector to use the bulk insert API to avoid N^2 behavior // with flat_set. 
- vector<SetType> element_list(ctx->GetNumArgs()); + std::vector<SetType> element_list; + element_list.reserve(ctx->GetNumArgs() - 1); for (int i = 1; i < ctx->GetNumArgs(); ++i) { DCHECK(ctx->IsArgConstant(i)); T* arg = reinterpret_cast<T*>(ctx->GetConstantArg(i)); http://git-wip-us.apache.org/repos/asf/impala/blob/bd63208b/testdata/workloads/functional-query/queries/QueryTest/exprs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index 2902c97..1e5a589 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -2994,3 +2994,15 @@ select cast('2001-1-2' as timestamp) ---- TYPES timestamp ==== +---- QUERY +# IMPALA-6771: Test that the in-predicate set does not have default initialized +# values that can result in wrong results. For a string column the default initialized +# value is an empty string. +select count(*) from functional.alltypes +where regexp_replace(string_col, '1', '') +in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') +---- RESULTS +6570 +---- TYPES +bigint +====