IMPALA-5871: KuduPartitionExpr incorrectly handles its child types KuduPartitionExpr takes input rows and sends them to Kudu to determine the partition the rows correspond to in a particular table's partitioning scheme. This is then used to partition and sort rows before sending them to Kudu when performing an INSERT.
If the input types are not the same as (but are compatible with) the types of the columns in the table, we need to cast the input rows. KuduPartitionExpr.analyze() actually already does this, but the casts are dropped for the sort step during the INSERT in most cases. As a result, attempting to insert a string value into a Kudu timestamp column causes a crash. Inserting a numeric value into a different but compatibly typed col (eg. tinyint into an int col) will cause the sort during a Kudu INSERT to operate on garbage values, potentially degrading performance and causing INSERTs to fail due to Kudu timeouts (see IMPALA-3742). Testing: - Added an e2e test in kudu_insert.test Change-Id: I44cf31e46a77f3e7c92cf6b9112653808a001705 Reviewed-on: http://gerrit.cloudera.org:8080/7922 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/64e28021 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/64e28021 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/64e28021 Branch: refs/heads/master Commit: 64e28021957f6993aea5ceb3ad626fb577597107 Parents: 1b70eb6 Author: Thomas Tauber-Marshall <[email protected]> Authored: Thu Aug 31 09:15:22 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Sep 1 21:31:55 2017 +0000 ---------------------------------------------------------------------- be/src/exprs/kudu-partition-expr.cc | 1 + .../impala/analysis/KuduPartitionExpr.java | 4 +-- .../queries/QueryTest/kudu_insert.test | 27 +++++++++++++++----- 3 files changed, 23 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64e28021/be/src/exprs/kudu-partition-expr.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/kudu-partition-expr.cc b/be/src/exprs/kudu-partition-expr.cc index 6f35268..2cc96f0 100644 --- a/be/src/exprs/kudu-partition-expr.cc +++ b/be/src/exprs/kudu-partition-expr.cc @@ -71,6 +71,7 @@ IntVal KuduPartitionExpr::GetIntVal(ScalarExprEvaluator* eval, int col = tkudu_partition_expr_.referenced_columns[i]; const ColumnDescriptor& col_desc = table_desc_->col_descs()[col]; PrimitiveType type = col_desc.type().type; + DCHECK_EQ(GetChild(i)->type().type, type); Status s = WriteKuduValue(col, type, val, false, row_.get()); // This can only fail if we set a col to an incorect type, which would be a bug in // planning, so we can DCHECK. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64e28021/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java index cc42804..8d52d59 100644 --- a/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java +++ b/fe/src/main/java/org/apache/impala/analysis/KuduPartitionExpr.java @@ -73,8 +73,8 @@ public class KuduPartitionExpr extends Expr { // IMPALA-5294: If one of the children is a NullLiteral, it has to be cast to a type // to be passed to the BE. for (int i = 0; i < children_.size(); ++i) { - children_.get(i).castTo( - targetTable_.getColumns().get(partitionColPos_.get(i)).getType()); + children_.set(i, children_.get(i).castTo( + targetTable_.getColumns().get(partitionColPos_.get(i)).getType())); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/64e28021/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test index 76ad779..1150898 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_insert.test @@ -287,22 +287,35 @@ NumRowErrors: 7299 ---- QUERY # Table with all supported types as primary key and distribution columns create table allkeytypes (i1 tinyint, i2 smallint, i3 int, i4 bigint, name string, - valf float, vald double, primary key (i1, i2, i3, i4, name)) partition by - hash partitions 3, range (partition value = (1,1,1,1,'1'), - partition value = (2,2,2,2,'2'), partition value = (3,3,3,3,'3')) stored as kudu + valt timestamp, valf float, vald double, primary key (i1, i2, i3, i4, name, valt)) + partition by hash partitions 3, range + (partition value = (1,1,1,1,'1','2009-01-01 00:01:00'), + partition value = (2,2,2,2,'2','2009-01-01 00:02:00.100000000'), + partition value = (3,3,3,3,'3','2009-01-01 00:03:00.300000000')) stored as kudu ---- RESULTS ==== ---- QUERY insert into allkeytypes select cast(id as tinyint), smallint_col, int_col, - cast (bigint_col/10 as bigint), string_col, float_col, double_col - from functional.alltypes where id > 0 and id < 10 + cast (bigint_col/10 as bigint), string_col, timestamp_col, float_col, double_col + from functional.alltypes where id > 1 and id < 10 ---- RESULTS -: 3 +: 2 ---- RUNTIME_PROFILE -NumModifiedRows: 3 +NumModifiedRows: 2 NumRowErrors: 6 ==== ---- QUERY +# IMPALA-5871 - test that a cast is correctly added when inserting a string into a Kudu +# timestamp partition column with distributed exec. +set exec_single_node_rows_threshold=0; +insert into allkeytypes values (1,1,1,1,'1','2009-01-01 00:01:00',null,null) +---- RESULTS +: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 +NumRowErrors: 0 +==== +---- QUERY # Table with default values create table tbl_with_defaults (a int primary key, b int null default 10, c int not null default 100, d int default 1000, e int null, f int not null,
