This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch branch-4.1 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-4.1 by this push: new 8583b373e70 HIVE-29080: Fix Incorrect results for queries on Decimal column Partitioned table with Vectorization enabled (#5965) 8583b373e70 is described below commit 8583b373e7026ae284cab92e0cd4472536a38ab9 Author: Butao Zhang <zhangbu...@apache.org> AuthorDate: Sun Jul 13 02:54:27 2025 +0800 HIVE-29080: Fix Incorrect results for queries on Decimal column Partitioned table with Vectorization enabled (#5965) Co-authored-by: Indhumathi <in...@visa.com> --- .../hive/ql/exec/vector/VectorizedRowBatchCtx.java | 2 +- .../clientpositive/vector_decimal_partition.q | 15 ++++- .../llap/vector_decimal_partition.q.out | 76 ++++++++++++++++++++++ .../hive/ql/exec/vector/Decimal64ColumnVector.java | 7 ++ 4 files changed, 98 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 0abbd59b9e9..a0906cfb033 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -576,7 +576,7 @@ public void addPartitionColsToBatch(ColumnVector col, Object value, int colIndex dv.isNull[0] = true; dv.isRepeating = true; } else { - dv.fill(((HiveDecimal) value).longValue()); + dv.fill((HiveDecimal) value); } } else { DecimalColumnVector dv = (DecimalColumnVector) col; diff --git a/ql/src/test/queries/clientpositive/vector_decimal_partition.q b/ql/src/test/queries/clientpositive/vector_decimal_partition.q index dc8e667a47c..2cb4f4e15fd 100644 --- a/ql/src/test/queries/clientpositive/vector_decimal_partition.q +++ b/ql/src/test/queries/clientpositive/vector_decimal_partition.q @@ -13,4 +13,17 @@ INSERT INTO decimal_part PARTITION (nr_bank = 88) VALUES (1, 'test'); INSERT INTO decimal_part PARTITION (nr_bank = 8801) VALUES (1, '8801'); EXPLAIN VECTORIZATION 
EXPRESSION SELECT count(*), nr_bank FROM decimal_part GROUP BY nr_bank; -SELECT count(*), nr_bank FROM decimal_part GROUP BY nr_bank; \ No newline at end of file +SELECT count(*), nr_bank FROM decimal_part GROUP BY nr_bank; + +DROP TABLE IF EXISTS decimal_part1; + +CREATE EXTERNAL TABLE decimal_part1 (quantity INT) PARTITIONED BY (sales_percent DECIMAL(10,2)) STORED AS ORC; +INSERT INTO decimal_part1 VALUES (1, 24518.01); +INSERT INTO decimal_part1 VALUES (2, 24518.02); + +set hive.auto.convert.join=true; + +SELECT count(*), sales_percent FROM decimal_part1 GROUP BY sales_percent; +SELECT d1.quantity,d1.sales_percent FROM decimal_part1 d1 JOIN decimal_part1 d2 ON d1.sales_percent=d2.sales_percent; +SET hive.vectorized.execution.enabled=false; +SELECT d1.quantity,d1.sales_percent FROM decimal_part1 d1 JOIN decimal_part1 d2 ON d1.sales_percent=d2.sales_percent; diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_partition.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_partition.q.out index 83e7d2a0502..7d1651b1077 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_partition.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_partition.q.out @@ -178,3 +178,79 @@ POSTHOOK: Input: default@decimal_part@nr_bank=8801 #### A masked pattern was here #### 1 88 1 8801 +PREHOOK: query: DROP TABLE IF EXISTS decimal_part1 +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: DROP TABLE IF EXISTS decimal_part1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: CREATE EXTERNAL TABLE decimal_part1 (quantity INT) PARTITIONED BY (sales_percent DECIMAL(10,2)) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_part1 +POSTHOOK: query: CREATE EXTERNAL TABLE decimal_part1 (quantity INT) PARTITIONED BY (sales_percent DECIMAL(10,2)) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@decimal_part1 +PREHOOK: query: INSERT INTO decimal_part1 VALUES (1, 24518.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_part1 +POSTHOOK: query: INSERT INTO decimal_part1 VALUES (1, 24518.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_part1 +POSTHOOK: Output: default@decimal_part1@sales_percent=24518.01 +POSTHOOK: Lineage: decimal_part1 PARTITION(sales_percent=24518.01).quantity SCRIPT [] +PREHOOK: query: INSERT INTO decimal_part1 VALUES (2, 24518.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@decimal_part1 +POSTHOOK: query: INSERT INTO decimal_part1 VALUES (2, 24518.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@decimal_part1 +POSTHOOK: Output: default@decimal_part1@sales_percent=24518.02 +POSTHOOK: Lineage: decimal_part1 PARTITION(sales_percent=24518.02).quantity SCRIPT [] +PREHOOK: query: SELECT count(*), sales_percent FROM decimal_part1 GROUP BY sales_percent +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_part1 +PREHOOK: Input: default@decimal_part1@sales_percent=24518.01 +PREHOOK: Input: default@decimal_part1@sales_percent=24518.02 +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*), sales_percent FROM decimal_part1 GROUP BY sales_percent +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_part1 +POSTHOOK: Input: default@decimal_part1@sales_percent=24518.01 +POSTHOOK: Input: default@decimal_part1@sales_percent=24518.02 +#### A masked pattern was here #### +1 24518.02 +1 24518.01 +PREHOOK: query: SELECT d1.quantity,d1.sales_percent FROM decimal_part1 d1 JOIN decimal_part1 d2 ON d1.sales_percent=d2.sales_percent +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_part1 +PREHOOK: Input: default@decimal_part1@sales_percent=24518.01 +PREHOOK: Input: 
default@decimal_part1@sales_percent=24518.02 +#### A masked pattern was here #### +POSTHOOK: query: SELECT d1.quantity,d1.sales_percent FROM decimal_part1 d1 JOIN decimal_part1 d2 ON d1.sales_percent=d2.sales_percent +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_part1 +POSTHOOK: Input: default@decimal_part1@sales_percent=24518.01 +POSTHOOK: Input: default@decimal_part1@sales_percent=24518.02 +#### A masked pattern was here #### +1 24518.01 +2 24518.02 +PREHOOK: query: SELECT d1.quantity,d1.sales_percent FROM decimal_part1 d1 JOIN decimal_part1 d2 ON d1.sales_percent=d2.sales_percent +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_part1 +PREHOOK: Input: default@decimal_part1@sales_percent=24518.01 +PREHOOK: Input: default@decimal_part1@sales_percent=24518.02 +#### A masked pattern was here #### +POSTHOOK: query: SELECT d1.quantity,d1.sales_percent FROM decimal_part1 d1 JOIN decimal_part1 d2 ON d1.sales_percent=d2.sales_percent +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_part1 +POSTHOOK: Input: default@decimal_part1@sales_percent=24518.01 +POSTHOOK: Input: default@decimal_part1@sales_percent=24518.02 +#### A masked pattern was here #### +1 24518.01 +2 24518.02 diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java index a06ce39b46c..3753d337d8b 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java @@ -41,6 +41,13 @@ public Decimal64ColumnVector(int size, int precision, int scale) { scratchHiveDecWritable = new HiveDecimalWritable(); } + // Fill the vector entries with provided value + public void fill(HiveDecimal value) { + isRepeating = true; + isNull[0] = false; + set(0, value); + } + /** * Set a Decimal64 field from a HiveDecimalWritable. *