This is an automated email from the ASF dual-hosted git repository.
soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9e316c3203a HIVE-29598: Fix vectorized outer join wrong results due to
stale scratch column values (#6486)
9e316c3203a is described below
commit 9e316c3203acc5631082b0bb3c976385164d431b
Author: Ryu Kobayashi <[email protected]>
AuthorDate: Fri May 22 04:37:21 2026 +0900
HIVE-29598: Fix vectorized outer join wrong results due to stale scratch
column values (#6486)
Co-authored-by: konstantinb <[email protected]>
---
.../VectorMapJoinOuterGenerateResultOperator.java | 14 +-
...stVectorMapJoinOuterGenerateResultOperator.java | 361 +++++++++++++++++++++
.../queries/clientpositive/vector_outer_join7.q | 43 +++
.../clientpositive/llap/vector_outer_join7.q.out | 84 +++++
.../hive/ql/exec/vector/BytesColumnVector.java | 7 +
.../hadoop/hive/ql/exec/vector/ColumnVector.java | 26 ++
.../hive/ql/exec/vector/DecimalColumnVector.java | 5 +
.../hive/ql/exec/vector/DoubleColumnVector.java | 5 +
.../exec/vector/IntervalDayTimeColumnVector.java | 5 +
.../hive/ql/exec/vector/LongColumnVector.java | 5 +
.../hive/ql/exec/vector/TimestampColumnVector.java | 5 +
.../hive/ql/exec/vector/TestBytesColumnVector.java | 29 ++
.../ql/exec/vector/TestDecimalColumnVector.java | 44 +++
.../ql/exec/vector/TestDoubleColumnVector.java | 46 +++
.../vector/TestIntervalDayTimeColumnVector.java | 43 +++
.../hive/ql/exec/vector/TestLongColumnVector.java | 46 +++
.../ql/exec/vector/TestTimestampColumnVector.java | 16 +
17 files changed, 774 insertions(+), 10 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index e83b178e4dc..903517c1077 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -587,16 +587,12 @@ protected void generateOuterNulls(VectorizedRowBatch
batch, int[] noMatchs,
// key as null, too.
//
for (int column : outerSmallTableKeyColumnMap) {
- ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[batchIndex] = true;
+ batch.cols[column].clearValue(batchIndex);
}
// Small table values are set to null.
for (int column : smallTableValueColumnMap) {
- ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[batchIndex] = true;
+ batch.cols[column].clearValue(batchIndex);
}
}
}
@@ -746,15 +742,13 @@ protected void
generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hi
//
for (int column : outerSmallTableKeyColumnMap) {
ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[0] = true;
+ colVector.clearValue(0);
colVector.isRepeating = true;
}
for (int column : smallTableValueColumnMap) {
ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[0] = true;
+ colVector.clearValue(0);
colVector.isRepeating = true;
}
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
new file mode 100644
index 00000000000..35553d9cb44
--- /dev/null
+++
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VoidColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * HIVE-29598: verifies {@link VectorMapJoinOuterGenerateResultOperator} clears
+ * every small-table slot for unmatched rows, so stale values cannot carry over
+ * past the null marking.
+ */
+class TestVectorMapJoinOuterGenerateResultOperator {
+
+ /** Concrete subclass that exposes the generateOuterNulls* methods to tests.
*/
+ private static final class TestableOuterOp extends
VectorMapJoinOuterGenerateResultOperator {
+ @Override
+ protected String getLoggingPrefix() {
+ throw new UnsupportedOperationException("stubbed only to instantiate
abstract class under test");
+ }
+
+ @Override
+ public void processBatch(VectorizedRowBatch batch) {
+ throw new UnsupportedOperationException("stubbed only to instantiate
abstract class under test");
+ }
+ }
+
+ /**
+ * Records {@code clearSlotValue} invocations to verify the operator
dispatches
+ * through {@code clearValue}, not just produces the slot-clearing side
effect.
+ */
+ private static class TrackingLongColumnVector extends LongColumnVector {
+ final List<Integer> clearedIndices = new ArrayList<>();
+
+ TrackingLongColumnVector(int size) {
+ super(size);
+ }
+
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ super.clearSlotValue(elementNum);
+ clearedIndices.add(elementNum);
+ }
+ }
+
+ @Test
+ void
generateOuterNullsCallsClearValueOnEachMappedColumnForEachUnmatchedRow() throws
HiveException, IOException {
+ TestableOuterOp op = new TestableOuterOp();
+ op.outerSmallTableKeyColumnMap = new int[] {0};
+ op.smallTableValueColumnMap = new int[] {1, 2};
+
+ VectorizedRowBatch batch = new VectorizedRowBatch(3, 4);
+ TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4);
+ TrackingLongColumnVector valCol1 = new TrackingLongColumnVector(4);
+ TrackingLongColumnVector valCol2 = new TrackingLongColumnVector(4);
+ keyCol.vector[1] = 99L;
+ valCol1.vector[1] = 88L;
+ valCol2.vector[3] = 77L;
+ batch.cols[0] = keyCol;
+ batch.cols[1] = valCol1;
+ batch.cols[2] = valCol2;
+
+ int[] noMatchs = new int[] {1, 3};
+ op.generateOuterNulls(batch, noMatchs, noMatchs.length);
+
+ assertEquals(Arrays.asList(1, 3), keyCol.clearedIndices);
+ assertEquals(Arrays.asList(1, 3), valCol1.clearedIndices);
+ assertEquals(Arrays.asList(1, 3), valCol2.clearedIndices);
+
+ assertFalse(keyCol.noNulls);
+ assertTrue(keyCol.isNull[1]);
+ assertTrue(keyCol.isNull[3]);
+ assertFalse(keyCol.isNull[0]);
+ assertFalse(keyCol.isNull[2]);
+
+ assertEquals(0L, keyCol.vector[1]);
+ assertEquals(0L, valCol1.vector[1]);
+ assertEquals(0L, valCol2.vector[3]);
+ }
+
+ @Test
+ void
generateOuterNullsRepeatedAllCallsClearValueAtIndexZeroForEachMappedColumn()
throws HiveException {
+ TestableOuterOp op = new TestableOuterOp();
+ op.outerSmallTableKeyColumnMap = new int[] {0};
+ op.smallTableValueColumnMap = new int[] {1};
+
+ VectorizedRowBatch batch = new VectorizedRowBatch(2, 4);
+ TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4);
+ TrackingLongColumnVector valCol = new TrackingLongColumnVector(4);
+ keyCol.vector[0] = 42L;
+ valCol.vector[0] = 84L;
+ batch.cols[0] = keyCol;
+ batch.cols[1] = valCol;
+
+ op.generateOuterNullsRepeatedAll(batch);
+
+ assertEquals(Arrays.asList(0), keyCol.clearedIndices);
+ assertEquals(Arrays.asList(0), valCol.clearedIndices);
+
+ // isRepeating is set by the operator, not by clearValue.
+ assertFalse(keyCol.noNulls);
+ assertTrue(keyCol.isNull[0]);
+ assertTrue(keyCol.isRepeating);
+ assertFalse(valCol.noNulls);
+ assertTrue(valCol.isNull[0]);
+ assertTrue(valCol.isRepeating);
+
+ assertEquals(0L, keyCol.vector[0]);
+ assertEquals(0L, valCol.vector[0]);
+ }
+
+ @Test
+ void generateOuterNullsSetsBookkeepingOnTypeWithNoClearSlotValueOverride()
throws HiveException, IOException {
+ // VoidColumnVector inherits the base no-op clearSlotValue — verifies the
+ // operator still drives the null-marking through clearValue() on a type
+ // without a per-slot value to zero.
+ TestableOuterOp op = new TestableOuterOp();
+ op.outerSmallTableKeyColumnMap = new int[] {};
+ op.smallTableValueColumnMap = new int[] {0};
+
+ VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
+ VoidColumnVector voidCol = new VoidColumnVector(4);
+ batch.cols[0] = voidCol;
+
+ int[] noMatchs = new int[] {1, 3};
+ op.generateOuterNulls(batch, noMatchs, noMatchs.length);
+
+ assertFalse(voidCol.noNulls);
+ assertTrue(voidCol.isNull[1]);
+ assertTrue(voidCol.isNull[3]);
+ assertFalse(voidCol.isNull[0]);
+ assertFalse(voidCol.isNull[2]);
+ }
+
+ /**
+ * For each {@link ColumnVector} subclass whose {@code clearSlotValue} is
+ * overridden, verifies the operator's call through {@code clearValue}
reaches
+ * the override and clears the slot to the type's cleared state.
+ */
+ @ParameterizedTest(name = "{0}")
+ @MethodSource("modifiedColumnVectorTypes")
+ void generateOuterNullsClearsSlotForEachModifiedType(
+ String typeName,
+ ColumnVector cv,
+ Runnable preLoad,
+ Runnable assertSlotCleared) throws HiveException, IOException {
+
+ TestableOuterOp op = new TestableOuterOp();
+ op.outerSmallTableKeyColumnMap = new int[] {};
+ op.smallTableValueColumnMap = new int[] {0};
+
+ VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
+ preLoad.run();
+ batch.cols[0] = cv;
+
+ int[] noMatchs = new int[] {2};
+ op.generateOuterNulls(batch, noMatchs, noMatchs.length);
+
+ assertTrue(cv.isNull[2]);
+ assertFalse(cv.noNulls);
+ assertSlotCleared.run();
+ }
+
+ @ParameterizedTest(name = "{0}")
+ @MethodSource("modifiedColumnVectorTypesAtSlotZero")
+ void generateOuterNullsRepeatedAllClearsSlotForEachModifiedType(
+ String typeName,
+ ColumnVector cv,
+ Runnable preLoad,
+ Runnable assertSlotCleared) throws HiveException {
+
+ TestableOuterOp op = new TestableOuterOp();
+ op.outerSmallTableKeyColumnMap = new int[] {};
+ op.smallTableValueColumnMap = new int[] {0};
+
+ VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
+ preLoad.run();
+ batch.cols[0] = cv;
+
+ op.generateOuterNullsRepeatedAll(batch);
+
+ assertTrue(cv.isNull[0]);
+ assertFalse(cv.noNulls);
+ assertTrue(cv.isRepeating);
+ assertSlotCleared.run();
+ }
+
+ static Stream<Arguments> modifiedColumnVectorTypesAtSlotZero() {
+ final LongColumnVector longCv = new LongColumnVector(4);
+ final DoubleColumnVector doubleCv = new DoubleColumnVector(4);
+ final BytesColumnVector bytesCv = new BytesColumnVector(4);
+ final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4);
+ final Decimal64ColumnVector dec64Cv = new Decimal64ColumnVector(4, 18, 4);
+ final TimestampColumnVector tsCv = new TimestampColumnVector(4);
+ final IntervalDayTimeColumnVector ivCv = new
IntervalDayTimeColumnVector(4);
+
+ return Stream.of(
+ Arguments.of(
+ "LongColumnVector",
+ longCv,
+ (Runnable) () -> longCv.vector[0] = 999L,
+ (Runnable) () -> assertEquals(0L, longCv.vector[0])),
+ Arguments.of(
+ "DoubleColumnVector",
+ doubleCv,
+ (Runnable) () -> doubleCv.vector[0] = 3.14,
+ (Runnable) () -> assertEquals(0.0, doubleCv.vector[0])),
+ Arguments.of(
+ "BytesColumnVector",
+ bytesCv,
+ (Runnable) () -> {
+ bytesCv.vector[0] = "stale".getBytes(StandardCharsets.UTF_8);
+ bytesCv.start[0] = 1;
+ bytesCv.length[0] = 3;
+ },
+ (Runnable) () -> {
+ assertNull(bytesCv.vector[0]);
+ assertEquals(0, bytesCv.start[0]);
+ assertEquals(0, bytesCv.length[0]);
+ }),
+ Arguments.of(
+ "DecimalColumnVector",
+ decCv,
+ (Runnable) () -> decCv.vector[0].setFromLong(999L),
+ (Runnable) () -> assertEquals(0L,
decCv.vector[0].serialize64(decCv.scale))),
+ Arguments.of(
+ "Decimal64ColumnVector",
+ dec64Cv,
+ (Runnable) () -> dec64Cv.vector[0] = 999L,
+ (Runnable) () -> assertEquals(0L, dec64Cv.vector[0])),
+ Arguments.of(
+ "TimestampColumnVector",
+ tsCv,
+ (Runnable) () -> {
+ tsCv.time[0] = 1234567890000L;
+ tsCv.nanos[0] = 999;
+ },
+ (Runnable) () -> {
+ assertEquals(0L, tsCv.time[0]);
+ assertEquals(1, tsCv.nanos[0]);
+ }),
+ Arguments.of(
+ "IntervalDayTimeColumnVector",
+ ivCv,
+ (Runnable) () -> ivCv.set(0, new HiveIntervalDayTime(5, 0)),
+ (Runnable) () -> {
+ assertEquals(0L, ivCv.getTotalSeconds(0));
+ assertEquals(1, ivCv.getNanos(0));
+ })
+ );
+ }
+
+ static Stream<Arguments> modifiedColumnVectorTypes() {
+ final LongColumnVector longCv = new LongColumnVector(4);
+ final DoubleColumnVector doubleCv = new DoubleColumnVector(4);
+ final BytesColumnVector bytesCv = new BytesColumnVector(4);
+ final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4);
+ final Decimal64ColumnVector dec64Cv = new Decimal64ColumnVector(4, 18, 4);
+ final TimestampColumnVector tsCv = new TimestampColumnVector(4);
+ final IntervalDayTimeColumnVector ivCv = new
IntervalDayTimeColumnVector(4);
+
+ return Stream.of(
+ Arguments.of(
+ "LongColumnVector",
+ longCv,
+ (Runnable) () -> longCv.vector[2] = 999L,
+ (Runnable) () -> assertEquals(0L, longCv.vector[2])),
+ Arguments.of(
+ "DoubleColumnVector",
+ doubleCv,
+ (Runnable) () -> doubleCv.vector[2] = 3.14,
+ (Runnable) () -> assertEquals(0.0, doubleCv.vector[2])),
+ Arguments.of(
+ "BytesColumnVector",
+ bytesCv,
+ (Runnable) () -> {
+ bytesCv.vector[2] = "stale".getBytes(StandardCharsets.UTF_8);
+ bytesCv.start[2] = 1;
+ bytesCv.length[2] = 3;
+ },
+ (Runnable) () -> {
+ assertNull(bytesCv.vector[2]);
+ assertEquals(0, bytesCv.start[2]);
+ assertEquals(0, bytesCv.length[2]);
+ }),
+ Arguments.of(
+ "DecimalColumnVector",
+ decCv,
+ (Runnable) () -> decCv.vector[2].setFromLong(999L),
+ (Runnable) () -> assertEquals(0L,
decCv.vector[2].serialize64(decCv.scale))),
+ Arguments.of(
+ "Decimal64ColumnVector",
+ dec64Cv,
+ (Runnable) () -> dec64Cv.vector[2] = 999L,
+ (Runnable) () -> assertEquals(0L, dec64Cv.vector[2])),
+ Arguments.of(
+ "TimestampColumnVector",
+ tsCv,
+ (Runnable) () -> {
+ tsCv.time[2] = 1234567890000L;
+ tsCv.nanos[2] = 999;
+ },
+ (Runnable) () -> {
+ // setNullValue convention: time = 0, nanos = 1
+ assertEquals(0L, tsCv.time[2]);
+ assertEquals(1, tsCv.nanos[2]);
+ }),
+ Arguments.of(
+ "IntervalDayTimeColumnVector",
+ ivCv,
+ (Runnable) () -> ivCv.set(2, new HiveIntervalDayTime(5, 0)),
+ (Runnable) () -> {
+ // setNullValue convention: totalSeconds = 0, nanos = 1
+ assertEquals(0L, ivCv.getTotalSeconds(2));
+ assertEquals(1, ivCv.getNanos(2));
+ })
+ );
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join7.q
b/ql/src/test/queries/clientpositive/vector_outer_join7.q
new file mode 100644
index 00000000000..141d8c3c68b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_outer_join7.q
@@ -0,0 +1,43 @@
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+
+-- SORT_QUERY_RESULTS
+
+-- HIVE-29598: regression test for stale scratch-slot values in vectorized
+-- outer-join MapJoin. MAX() acts as an aggregation barrier so Calcite cannot
+-- inline the inner expression and simplify the bug surface away.
+
+CREATE TABLE t (k STRING, v STRING) STORED AS ORC;
+
+INSERT INTO t VALUES
+ ('A','1'),('A','2'),('A','3'),
+ ('B','2'),('B','3'),
+ ('C','3'),
+ ('D','1'),('D','3');
+
+WITH
+ probe AS (
+ SELECT k, v, (CAST(v AS INT) > 0) AS p_bool
+ FROM t WHERE CAST(v AS INT) >= 3
+ ),
+ small_side AS (
+ SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool
+ FROM t
+ ),
+ classified AS (
+ SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value
+ FROM probe p
+ LEFT JOIN small_side s
+ ON p.k = s.k
+ AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT)
+ ),
+ diagnosed AS (
+ SELECT k, v, MAX(observed_value) AS observed_value
+ FROM classified
+ GROUP BY k, v
+ )
+SELECT k, v,
+ observed_value AS observed_value_returned_by_select,
+ 1 AS required_value_per_sql_semantics
+FROM diagnosed
+WHERE observed_value = 0;
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out
b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out
new file mode 100644
index 00000000000..df755cfe473
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out
@@ -0,0 +1,84 @@
+PREHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: INSERT INTO t VALUES
+ ('A','1'),('A','2'),('A','3'),
+ ('B','2'),('B','3'),
+ ('C','3'),
+ ('D','1'),('D','3')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: INSERT INTO t VALUES
+ ('A','1'),('A','2'),('A','3'),
+ ('B','2'),('B','3'),
+ ('C','3'),
+ ('D','1'),('D','3')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.k SCRIPT []
+POSTHOOK: Lineage: t.v SCRIPT []
+PREHOOK: query: WITH
+ probe AS (
+ SELECT k, v, (CAST(v AS INT) > 0) AS p_bool
+ FROM t WHERE CAST(v AS INT) >= 3
+ ),
+ small_side AS (
+ SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool
+ FROM t
+ ),
+ classified AS (
+ SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value
+ FROM probe p
+ LEFT JOIN small_side s
+ ON p.k = s.k
+ AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT)
+ ),
+ diagnosed AS (
+ SELECT k, v, MAX(observed_value) AS observed_value
+ FROM classified
+ GROUP BY k, v
+ )
+SELECT k, v,
+ observed_value AS observed_value_returned_by_select,
+ 1 AS required_value_per_sql_semantics
+FROM diagnosed
+WHERE observed_value = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: WITH
+ probe AS (
+ SELECT k, v, (CAST(v AS INT) > 0) AS p_bool
+ FROM t WHERE CAST(v AS INT) >= 3
+ ),
+ small_side AS (
+ SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool
+ FROM t
+ ),
+ classified AS (
+ SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value
+ FROM probe p
+ LEFT JOIN small_side s
+ ON p.k = s.k
+ AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT)
+ ),
+ diagnosed AS (
+ SELECT k, v, MAX(observed_value) AS observed_value
+ FROM classified
+ GROUP BY k, v
+ )
+SELECT k, v,
+ observed_value AS observed_value_returned_by_select,
+ 1 AS required_value_per_sql_semantics
+FROM diagnosed
+WHERE observed_value = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index ec98d2ab5b8..cce0cb9ad94 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -504,6 +504,13 @@ public void setElement(int outputElementNum, int
inputElementNum, ColumnVector i
}
}
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ vector[elementNum] = null;
+ start[elementNum] = 0;
+ length[elementNum] = 0;
+ }
+
@Override
public void init() {
initBuffer(0);
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 9f611dfd313..ee5e3f3885e 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -231,6 +231,32 @@ public abstract void setElement(int outputElementNum, int
inputElementNum,
public abstract void copySelected(
boolean selectedInUse, int[] sel, int size, ColumnVector
outputColVector);
+ /**
+ * Mark the slot null and put its underlying value into a defined cleared
+ * state. Sets {@code isNull[elementNum] = true} and {@code noNulls = false},
+ * then dispatches to {@link #clearSlotValue(int)} for per-type clearing.
+ *
+ * <p>Defends against consumers that read {@code vector[i]} without first
+ * checking {@code isNull[i]}. Distinct from per-type {@code NULL_VALUE}
+ * sentinels (e.g. {@link LongColumnVector#NULL_VALUE}), which assume the
+ * isNull[]-first contract. Final by design — subclasses customize behavior
+ * by overriding {@link #clearSlotValue(int)}, never this method.
+ */
+ public final void clearValue(int elementNum) {
+ noNulls = false;
+ isNull[elementNum] = true;
+ clearSlotValue(elementNum);
+ }
+
+ /**
+ * Per-type slot-clearing hook invoked by {@link #clearValue(int)}.
+ * Subclasses override to zero out their value array at {@code elementNum}.
+ * Container and void types inherit the no-op default.
+ */
+ protected void clearSlotValue(int elementNum) {
+ // Default no-op.
+ }
+
/**
* Initialize the column vector. This method can be overridden by specific
column vector types.
* Use this method only if the individual type of the column vector is not
known, otherwise its
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 5defd27623b..e0cdd76de15 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -144,6 +144,11 @@ public void setElement(int outputElementNum, int
inputElementNum, ColumnVector i
}
}
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ vector[elementNum].setFromLong(0L);
+ }
+
@Override
public void stringifyValue(StringBuilder buffer, int row) {
if (isRepeating) {
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index f833bde03f6..fcf297585c6 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -220,6 +220,11 @@ public void setElement(int outputElementNum, int
inputElementNum, ColumnVector i
}
}
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ vector[elementNum] = 0.0;
+ }
+
@Override
public void stringifyValue(StringBuilder buffer, int row) {
if (isRepeating) {
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index 9324bc0c610..2b61b09e38c 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -311,6 +311,11 @@ public void setNullValue(int elementNum) {
nanos[elementNum] = 1;
}
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ setNullValue(elementNum);
+ }
+
// Copy the current object contents into the output. Only copy selected
entries,
// as indicated by selectedInUse and the sel array.
@Override
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index bf423674b2a..dc727b462a7 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -294,6 +294,11 @@ public void setElement(int outputElementNum, int
inputElementNum, ColumnVector i
}
}
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ vector[elementNum] = 0L;
+ }
+
@Override
public void stringifyValue(StringBuilder buffer, int row) {
if (isRepeating) {
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index f97156c4038..c49149cdb28 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -378,6 +378,11 @@ public void setNullValue(int elementNum) {
nanos[elementNum] = 1;
}
+ @Override
+ protected void clearSlotValue(int elementNum) {
+ setNullValue(elementNum);
+ }
+
// Copy the current object contents into the output. Only copy selected
entries,
// as indicated by selectedInUse and the sel array.
@Override
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
index be4ff70935a..dc9c045727d 100644
---
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
@@ -25,7 +25,9 @@
import org.junit.Test;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
@@ -220,4 +222,31 @@ private static byte[] writeToBytesColumnVector(int rowIdx,
BytesColumnVector col
col.setValPreallocated(rowIdx, writeSize);
return bytes;
}
+
+ @Test
+ public void testClearValue() {
+ BytesColumnVector cv = new BytesColumnVector(4);
+ byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
+ cv.vector[0] = data;
+ cv.start[0] = 1;
+ cv.length[0] = 3;
+
+ byte[] neighborData = "world".getBytes(StandardCharsets.UTF_8);
+ cv.vector[1] = neighborData;
+ cv.start[1] = 2;
+ cv.length[1] = 4;
+
+ cv.clearValue(0);
+
+ assertTrue(cv.isNull[0]);
+ assertFalse(cv.noNulls);
+ assertNull(cv.vector[0]);
+ assertEquals(0, cv.start[0]);
+ assertEquals(0, cv.length[0]);
+
+ assertSame(neighborData, cv.vector[1]);
+ assertEquals(2, cv.start[1]);
+ assertEquals(4, cv.length[1]);
+ assertFalse(cv.isNull[1]);
+ }
}
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java
new file mode 100644
index 00000000000..2644ff8f1bc
--- /dev/null
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestDecimalColumnVector {
+
+ @Test
+ void clearValueZeroesSlotAndMarksNull() {
+ DecimalColumnVector cv = new DecimalColumnVector(4, 18, 4);
+ cv.vector[1].setFromLong(12345L);
+ cv.vector[2].setFromLong(67890L);
+
+ cv.clearValue(1);
+
+ assertTrue(cv.isNull[1]);
+ assertFalse(cv.noNulls);
+ assertEquals(0L, cv.vector[1].serialize64(cv.scale));
+ // Neighbour slot untouched: still represents 67890.
+ assertEquals(67890L, cv.vector[2].serialize64((short) 0));
+ assertFalse(cv.isNull[2]);
+ }
+}
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java
new file mode 100644
index 00000000000..a67ff94ef32
--- /dev/null
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestDoubleColumnVector {
+
+ @Test
+ void clearValueZeroesSlotAndMarksNull() {
+ DoubleColumnVector cv = new DoubleColumnVector(4);
+ cv.vector[1] = 3.14;
+ cv.vector[0] = 1.5;
+ cv.vector[3] = -2.5;
+
+ cv.clearValue(1);
+
+ assertTrue(cv.isNull[1]);
+ assertFalse(cv.noNulls);
+ assertEquals(0.0, cv.vector[1]);
+ assertEquals(1.5, cv.vector[0]);
+ assertEquals(-2.5, cv.vector[3]);
+ assertFalse(cv.isNull[0]);
+ assertFalse(cv.isNull[3]);
+ }
+}
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java
new file mode 100644
index 00000000000..d715508e148
--- /dev/null
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestIntervalDayTimeColumnVector { // Unit test for IntervalDayTimeColumnVector.clearValue(int).
+
+ @Test
+ void clearValueZeroesSlotAndMarksNull() { // Clearing a slot marks it null and resets it to the values asserted below.
+ IntervalDayTimeColumnVector cv = new IntervalDayTimeColumnVector(4); // constructed with size argument 4; only index 3 is used
+ cv.set(3, new HiveIntervalDayTime(5, 0)); // seed slot 3 with a real value so the clear has something to erase
+
+ cv.clearValue(3); // operation under test
+
+ assertTrue(cv.isNull[3]); // the cleared slot is flagged null
+ assertFalse(cv.noNulls); // the vector-level flag must now admit that a null is present
+ // setNullValue convention: totalSeconds = 0, nanos = 1
+ assertEquals(0L, cv.getTotalSeconds(3)); // seconds component was reset to 0
+ assertEquals(1, cv.getNanos(3)); // nanos component is 1, not 0, per the convention noted above
+ }
+}
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java
new file mode 100644
index 00000000000..c1c8acc25e9
--- /dev/null
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestLongColumnVector { // Unit test for LongColumnVector.clearValue(int).
+
+ @Test
+ void clearValueZeroesSlotAndMarksNull() { // Clearing one slot zeroes it, marks it null, and leaves the other slots alone.
+ LongColumnVector cv = new LongColumnVector(4); // constructed with size argument 4; indices 1..3 are used below
+ cv.vector[2] = 2025L; // slot 2 is the one cleared below; 2025 plays the role of a stale value
+ cv.vector[1] = 7L; // neighbouring slot that must survive the clear
+ cv.vector[3] = 9L; // second untouched slot, on the other side of the cleared one
+
+ cv.clearValue(2); // operation under test
+
+ assertTrue(cv.isNull[2]); // the cleared slot is flagged null
+ assertFalse(cv.noNulls); // the vector-level flag must now admit that a null is present
+ assertEquals(0L, cv.vector[2]); // the old 2025 has been replaced by 0
+ assertEquals(7L, cv.vector[1]); // other slots keep their values
+ assertEquals(9L, cv.vector[3]); // other slots keep their values
+ assertFalse(cv.isNull[1]); // other slots are not flagged null
+ assertFalse(cv.isNull[3]); // other slots are not flagged null
+ }
+}
diff --git
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
index 2d85b115d24..dda52797246 100644
---
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
+++
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.sql.Timestamp;
@@ -246,4 +248,18 @@ private Thread startVectorManipulationThread(final int
vectorLength, final long
return thread;
}
+ @Test
+ public void testClearValue() { // clearValue(int) marks the slot null and resets its time/nanos to the values asserted below.
+ TimestampColumnVector cv = new TimestampColumnVector(4); // constructed with size argument 4; only index 2 is used
+ cv.time[2] = 1234567890000L; // seed slot 2 with a non-zero time ...
+ cv.nanos[2] = 999; // ... and non-zero nanos, so the clear has something to erase
+
+ cv.clearValue(2); // operation under test
+
+ assertTrue(cv.isNull[2]); // the cleared slot is flagged null
+ assertFalse(cv.noNulls); // the vector-level flag must now admit that a null is present
+ // setNullValue convention: time = 0, nanos = 1
+ assertEquals(0L, cv.time[2]); // time component was reset to 0
+ assertEquals(1, cv.nanos[2]); // nanos component is 1, not 0, per the convention noted above
+ }
}