This is an automated email from the ASF dual-hosted git repository.

soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e316c3203a HIVE-29598: Fix vectorized outer join wrong results due to stale scratch column values (#6486)
9e316c3203a is described below

commit 9e316c3203acc5631082b0bb3c976385164d431b
Author: Ryu Kobayashi <[email protected]>
AuthorDate: Fri May 22 04:37:21 2026 +0900

    HIVE-29598: Fix vectorized outer join wrong results due to stale scratch column values (#6486)
    
    Co-authored-by: konstantinb <[email protected]>
---
 .../VectorMapJoinOuterGenerateResultOperator.java  |  14 +-
 ...stVectorMapJoinOuterGenerateResultOperator.java | 361 +++++++++++++++++++++
 .../queries/clientpositive/vector_outer_join7.q    |  43 +++
 .../clientpositive/llap/vector_outer_join7.q.out   |  84 +++++
 .../hive/ql/exec/vector/BytesColumnVector.java     |   7 +
 .../hadoop/hive/ql/exec/vector/ColumnVector.java   |  26 ++
 .../hive/ql/exec/vector/DecimalColumnVector.java   |   5 +
 .../hive/ql/exec/vector/DoubleColumnVector.java    |   5 +
 .../exec/vector/IntervalDayTimeColumnVector.java   |   5 +
 .../hive/ql/exec/vector/LongColumnVector.java      |   5 +
 .../hive/ql/exec/vector/TimestampColumnVector.java |   5 +
 .../hive/ql/exec/vector/TestBytesColumnVector.java |  29 ++
 .../ql/exec/vector/TestDecimalColumnVector.java    |  44 +++
 .../ql/exec/vector/TestDoubleColumnVector.java     |  46 +++
 .../vector/TestIntervalDayTimeColumnVector.java    |  43 +++
 .../hive/ql/exec/vector/TestLongColumnVector.java  |  46 +++
 .../ql/exec/vector/TestTimestampColumnVector.java  |  16 +
 17 files changed, 774 insertions(+), 10 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index e83b178e4dc..903517c1077 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -587,16 +587,12 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
       // key as null, too.
       //
       for (int column : outerSmallTableKeyColumnMap) {
-        ColumnVector colVector = batch.cols[column];
-        colVector.noNulls = false;
-        colVector.isNull[batchIndex] = true;
+        batch.cols[column].clearValue(batchIndex);
       }
 
       // Small table values are set to null.
       for (int column : smallTableValueColumnMap) {
-        ColumnVector colVector = batch.cols[column];
-        colVector.noNulls = false;
-        colVector.isNull[batchIndex] = true;
+        batch.cols[column].clearValue(batchIndex);
       }
     }
   }
@@ -746,15 +742,13 @@ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hi
     //
     for (int column : outerSmallTableKeyColumnMap) {
       ColumnVector colVector = batch.cols[column];
-      colVector.noNulls = false;
-      colVector.isNull[0] = true;
+      colVector.clearValue(0);
       colVector.isRepeating = true;
     }
 
     for (int column : smallTableValueColumnMap) {
       ColumnVector colVector = batch.cols[column];
-      colVector.noNulls = false;
-      colVector.isNull[0] = true;
+      colVector.clearValue(0);
       colVector.isRepeating = true;
     }
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
new file mode 100644
index 00000000000..35553d9cb44
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VoidColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * HIVE-29598: verifies {@link VectorMapJoinOuterGenerateResultOperator} clears
+ * every small-table slot for unmatched rows, so stale values cannot carry over
+ * past the null marking.
+ */
+class TestVectorMapJoinOuterGenerateResultOperator {
+
+  /** Concrete subclass that exposes the generateOuterNulls* methods to tests. */
+  private static final class TestableOuterOp extends VectorMapJoinOuterGenerateResultOperator {
+    @Override
+    protected String getLoggingPrefix() {
+      throw new UnsupportedOperationException("stubbed only to instantiate abstract class under test");
+    }
+
+    @Override
+    public void processBatch(VectorizedRowBatch batch) {
+      throw new UnsupportedOperationException("stubbed only to instantiate abstract class under test");
+    }
+  }
+
+  /**
+   * Records {@code clearSlotValue} invocations to verify the operator dispatches
+   * through {@code clearValue}, not just produces the slot-clearing side effect.
+   */
+  private static class TrackingLongColumnVector extends LongColumnVector {
+    final List<Integer> clearedIndices = new ArrayList<>();
+
+    TrackingLongColumnVector(int size) {
+      super(size);
+    }
+
+    @Override
+    protected void clearSlotValue(int elementNum) {
+      super.clearSlotValue(elementNum);
+      clearedIndices.add(elementNum);
+    }
+  }
+
+  @Test
+  void generateOuterNullsCallsClearValueOnEachMappedColumnForEachUnmatchedRow() throws HiveException, IOException {
+    TestableOuterOp op = new TestableOuterOp();
+    op.outerSmallTableKeyColumnMap = new int[] {0};
+    op.smallTableValueColumnMap = new int[] {1, 2};
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(3, 4);
+    TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4);
+    TrackingLongColumnVector valCol1 = new TrackingLongColumnVector(4);
+    TrackingLongColumnVector valCol2 = new TrackingLongColumnVector(4);
+    keyCol.vector[1] = 99L;
+    valCol1.vector[1] = 88L;
+    valCol2.vector[3] = 77L;
+    batch.cols[0] = keyCol;
+    batch.cols[1] = valCol1;
+    batch.cols[2] = valCol2;
+
+    int[] noMatchs = new int[] {1, 3};
+    op.generateOuterNulls(batch, noMatchs, noMatchs.length);
+
+    assertEquals(Arrays.asList(1, 3), keyCol.clearedIndices);
+    assertEquals(Arrays.asList(1, 3), valCol1.clearedIndices);
+    assertEquals(Arrays.asList(1, 3), valCol2.clearedIndices);
+
+    assertFalse(keyCol.noNulls);
+    assertTrue(keyCol.isNull[1]);
+    assertTrue(keyCol.isNull[3]);
+    assertFalse(keyCol.isNull[0]);
+    assertFalse(keyCol.isNull[2]);
+
+    assertEquals(0L, keyCol.vector[1]);
+    assertEquals(0L, valCol1.vector[1]);
+    assertEquals(0L, valCol2.vector[3]);
+  }
+
+  @Test
+  void generateOuterNullsRepeatedAllCallsClearValueAtIndexZeroForEachMappedColumn() throws HiveException {
+    TestableOuterOp op = new TestableOuterOp();
+    op.outerSmallTableKeyColumnMap = new int[] {0};
+    op.smallTableValueColumnMap = new int[] {1};
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(2, 4);
+    TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4);
+    TrackingLongColumnVector valCol = new TrackingLongColumnVector(4);
+    keyCol.vector[0] = 42L;
+    valCol.vector[0] = 84L;
+    batch.cols[0] = keyCol;
+    batch.cols[1] = valCol;
+
+    op.generateOuterNullsRepeatedAll(batch);
+
+    assertEquals(Arrays.asList(0), keyCol.clearedIndices);
+    assertEquals(Arrays.asList(0), valCol.clearedIndices);
+
+    // isRepeating is set by the operator, not by clearValue.
+    assertFalse(keyCol.noNulls);
+    assertTrue(keyCol.isNull[0]);
+    assertTrue(keyCol.isRepeating);
+    assertFalse(valCol.noNulls);
+    assertTrue(valCol.isNull[0]);
+    assertTrue(valCol.isRepeating);
+
+    assertEquals(0L, keyCol.vector[0]);
+    assertEquals(0L, valCol.vector[0]);
+  }
+
+  @Test
+  void generateOuterNullsSetsBookkeepingOnTypeWithNoClearSlotValueOverride() throws HiveException, IOException {
+    // VoidColumnVector inherits the base no-op clearSlotValue — verifies the
+    // operator still drives the null-marking through clearValue() on a type
+    // without a per-slot value to zero.
+    TestableOuterOp op = new TestableOuterOp();
+    op.outerSmallTableKeyColumnMap = new int[] {};
+    op.smallTableValueColumnMap = new int[] {0};
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
+    VoidColumnVector voidCol = new VoidColumnVector(4);
+    batch.cols[0] = voidCol;
+
+    int[] noMatchs = new int[] {1, 3};
+    op.generateOuterNulls(batch, noMatchs, noMatchs.length);
+
+    assertFalse(voidCol.noNulls);
+    assertTrue(voidCol.isNull[1]);
+    assertTrue(voidCol.isNull[3]);
+    assertFalse(voidCol.isNull[0]);
+    assertFalse(voidCol.isNull[2]);
+  }
+
+  /**
+   * For each {@link ColumnVector} subclass whose {@code clearSlotValue} is
+   * overridden, verifies the operator's call through {@code clearValue} reaches
+   * the override and clears the slot to the type's cleared state.
+   */
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("modifiedColumnVectorTypes")
+  void generateOuterNullsClearsSlotForEachModifiedType(
+      String typeName,
+      ColumnVector cv,
+      Runnable preLoad,
+      Runnable assertSlotCleared) throws HiveException, IOException {
+
+    TestableOuterOp op = new TestableOuterOp();
+    op.outerSmallTableKeyColumnMap = new int[] {};
+    op.smallTableValueColumnMap = new int[] {0};
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
+    preLoad.run();
+    batch.cols[0] = cv;
+
+    int[] noMatchs = new int[] {2};
+    op.generateOuterNulls(batch, noMatchs, noMatchs.length);
+
+    assertTrue(cv.isNull[2]);
+    assertFalse(cv.noNulls);
+    assertSlotCleared.run();
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("modifiedColumnVectorTypesAtSlotZero")
+  void generateOuterNullsRepeatedAllClearsSlotForEachModifiedType(
+      String typeName,
+      ColumnVector cv,
+      Runnable preLoad,
+      Runnable assertSlotCleared) throws HiveException {
+
+    TestableOuterOp op = new TestableOuterOp();
+    op.outerSmallTableKeyColumnMap = new int[] {};
+    op.smallTableValueColumnMap = new int[] {0};
+
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
+    preLoad.run();
+    batch.cols[0] = cv;
+
+    op.generateOuterNullsRepeatedAll(batch);
+
+    assertTrue(cv.isNull[0]);
+    assertFalse(cv.noNulls);
+    assertTrue(cv.isRepeating);
+    assertSlotCleared.run();
+  }
+
+  static Stream<Arguments> modifiedColumnVectorTypesAtSlotZero() {
+    final LongColumnVector longCv = new LongColumnVector(4);
+    final DoubleColumnVector doubleCv = new DoubleColumnVector(4);
+    final BytesColumnVector bytesCv = new BytesColumnVector(4);
+    final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4);
+    final Decimal64ColumnVector dec64Cv = new Decimal64ColumnVector(4, 18, 4);
+    final TimestampColumnVector tsCv = new TimestampColumnVector(4);
+    final IntervalDayTimeColumnVector ivCv = new IntervalDayTimeColumnVector(4);
+
+    return Stream.of(
+        Arguments.of(
+            "LongColumnVector",
+            longCv,
+            (Runnable) () -> longCv.vector[0] = 999L,
+            (Runnable) () -> assertEquals(0L, longCv.vector[0])),
+        Arguments.of(
+            "DoubleColumnVector",
+            doubleCv,
+            (Runnable) () -> doubleCv.vector[0] = 3.14,
+            (Runnable) () -> assertEquals(0.0, doubleCv.vector[0])),
+        Arguments.of(
+            "BytesColumnVector",
+            bytesCv,
+            (Runnable) () -> {
+              bytesCv.vector[0] = "stale".getBytes(StandardCharsets.UTF_8);
+              bytesCv.start[0] = 1;
+              bytesCv.length[0] = 3;
+            },
+            (Runnable) () -> {
+              assertNull(bytesCv.vector[0]);
+              assertEquals(0, bytesCv.start[0]);
+              assertEquals(0, bytesCv.length[0]);
+            }),
+        Arguments.of(
+            "DecimalColumnVector",
+            decCv,
+            (Runnable) () -> decCv.vector[0].setFromLong(999L),
+            (Runnable) () -> assertEquals(0L, decCv.vector[0].serialize64(decCv.scale))),
+        Arguments.of(
+            "Decimal64ColumnVector",
+            dec64Cv,
+            (Runnable) () -> dec64Cv.vector[0] = 999L,
+            (Runnable) () -> assertEquals(0L, dec64Cv.vector[0])),
+        Arguments.of(
+            "TimestampColumnVector",
+            tsCv,
+            (Runnable) () -> {
+              tsCv.time[0] = 1234567890000L;
+              tsCv.nanos[0] = 999;
+            },
+            (Runnable) () -> {
+              assertEquals(0L, tsCv.time[0]);
+              assertEquals(1, tsCv.nanos[0]);
+            }),
+        Arguments.of(
+            "IntervalDayTimeColumnVector",
+            ivCv,
+            (Runnable) () -> ivCv.set(0, new HiveIntervalDayTime(5, 0)),
+            (Runnable) () -> {
+              assertEquals(0L, ivCv.getTotalSeconds(0));
+              assertEquals(1, ivCv.getNanos(0));
+            })
+    );
+  }
+
+  static Stream<Arguments> modifiedColumnVectorTypes() {
+    final LongColumnVector longCv = new LongColumnVector(4);
+    final DoubleColumnVector doubleCv = new DoubleColumnVector(4);
+    final BytesColumnVector bytesCv = new BytesColumnVector(4);
+    final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4);
+    final Decimal64ColumnVector dec64Cv = new Decimal64ColumnVector(4, 18, 4);
+    final TimestampColumnVector tsCv = new TimestampColumnVector(4);
+    final IntervalDayTimeColumnVector ivCv = new IntervalDayTimeColumnVector(4);
+
+    return Stream.of(
+        Arguments.of(
+            "LongColumnVector",
+            longCv,
+            (Runnable) () -> longCv.vector[2] = 999L,
+            (Runnable) () -> assertEquals(0L, longCv.vector[2])),
+        Arguments.of(
+            "DoubleColumnVector",
+            doubleCv,
+            (Runnable) () -> doubleCv.vector[2] = 3.14,
+            (Runnable) () -> assertEquals(0.0, doubleCv.vector[2])),
+        Arguments.of(
+            "BytesColumnVector",
+            bytesCv,
+            (Runnable) () -> {
+              bytesCv.vector[2] = "stale".getBytes(StandardCharsets.UTF_8);
+              bytesCv.start[2] = 1;
+              bytesCv.length[2] = 3;
+            },
+            (Runnable) () -> {
+              assertNull(bytesCv.vector[2]);
+              assertEquals(0, bytesCv.start[2]);
+              assertEquals(0, bytesCv.length[2]);
+            }),
+        Arguments.of(
+            "DecimalColumnVector",
+            decCv,
+            (Runnable) () -> decCv.vector[2].setFromLong(999L),
+            (Runnable) () -> assertEquals(0L, decCv.vector[2].serialize64(decCv.scale))),
+        Arguments.of(
+            "Decimal64ColumnVector",
+            dec64Cv,
+            (Runnable) () -> dec64Cv.vector[2] = 999L,
+            (Runnable) () -> assertEquals(0L, dec64Cv.vector[2])),
+        Arguments.of(
+            "TimestampColumnVector",
+            tsCv,
+            (Runnable) () -> {
+              tsCv.time[2] = 1234567890000L;
+              tsCv.nanos[2] = 999;
+            },
+            (Runnable) () -> {
+              // setNullValue convention: time = 0, nanos = 1
+              assertEquals(0L, tsCv.time[2]);
+              assertEquals(1, tsCv.nanos[2]);
+            }),
+        Arguments.of(
+            "IntervalDayTimeColumnVector",
+            ivCv,
+            (Runnable) () -> ivCv.set(2, new HiveIntervalDayTime(5, 0)),
+            (Runnable) () -> {
+              // setNullValue convention: totalSeconds = 0, nanos = 1
+              assertEquals(0L, ivCv.getTotalSeconds(2));
+              assertEquals(1, ivCv.getNanos(2));
+            })
+    );
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join7.q b/ql/src/test/queries/clientpositive/vector_outer_join7.q
new file mode 100644
index 00000000000..141d8c3c68b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_outer_join7.q
@@ -0,0 +1,43 @@
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+
+-- SORT_QUERY_RESULTS
+
+-- HIVE-29598: regression test for stale scratch-slot values in vectorized
+-- outer-join MapJoin. MAX() acts as an aggregation barrier so Calcite cannot
+-- inline the inner expression and simplify the bug surface away.
+
+CREATE TABLE t (k STRING, v STRING) STORED AS ORC;
+
+INSERT INTO t VALUES
+  ('A','1'),('A','2'),('A','3'),
+  ('B','2'),('B','3'),
+  ('C','3'),
+  ('D','1'),('D','3');
+
+WITH
+  probe AS (
+    SELECT k, v, (CAST(v AS INT) > 0) AS p_bool
+    FROM t WHERE CAST(v AS INT) >= 3
+  ),
+  small_side AS (
+    SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool
+    FROM t
+  ),
+  classified AS (
+    SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value
+    FROM probe p
+    LEFT JOIN small_side s
+      ON  p.k = s.k
+      AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT)
+  ),
+  diagnosed AS (
+    SELECT k, v, MAX(observed_value) AS observed_value
+    FROM classified
+    GROUP BY k, v
+  )
+SELECT k, v,
+       observed_value AS observed_value_returned_by_select,
+       1              AS required_value_per_sql_semantics
+FROM diagnosed
+WHERE observed_value = 0;
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out
new file mode 100644
index 00000000000..df755cfe473
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out
@@ -0,0 +1,84 @@
+PREHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: INSERT INTO t VALUES
+  ('A','1'),('A','2'),('A','3'),
+  ('B','2'),('B','3'),
+  ('C','3'),
+  ('D','1'),('D','3')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: INSERT INTO t VALUES
+  ('A','1'),('A','2'),('A','3'),
+  ('B','2'),('B','3'),
+  ('C','3'),
+  ('D','1'),('D','3')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.k SCRIPT []
+POSTHOOK: Lineage: t.v SCRIPT []
+PREHOOK: query: WITH
+  probe AS (
+    SELECT k, v, (CAST(v AS INT) > 0) AS p_bool
+    FROM t WHERE CAST(v AS INT) >= 3
+  ),
+  small_side AS (
+    SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool
+    FROM t
+  ),
+  classified AS (
+    SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value
+    FROM probe p
+    LEFT JOIN small_side s
+      ON  p.k = s.k
+      AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT)
+  ),
+  diagnosed AS (
+    SELECT k, v, MAX(observed_value) AS observed_value
+    FROM classified
+    GROUP BY k, v
+  )
+SELECT k, v,
+       observed_value AS observed_value_returned_by_select,
+       1              AS required_value_per_sql_semantics
+FROM diagnosed
+WHERE observed_value = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: WITH
+  probe AS (
+    SELECT k, v, (CAST(v AS INT) > 0) AS p_bool
+    FROM t WHERE CAST(v AS INT) >= 3
+  ),
+  small_side AS (
+    SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool
+    FROM t
+  ),
+  classified AS (
+    SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value
+    FROM probe p
+    LEFT JOIN small_side s
+      ON  p.k = s.k
+      AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT)
+  ),
+  diagnosed AS (
+    SELECT k, v, MAX(observed_value) AS observed_value
+    FROM classified
+    GROUP BY k, v
+  )
+SELECT k, v,
+       observed_value AS observed_value_returned_by_select,
+       1              AS required_value_per_sql_semantics
+FROM diagnosed
+WHERE observed_value = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index ec98d2ab5b8..cce0cb9ad94 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -504,6 +504,13 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i
     }
   }
 
+  @Override
+  protected void clearSlotValue(int elementNum) {
+    vector[elementNum] = null;
+    start[elementNum] = 0;
+    length[elementNum] = 0;
+  }
+
   @Override
   public void init() {
     initBuffer(0);
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index 9f611dfd313..ee5e3f3885e 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -231,6 +231,32 @@ public abstract void setElement(int outputElementNum, int inputElementNum,
   public abstract void copySelected(
       boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector);
 
+  /**
+   * Mark the slot null and put its underlying value into a defined cleared
+   * state. Sets {@code isNull[elementNum] = true} and {@code noNulls = false},
+   * then dispatches to {@link #clearSlotValue(int)} for per-type clearing.
+   *
+   * <p>Defends against consumers that read {@code vector[i]} without first
+   * checking {@code isNull[i]}. Distinct from per-type {@code NULL_VALUE}
+   * sentinels (e.g. {@link LongColumnVector#NULL_VALUE}), which assume the
+   * isNull[]-first contract. Final by design — subclasses customize behavior
+   * by overriding {@link #clearSlotValue(int)}, never this method.
+   */
+  public final void clearValue(int elementNum) {
+    noNulls = false;
+    isNull[elementNum] = true;
+    clearSlotValue(elementNum);
+  }
+
+  /**
+   * Per-type slot-clearing hook invoked by {@link #clearValue(int)}.
+   * Subclasses override to zero out their value array at {@code elementNum}.
+   * Container and void types inherit the no-op default.
+   */
+  protected void clearSlotValue(int elementNum) {
+    // Default no-op.
+  }
+
   /**
    * Initialize the column vector. This method can be overridden by specific column vector types.
    * Use this method only if the individual type of the column vector is not known, otherwise its
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 5defd27623b..e0cdd76de15 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -144,6 +144,11 @@ public void setElement(int outputElementNum, int 
inputElementNum, ColumnVector i
     }
   }
 
+  @Override
+  protected void clearSlotValue(int elementNum) {
+    vector[elementNum].setFromLong(0L);
+  }
+
   @Override
   public void stringifyValue(StringBuilder buffer, int row) {
     if (isRepeating) {
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index f833bde03f6..fcf297585c6 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -220,6 +220,11 @@ public void setElement(int outputElementNum, int 
inputElementNum, ColumnVector i
     }
   }
 
+  @Override
+  protected void clearSlotValue(int elementNum) {
+    vector[elementNum] = 0.0;
+  }
+
   @Override
   public void stringifyValue(StringBuilder buffer, int row) {
     if (isRepeating) {
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index 9324bc0c610..2b61b09e38c 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -311,6 +311,11 @@ public void setNullValue(int elementNum) {
     nanos[elementNum] = 1;
   }
 
+  @Override
+  protected void clearSlotValue(int elementNum) {
+    setNullValue(elementNum);
+  }
+
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
   @Override
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index bf423674b2a..dc727b462a7 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -294,6 +294,11 @@ public void setElement(int outputElementNum, int 
inputElementNum, ColumnVector i
     }
   }
 
+  @Override
+  protected void clearSlotValue(int elementNum) {
+    vector[elementNum] = 0L;
+  }
+
   @Override
   public void stringifyValue(StringBuilder buffer, int row) {
     if (isRepeating) {
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
index f97156c4038..c49149cdb28 100644
--- 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -378,6 +378,11 @@ public void setNullValue(int elementNum) {
     nanos[elementNum] = 1;
   }
 
+  @Override
+  protected void clearSlotValue(int elementNum) {
+    setNullValue(elementNum);
+  }
+
   // Copy the current object contents into the output. Only copy selected entries,
   // as indicated by selectedInUse and the sel array.
   @Override
diff --git 
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
 
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
index be4ff70935a..dc9c045727d 100644
--- 
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
+++ 
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
@@ -25,7 +25,9 @@
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 
@@ -220,4 +222,31 @@ private static byte[] writeToBytesColumnVector(int rowIdx, BytesColumnVector col
     col.setValPreallocated(rowIdx, writeSize);
     return bytes;
   }
+
+  @Test
+  public void testClearValue() {
+    BytesColumnVector cv = new BytesColumnVector(4);
+    byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
+    cv.vector[0] = data;
+    cv.start[0] = 1;
+    cv.length[0] = 3;
+
+    byte[] neighborData = "world".getBytes(StandardCharsets.UTF_8);
+    cv.vector[1] = neighborData;
+    cv.start[1] = 2;
+    cv.length[1] = 4;
+
+    cv.clearValue(0);
+
+    assertTrue(cv.isNull[0]);
+    assertFalse(cv.noNulls);
+    assertNull(cv.vector[0]);
+    assertEquals(0, cv.start[0]);
+    assertEquals(0, cv.length[0]);
+
+    assertSame(neighborData, cv.vector[1]);
+    assertEquals(2, cv.start[1]);
+    assertEquals(4, cv.length[1]);
+    assertFalse(cv.isNull[1]);
+  }
 }
diff --git 
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java
 
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java
new file mode 100644
index 00000000000..2644ff8f1bc
--- /dev/null
+++ 
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestDecimalColumnVector {
+
+  @Test
+  void clearValueZeroesSlotAndMarksNull() {
+    DecimalColumnVector cv = new DecimalColumnVector(4, 18, 4);
+    cv.vector[1].setFromLong(12345L);
+    cv.vector[2].setFromLong(67890L);
+
+    cv.clearValue(1);
+
+    assertTrue(cv.isNull[1]);
+    assertFalse(cv.noNulls);
+    assertEquals(0L, cv.vector[1].serialize64(cv.scale));
+    // Neighbour slot untouched: still represents 67890.
+    assertEquals(67890L, cv.vector[2].serialize64((short) 0));
+    assertFalse(cv.isNull[2]);
+  }
+}
diff --git 
a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java
 
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java
new file mode 100644
index 00000000000..a67ff94ef32
--- /dev/null
+++ 
b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestDoubleColumnVector {
+  /** clearValue(i) must zero slot i, flag it null, and leave every other slot alone. */
+  @Test
+  void clearValueZeroesSlotAndMarksNull() {
+    DoubleColumnVector cv = new DoubleColumnVector(4);
+    cv.vector[1] = 3.14; // slot that gets cleared
+    cv.vector[0] = 1.5; // neighbours used as controls
+    cv.vector[3] = -2.5;
+
+    cv.clearValue(1);
+
+    assertTrue(cv.isNull[1]);
+    assertFalse(cv.noNulls); // the vector-wide flag must no longer claim "no nulls"
+    assertEquals(0.0, cv.vector[1]);
+    assertEquals(1.5, cv.vector[0]); // control values are unchanged ...
+    assertEquals(-2.5, cv.vector[3]);
+    assertFalse(cv.isNull[0]); // ... and the controls were not flagged null
+    assertFalse(cv.isNull[3]);
+  }
+}
diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java
new file mode 100644
index 00000000000..d715508e148
--- /dev/null
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestIntervalDayTimeColumnVector {
+  /** clearValue(i) must flag slot i null and overwrite the interval previously stored there. */
+  @Test
+  void clearValueZeroesSlotAndMarksNull() {
+    IntervalDayTimeColumnVector cv = new IntervalDayTimeColumnVector(4);
+    cv.set(3, new HiveIntervalDayTime(5, 0)); // stale value that clearValue must erase
+
+    cv.clearValue(3);
+
+    assertTrue(cv.isNull[3]);
+    assertFalse(cv.noNulls); // the vector-wide flag must no longer claim "no nulls"
+    // setNullValue convention: totalSeconds = 0, nanos = 1 (so nanos is not zero despite the name)
+    assertEquals(0L, cv.getTotalSeconds(3));
+    assertEquals(1, cv.getNanos(3));
+  }
+}
diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java
new file mode 100644
index 00000000000..c1c8acc25e9
--- /dev/null
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestLongColumnVector {
+  /** clearValue(i) must zero slot i, flag it null, and leave every other slot alone. */
+  @Test
+  void clearValueZeroesSlotAndMarksNull() {
+    LongColumnVector cv = new LongColumnVector(4);
+    cv.vector[2] = 2025L; // slot that gets cleared
+    cv.vector[1] = 7L; // neighbours used as controls
+    cv.vector[3] = 9L;
+
+    cv.clearValue(2);
+
+    assertTrue(cv.isNull[2]);
+    assertFalse(cv.noNulls); // the vector-wide flag must no longer claim "no nulls"
+    assertEquals(0L, cv.vector[2]);
+    assertEquals(7L, cv.vector[1]); // control values are unchanged ...
+    assertEquals(9L, cv.vector[3]);
+    assertFalse(cv.isNull[1]); // ... and the controls were not flagged null
+    assertFalse(cv.isNull[3]);
+  }
+}
diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
index 2d85b115d24..dda52797246 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.sql.Timestamp;
@@ -246,4 +248,18 @@ private Thread startVectorManipulationThread(final int vectorLength, final long
     return thread;
   }
 
+  @Test
+  public void testClearValue() {
+    TimestampColumnVector cv = new TimestampColumnVector(4);
+    cv.time[2] = 1234567890000L; // stale value that clearValue must erase
+    cv.nanos[2] = 999;
+
+    cv.clearValue(2);
+
+    assertTrue(cv.isNull[2]);
+    assertFalse(cv.noNulls); // the vector-wide flag must no longer claim "no nulls"
+    // setNullValue convention: time = 0, nanos = 1
+    assertEquals(0L, cv.time[2]);
+    assertEquals(1, cv.nanos[2]);
+  }
 }


Reply via email to