This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 8aae2dd85855f48f05d0ad85639892f8828f1ae6
Author: Thomas Rebele <[email protected]>
AuthorDate: Thu Nov 13 06:14:30 2025 +0100

    HIVE-29176: Wrong result when HiveAntiJoin is replacing an IS NULL filter on a nullable column (#6062)
    
    (cherry picked from commit 330b01822c575662353376e715a05f814950b4db)
---
 .../hive/ql/optimizer/calcite/HiveCalciteUtil.java |  44 +--
 .../calcite/rules/HiveAntiSemiJoinRule.java        | 175 +++++++++---
 .../calcite/rules/TestHiveAntiSemiJoinRule.java    | 248 +++++++++++++++++
 .../ql/optimizer/calcite/rules/TestRuleHelper.java | 113 ++++++--
 ql/src/test/queries/clientpositive/antijoin4.q     |  64 +++++
 .../results/clientpositive/llap/antijoin3.q.out    |   2 +-
 .../results/clientpositive/llap/antijoin4.q.out    | 294 +++++++++++++++++++++
 .../results/clientpositive/llap/llap_smb_ptf.q.out |  47 ++--
 8 files changed, 871 insertions(+), 116 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index a7305326c00..5cbfb5dbe0c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -1233,43 +1233,17 @@ public FixNullabilityShuttle(RexBuilder rexBuilder,
   }
 
   /**
-   * Checks if any of the expression given as list expressions are from right side of the join.
-   *  This is used during anti join conversion.
-   *
-   * @param joinRel Join node whose right side has to be searched.
-   * @param expressions The list of expression to search.
-   * @return true if any of the expressions is from right side of join.
+   * Given a join, creates a bitset of the joined columns originating from the right-hand side.
+   * @param joinRel a join that concatenates all columns from its inputs (so no semi-join)
+   * @return a bitset of the field indexes that belong to the right-hand side input
    */
-  public static boolean hasAnyExpressionFromRightSide(RelNode joinRel, List<RexNode> expressions)  {
-    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
-    int nTotalFields = joinFields.size();
-    List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
-    int nFieldsLeft = leftFields.size();
-    ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
-
-    for (RexNode node : expressions) {
-      ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
-      if (rightBitmap.contains(inputBits)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List<RexNode> expressions) {
-    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
-    int nTotalFields = joinFields.size();
-    List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
-    int nFieldsLeft = leftFields.size();
-    ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
-
-    for (RexNode node : expressions) {
-      ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
-      if (!rightBitmap.contains(inputBits)) {
-        return false;
-      }
+  public static ImmutableBitSet getRightSideBitset(RelNode joinRel) {
+    if (joinRel.getInputs().size() != 2) {
+      throw new IllegalArgumentException("The relation must have exactly two children:\n" + RelOptUtil.toString(joinRel));
     }
-    return true;
+    int nTotalFields = joinRel.getRowType().getFieldCount();
+    int nFieldsLeft = (joinRel.getInputs().get(0)).getRowType().getFieldCount();
+    return ImmutableBitSet.range(nFieldsLeft, nTotalFields);
   }
 
   /**
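
The containment/intersection checks that used to live in the two removed helpers now happen at the call sites against this bitset. A minimal standalone sketch of that bitset arithmetic, assuming only calcite-core on the classpath (the class name and field counts below are illustrative, not part of the patch):

    import org.apache.calcite.util.ImmutableBitSet;

    public class RightSideBitsetSketch {
      public static void main(String[] args) {
        // Say the left input contributes $0..$2 and the join row type has $0..$5.
        ImmutableBitSet rhsFields = ImmutableBitSet.range(3, 6);             // {3, 4, 5}
        // A filter touching only $4 lies entirely on the right-hand side:
        System.out.println(rhsFields.contains(ImmutableBitSet.of(4)));      // true
        // A filter mixing $1 and $4 intersects the RHS but is not contained in it:
        System.out.println(rhsFields.contains(ImmutableBitSet.of(1, 4)));   // false
        System.out.println(rhsFields.intersects(ImmutableBitSet.of(1, 4))); // true
      }
    }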
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
index 3c4c2ab693c..02f2f3d6825 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
@@ -17,29 +17,40 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.RexImplicationChecker;
 import org.apache.calcite.plan.Strong;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexExecutor;
+import org.apache.calcite.rex.RexExecutorImpl;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Util;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.Optional;
 
 /**
  * Planner rule that converts a join plus filter to anti join.
@@ -86,14 +97,17 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
 
     assert (filter != null);
 
-    List<RexNode> filterList = getResidualFilterNodes(filter, join);
-    if (filterList == null) {
+    ImmutableBitSet rhsFields = HiveCalciteUtil.getRightSideBitset(join);
+    Optional<List<RexNode>> optFilterList = getResidualFilterNodes(filter, join, rhsFields);
+    if (optFilterList.isEmpty()) {
       return;
     }
+    List<RexNode> filterList = optFilterList.get();
 
     // If any projection is there from right side, then we can not convert to anti join.
-    boolean hasProjection = HiveCalciteUtil.hasAnyExpressionFromRightSide(join, project.getProjects());
-    if (hasProjection) {
+    ImmutableBitSet projectedFields = RelOptUtil.InputFinder.bits(project.getProjects(), null);
+    boolean projectionUsesRHS = projectedFields.intersects(rhsFields);
+    if (projectionUsesRHS) {
       return;
     }
 
@@ -124,13 +138,14 @@ protected void perform(RelOptRuleCall call, Project project, Filter filter, Join
   /**
    * Extracts the non-null filter conditions from given filter node.
    *
-   * @param filter The filter condition to be checked.
-   * @param join Join node whose right side has to be searched.
+   * @param filter    The filter condition to be checked.
+   * @param join      Join node whose right side has to be searched.
+   * @param rhsFields bitset of the join fields that originate from the right-hand side
    * @return null : Anti join condition is not matched for filter.
-   *         Empty list : No residual filter conditions present.
-   *         Valid list containing the filter to be applied after join.
+   *     Empty list : No residual filter conditions present.
+   *     Valid list containing the filter to be applied after join.
    */
-  private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
+  private Optional<List<RexNode>> getResidualFilterNodes(Filter filter, Join join, ImmutableBitSet rhsFields) {
     // 1. If null filter is not present from right side then we can not convert to anti join.
     // 2. If any non-null filter is present from right side, we can not convert it to anti join.
     // 3. Keep other filters which needs to be executed after join.
@@ -140,43 +155,123 @@ private List<RexNode> getResidualFilterNodes(Filter filter, Join join) {
     List<RexNode> aboveFilters = RelOptUtil.conjunctions(filter.getCondition());
     boolean hasNullFilterOnRightSide = false;
     List<RexNode> filterList = new ArrayList<>();
+    final ImmutableBitSet notNullColumnsFromRightSide = getNotNullColumnsFromRightSide(join);
+
     for (RexNode filterNode : aboveFilters) {
-      if (filterNode.getKind() == SqlKind.IS_NULL) {
-        // Null filter from right side table can be removed and its a pre-condition for anti join conversion.
-        if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode))
-            && isStrong(((RexCall) filterNode).getOperands().get(0))) {
-          hasNullFilterOnRightSide = true;
-        } else {
-          filterList.add(filterNode);
-        }
-      } else {
-        if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) {
-          // If some non null condition is present from right side, we can not convert the join to anti join as
-          // anti join does not project the fields from right side.
-          return null;
-        } else {
-          filterList.add(filterNode);
-        }
+      final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(filterNode);
+      boolean usesFieldFromRHS = usedFields.intersects(rhsFields);
+
+      if (!usesFieldFromRHS) {
+        // Only LHS fields or constants, so the filterNode is part of the residual filter
+        filterList.add(filterNode);
+        continue;
+      }
+
+      // In the following we check for filter nodes that let us deduce that
+      // "an (originally) not-null column of RHS IS NULL because the LHS row 
will not be matched"
+
+      if (filterNode.getKind() != SqlKind.IS_NULL) {
+        return Optional.empty();
+      }
+
+      boolean usesRHSFieldsOnly = rhsFields.contains(usedFields);
+      if (!usesRHSFieldsOnly) {
+        // If there is a mix between LHS and RHS fields, don't convert to anti-join
+        return Optional.empty();
+      }
+
+      // Null filter from right side table can be removed and it is a pre-condition for anti join conversion.
+      RexNode arg = ((RexCall) filterNode).getOperands().get(0);
+      if (isStrong(arg, notNullColumnsFromRightSide)) {
+        hasNullFilterOnRightSide = true;
+      } else if (!isStrong(arg, rhsFields)) {
+        // if all RHS fields are null and the IS NULL is still not fulfilled, bail out
+        return Optional.empty();
       }
     }
 
     if (!hasNullFilterOnRightSide) {
-      return null;
+      return Optional.empty();
     }
-    return filterList;
+    return Optional.of(filterList);
   }
 
-  private boolean isStrong(RexNode rexNode) {
-    AtomicBoolean hasCast = new AtomicBoolean(false);
-    rexNode.accept(new RexVisitorImpl<Void>(true) {
-      @Override
-      public Void visitCall(RexCall call) {
-        if (call.getKind() == SqlKind.CAST) {
-          hasCast.set(true);
-        }
-        return super.visitCall(call);
+  private ImmutableBitSet getNotNullColumnsFromRightSide(RelNode joinRel) {
+    // we need to shift the indices of the second child to the right
+    int shift = (joinRel.getInput(0)).getRowType().getFieldCount();
+    ImmutableBitSet rhsNotnullColumns = deduceNotNullColumns(joinRel.getInput(1));
+    return rhsNotnullColumns.shift(shift);
+  }
+
+  /**
+   * Deduce which columns of the <code>relNode</code> are definitively NOT NULL.
+   */
+  private ImmutableBitSet deduceNotNullColumns(RelNode relNode) {
+    // adapted from org.apache.calcite.plan.RelOptUtil.containsNullableFields
+    RelOptCluster cluster = relNode.getCluster();
+    final RexBuilder rexBuilder = cluster.getRexBuilder();
+    final RelMetadataQuery mq = cluster.getMetadataQuery();
+    ImmutableBitSet.Builder result = ImmutableBitSet.builder();
+    ImmutableBitSet.Builder candidatesBuilder = ImmutableBitSet.builder();
+    List<RelDataTypeField> fieldList = relNode.getRowType().getFieldList();
+    for (int i = 0; i < fieldList.size(); i++) {
+      if (fieldList.get(i).getType().isNullable()) {
+        candidatesBuilder.set(i);
+      } else {
+        result.set(i);
+      }
+    }
+    ImmutableBitSet candidates = candidatesBuilder.build();
+    if (candidates.isEmpty()) {
+      // All columns are declared NOT NULL, no need to change
+      return result.build();
+    }
+    final RexExecutor executor = cluster.getPlanner().getExecutor();
+    if (!(executor instanceof RexExecutorImpl)) {
+      // Cannot proceed without an executor.
+      return result.build();
+    }
+
+    final RexImplicationChecker checker =
+        new RexImplicationChecker(rexBuilder, executor, relNode.getRowType());
+    final RelOptPredicateList predicates = mq.getPulledUpPredicates(relNode);
+
+    ImmutableList<RexNode> preds = predicates.pulledUpPredicates;
+    final List<RexNode> antecedent = new ArrayList<>(preds);
+    final RexNode first = RexUtil.composeConjunction(rexBuilder, antecedent);
+    for (int c : candidates) {
+      RelDataTypeField field = fieldList.get(c);
+      final RexNode second = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
+          rexBuilder.makeInputRef(field.getType(), field.getIndex()));
+      // Suppose we have EMP(empno INT NOT NULL, mgr INT),
+      // and predicates [empno > 0, mgr > 0].
+      // We make first: "empno > 0 AND mgr > 0"
+      // and second: "mgr IS NOT NULL"
+      // and ask whether first implies second.
+      // It does, so we have no nullable columns.
+      if (checker.implies(first, second)) {
+        result.set(c);
       }
-    });
-    return !hasCast.get() && Strong.isStrong(rexNode);
+    }
+    return result.build();
+  }
+
+  private boolean isStrong(RexNode rexNode, ImmutableBitSet rightSideBitset) {
+    try {
+      rexNode.accept(new RexVisitorImpl<Void>(true) {
+        @Override
+        public Void visitCall(RexCall call) {
+          if (call.getKind() == SqlKind.CAST) {
+            throw Util.FoundOne.NULL;
+          }
+          return super.visitCall(call);
+        }
+      });
+    } catch (Util.FoundOne e) {
+      // Hive's CAST might introduce NULL for NOT NULL fields
+      return false;
+    }
+    return Strong.isNull(rexNode, rightSideBitset);
   }
 }
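
Note the two-step strongness check above: Strong.isNull(arg, notNullColumnsFromRightSide) proves the IS NULL can only be satisfied by an unmatched LHS row, while the CAST visitor bails out because Hive's CAST may introduce NULL even for NOT NULL inputs. A minimal sketch of the Strong.isNull building block, assuming calcite-core on the classpath (the class name, types, and field indexes are illustrative, not part of the patch):

    import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
    import org.apache.calcite.plan.Strong;
    import org.apache.calcite.rel.type.RelDataType;
    import org.apache.calcite.rex.RexBuilder;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.sql.fun.SqlStdOperatorTable;
    import org.apache.calcite.sql.type.SqlTypeName;
    import org.apache.calcite.util.ImmutableBitSet;

    public class StrongnessSketch {
      public static void main(String[] args) {
        JavaTypeFactoryImpl typeFactory = new JavaTypeFactoryImpl();
        RexBuilder rexBuilder = new RexBuilder(typeFactory);
        RelDataType nullableInt = typeFactory.createTypeWithNullability(
            typeFactory.createSqlType(SqlTypeName.INTEGER), true);
        // $3 + $4, an expression over two fields of an imagined join row type
        RexNode plus = rexBuilder.makeCall(SqlStdOperatorTable.PLUS,
            rexBuilder.makeInputRef(nullableInt, 3),
            rexBuilder.makeInputRef(nullableInt, 4));
        // PLUS is strong: if $4 is known to be null, the sum is forced to NULL ...
        System.out.println(Strong.isNull(plus, ImmutableBitSet.of(4))); // true
        // ... but a null set that covers neither operand proves nothing:
        System.out.println(Strong.isNull(plus, ImmutableBitSet.of(5))); // false
      }
    }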
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java
new file mode 100644
index 00000000000..97e808eaa40
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveAntiSemiJoinRule.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.junit.MockitoJUnitRunner;
+
+import java.util.Collections;
+
+import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.TestRuleHelper.*;
+
+@RunWith(MockitoJUnitRunner.class)
+public class TestHiveAntiSemiJoinRule {
+
+  PlanFixture fixture() {
+    RelOptPlanner planner = buildPlanner(Collections.singletonList(HiveAntiSemiJoinRule.INSTANCE));
+    // executor is needed to determine nullability of RHS columns
+    planner.setExecutor(new HiveRexExecutorImpl());
+    return new PlanFixture(planner)
+        .registerTable("t1", T1Record.class)
+        .registerTable("t2", T2Record.class)
+        .registerTable("t3", T3Record.class);
+  }
+
+  @Test
+  public void testFilterOnNullableColumn() {
+    PlanFixture fixture = fixture();
+    RelBuilder b = fixture.createRelBuilder();
+
+    // @formatter:off
+    RelNode plan = b
+        .scan("t1")
+        .scan("t2")
+        .join(JoinRelType.LEFT, b.equals(
+            b.field(2, 0, "t1nullable"),
+            b.field(2, 1, "t2id")))
+        .filter(b.isNull(b.field("t2nullable")))
+        .project(b.field("t1id"))
+        .build();
+
+    String expectedPlan = "HiveProject(t1id=[$0])\n"
+                        + "  HiveFilter(condition=[IS NULL($5)])\n"
+                        + "    HiveJoin(condition=[=($2, $3)], 
joinType=[left], algorithm=[none], cost=[not available])\n"
+                        + "      LogicalTableScan(table=[[t1]])\n"
+                        + "      LogicalTableScan(table=[[t2]])\n";
+    // @formatter:on
+
+    assertPlans(fixture.getPlanner(), plan, expectedPlan, expectedPlan);
+  }
+
+  @Test
+  public void testFilterOnFormerlyNullableColumn() {
+    PlanFixture fixture = fixture();
+    RelBuilder b = fixture.createRelBuilder();
+
+    // @formatter:off
+    RelNode plan = b
+        .scan("t1")
+        .scan("t2")
+        .filter(b.isNotNull(b.field("t2nullable")))
+        .join(JoinRelType.LEFT, b.equals(
+            b.field(2, 0, "t1nullable"),
+            b.field(2, 1, "t2nullable")))
+        // the IS NOT NULL on the RHS ensures that the values
+        // we get from t2nullable are actually NOT NULL
+        .filter(b.isNull(b.field("t2nullable")))
+        .project(b.field("t1id"))
+        .build();
+
+    String prePlan = "HiveProject(t1id=[$0])\n"
+        + "  HiveFilter(condition=[IS NULL($5)])\n"
+        + "    HiveJoin(condition=[=($2, $5)], joinType=[left], 
algorithm=[none], cost=[not available])\n"
+        + "      LogicalTableScan(table=[[t1]])\n"
+        + "      HiveFilter(condition=[IS NOT NULL($2)])\n"
+        + "        LogicalTableScan(table=[[t2]])\n";
+
+    String postPlan = "HiveProject(t1id=[$0])\n"
+        + "  HiveAntiJoin(condition=[=($2, $5)], joinType=[anti])\n"
+        + "    LogicalTableScan(table=[[t1]])\n"
+        + "    HiveFilter(condition=[IS NOT NULL($2)])\n"
+        + "      LogicalTableScan(table=[[t2]])\n";
+    // @formatter:on
+
+    assertPlans(fixture.getPlanner(), plan, prePlan, postPlan);
+  }
+
+
+  @Test
+  public void testFilterIsNullFromBothSides() {
+    PlanFixture fixture = fixture();
+
+    RelNode plan;
+    try (Hook.Closeable ignore = Hook.REL_BUILDER_SIMPLIFY.addThread(Hook.propertyJ(false))) {
+      RelBuilder b = fixture.createRelBuilder();
+      // @formatter:off
+      plan = b.scan("t1")
+              .scan("t2")
+              .join(JoinRelType.LEFT, b.equals(b.field(2, 0, "t1nullable"), b.field(2, 1, "t2id")))
+              .filter(b.isNull(b.call(SqlStdOperatorTable.PLUS, b.field("t2nullable"), b.field("t1nullable"))))
+              .project(b.field("t1id")).build();
+      // @formatter:on
+    }
+
+    // @formatter:off
+    String expectedPlan = "HiveProject(t1id=[$0])\n"
+                        + "  HiveFilter(condition=[IS NULL(+($5, $2))])\n"
+                        + "    HiveJoin(condition=[=($2, $3)], 
joinType=[left], algorithm=[none], cost=[not available])\n"
+                        + "      LogicalTableScan(table=[[t1]])\n"
+                        + "      LogicalTableScan(table=[[t2]])\n";
+    // @formatter:on
+
+    assertPlans(fixture.getPlanner(), plan, expectedPlan, expectedPlan);
+  }
+
+  @Test
+  public void testFilterOnNotNullColumn() {
+    PlanFixture fixture = fixture();
+    RelBuilder b = fixture.createRelBuilder();
+
+    // @formatter:off
+    RelNode plan = b
+        .scan("t1")
+        .scan("t2")
+        .join(JoinRelType.LEFT, b.equals(
+            b.field(2, 0, "t1nullable"),
+            b.field(2, 1, "t2id")))
+        .filter(b.isNull(b.field("t2notnull")))
+        .project(b.field("t1id"))
+        .build();
+
+    String prePlan = "HiveProject(t1id=[$0])\n"
+                   + "  HiveFilter(condition=[IS NULL($4)])\n"
+                   + "    HiveJoin(condition=[=($2, $3)], joinType=[left], 
algorithm=[none], cost=[not available])\n"
+                   + "      LogicalTableScan(table=[[t1]])\n"
+                   + "      LogicalTableScan(table=[[t2]])\n";
+
+    String postPlan = "HiveProject(t1id=[$0])\n"
+                   + "  HiveAntiJoin(condition=[=($2, $3)], joinType=[anti])\n"
+                   + "    LogicalTableScan(table=[[t1]])\n"
+                   + "    LogicalTableScan(table=[[t2]])\n";
+    // @formatter:on
+
+    assertPlans(fixture.getPlanner(), plan, prePlan, postPlan);
+  }
+
+  /** Check RHS without any nullable columns */
+  @Test
+  public void testFilterOnNotNullColumn2() {
+    PlanFixture fixture = fixture();
+    RelBuilder b = fixture.createRelBuilder();
+
+    // @formatter:off
+    RelNode plan = b
+        .scan("t1")
+        .scan("t3")
+        .join(JoinRelType.LEFT, b.equals(
+            b.field(2, 0, "t1nullable"),
+            b.field(2, 1, "t3id")))
+        .filter(b.isNull(b.field("t3notnull")))
+        .project(b.field("t1id"))
+        .build();
+
+    String prePlan = "HiveProject(t1id=[$0])\n"
+        + "  HiveFilter(condition=[IS NULL($4)])\n"
+        + "    HiveJoin(condition=[=($2, $3)], joinType=[left], 
algorithm=[none], cost=[not available])\n"
+        + "      LogicalTableScan(table=[[t1]])\n"
+        + "      LogicalTableScan(table=[[t3]])\n";
+
+    String postPlan = "HiveProject(t1id=[$0])\n"
+        + "  HiveAntiJoin(condition=[=($2, $3)], joinType=[anti])\n"
+        + "    LogicalTableScan(table=[[t1]])\n"
+        + "    LogicalTableScan(table=[[t3]])\n";
+    // @formatter:on
+
+    assertPlans(fixture.getPlanner(), plan, prePlan, postPlan);
+  }
+
+  @Test
+  public void testFilterOnNullAndNotNullColumn() {
+    PlanFixture fixture = fixture();
+    RelBuilder b = fixture.createRelBuilder();
+
+    // @formatter:off
+    RelNode plan = b
+        .scan("t1")
+        .scan("t2")
+        .join(JoinRelType.LEFT, b.equals(
+            b.field(2, 0, "t1nullable"),
+            b.field(2, 1, "t2id")))
+        .filter(b.and(b.isNull(b.field("t2notnull")), b.isNull(b.field("t2nullable"))))
+        .project(b.field("t1id"))
+        .build();
+
+    String prePlan = "HiveProject(t1id=[$0])\n"
+        + "  HiveFilter(condition=[AND(IS NULL($4), IS NULL($5))])\n"
+        + "    HiveJoin(condition=[=($2, $3)], joinType=[left], 
algorithm=[none], cost=[not available])\n"
+        + "      LogicalTableScan(table=[[t1]])\n"
+        + "      LogicalTableScan(table=[[t2]])\n";
+
+    String postPlan = "HiveProject(t1id=[$0])\n"
+        + "  HiveAntiJoin(condition=[=($2, $3)], joinType=[anti])\n"
+        + "    LogicalTableScan(table=[[t1]])\n"
+        + "    LogicalTableScan(table=[[t2]])\n";
+    // @formatter:on
+
+    assertPlans(fixture.getPlanner(), plan, prePlan, postPlan);
+  }
+
+  static class T1Record {
+    public int t1id;
+    public int t1notnull;
+    public Integer t1nullable;
+  }
+
+  static class T2Record {
+    public int t2id;
+    public int t2notnull;
+    public Integer t2nullable;
+  }
+
+  static class T3Record {
+    public int t3id;
+    public int t3notnull;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java
index 8c49f58c424..937e2c0499d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestRuleHelper.java
@@ -19,12 +19,7 @@
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
 import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
-import org.apache.calcite.plan.AbstractRelOptPlanner;
-import org.apache.calcite.plan.RelOptCluster;
-import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptSchema;
-import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.*;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgramBuilder;
 import org.apache.calcite.rel.RelNode;
@@ -37,19 +32,13 @@
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
-import org.junit.Test;
-import org.junit.runner.RunWith;
 import org.mockito.ArgumentMatchers;
-import org.mockito.Mock;
-import org.mockito.junit.MockitoJUnitRunner;
 
-import java.util.Collection;
-import java.util.Collections;
+import java.util.*;
 
 import static org.junit.Assert.assertEquals;
 import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.lenient;
+import static org.mockito.Mockito.*;
 
 public class TestRuleHelper {
 
@@ -73,7 +62,99 @@ public static AbstractRelOptPlanner buildPlanner(Collection<RelOptRule> rules) {
     return new HepPlanner(programBuilder.build());
   }
 
-  public static RelBuilder buildRelBuilder(AbstractRelOptPlanner planner,
+  public static class HiveTableMock {
+    final Class<?> recordClass;
+    final RelOptHiveTable tableMock;
+    final Table hiveTableMock;
+    final List<String> name;
+
+    public HiveTableMock(List<String> name, Class<?> recordClass, PlanFixture mockBuilder) {
+      this.name = List.copyOf(name);
+      this.recordClass = recordClass;
+      tableMock = mock(RelOptHiveTable.class);
+      hiveTableMock = mock(Table.class);
+
+      RelDataType rowTypeMock = JAVA_TYPE_FACTORY.createStructType(recordClass);
+      doReturn(rowTypeMock).when(tableMock).getRowType();
+
+      LogicalTableScan tableScan = LogicalTableScan.create(mockBuilder.optCluster, tableMock, Collections.emptyList());
+      doReturn(tableScan).when(tableMock).toRel(ArgumentMatchers.any());
+
+      doReturn(this.name).when(tableMock).getQualifiedName();
+
+      lenient().doReturn(hiveTableMock).when(tableMock).getHiveTableMD();
+    }
+  }
+
+  /**
+   * A fixture for creating plans with <code>HiveRelNode</code>s.
+   */
+  public static class PlanFixture {
+    final RelOptCluster optCluster;
+
+    final Map<List<String>, HiveTableMock> tables = new HashMap<>();
+
+    Class<?> defaultRecordClass;
+
+    public PlanFixture(RelOptPlanner planner) {
+      RexBuilder rexBuilder = new RexBuilder(JAVA_TYPE_FACTORY);
+      optCluster = RelOptCluster.create(planner, rexBuilder);
+    }
+
+    /**
+     * Register a table in the schema, using the attributes of the class as columns.
+     */
+    public PlanFixture registerTable(String name, Class<?> recordClass) {
+      return registerTable(List.of(name), recordClass);
+    }
+
+    /**
+     * Similar to {@link #registerTable(String, Class)}, but with a qualified name.
+     * <p>
+     * See {@link RelOptTable#getQualifiedName()}.
+     */
+    public PlanFixture registerTable(List<String> name, Class<?> recordClass) {
+      name = List.copyOf(name);
+      tables.put(name, new HiveTableMock(name, recordClass, this));
+      return this;
+    }
+
+    /**
+     * Allows using any table name when scanning.
+     * <p>
+     * The scanned table will provide the attributes of the class as columns.
+     */
+    public PlanFixture setDefaultRecordClass(Class<?> recordClass) {
+      this.defaultRecordClass = recordClass;
+      return this;
+    }
+
+    public RelOptPlanner getPlanner() {
+      return optCluster.getPlanner();
+    }
+
+    public RelBuilder createRelBuilder() {
+      final RelOptSchema schemaMock;
+      schemaMock = mock(RelOptSchema.class);
+      // create a copy that we can modify in our method
+      Map<List<String>, HiveTableMock> tableMap = new HashMap<>(tables);
+
+      when(schemaMock.getTableForMember(any())).thenAnswer(i -> {
+        List<String> tableName = i.getArgument(0);
+        HiveTableMock hiveTableMock = tableMap.get(tableName);
+        if (hiveTableMock == null) {
+          Objects.requireNonNull(defaultRecordClass, "Table " + tableName + " was not registered with the mock, and no default table provided");
+          hiveTableMock = new HiveTableMock(tableName, defaultRecordClass, this);
+          tableMap.put(tableName, hiveTableMock);
+        }
+        return hiveTableMock.tableMock;
+      });
+
+      return HiveRelFactories.HIVE_BUILDER.create(optCluster, schemaMock);
+    }
+  }
+
+  public static RelBuilder buildRelBuilder(RelOptPlanner planner,
       RelOptSchema schemaMock, RelOptHiveTable tableMock, Table hiveTableMock, Class<?> clazz) {
 
     RexBuilder rexBuilder = new RexBuilder(JAVA_TYPE_FACTORY);
@@ -101,7 +182,7 @@ static RexNode and(RelBuilder relBuilder, RexNode... args) {
     return relBuilder.call(SqlStdOperatorTable.AND, args);
   }
 
-  static void assertPlans(AbstractRelOptPlanner planner, RelNode plan, String expectedPrePlan, String expectedPostPlan) {
+  static void assertPlans(RelOptPlanner planner, RelNode plan, String expectedPrePlan, String expectedPostPlan) {
     planner.setRoot(plan);
     RelNode optimizedRelNode = planner.findBestExp();
     assertEquals("Original plans do not match", expectedPrePlan, 
RelOptUtil.toString(plan));
diff --git a/ql/src/test/queries/clientpositive/antijoin4.q b/ql/src/test/queries/clientpositive/antijoin4.q
new file mode 100644
index 00000000000..81cb5ce9954
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/antijoin4.q
@@ -0,0 +1,64 @@
+SET hive.vectorized.execution.enabled=false;
+set hive.mapred.mode=nonstrict;
+SET hive.auto.convert.join=false;
+SET hive.auto.convert.anti.join=true;
+-- SORT_QUERY_RESULTS
+
+create table antijoin3_t1 (t1id int not null, t1notnull string not null, t1nullable string);
+create table antijoin3_t2 (t2id int not null, t2notnull string not null, t2nullable string);
+create table antijoin3_t3 (t3id int not null);
+
+insert into antijoin3_t1 values
+(0, "val_0", null),
+(1, "val_1", null),
+(2, "val_2", "val_2"),
+(3, "val_3", "val_3"),
+(4, "val_4", "val_4");
+
+insert into antijoin3_t2 values
+(0, "val_0", null),
+(1, "val_1", null),
+(4, "val_4", "val_4"),
+(5, "val_5", "val_5");
+
+insert into antijoin3_t3 values (0), (4), (6);
+
+-- do not introduce anti-join if filtering a nullable column with IS NULL
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null;
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null;
+
+-- but introduce anti-join if filtering a NOT NULL column with IS NULL
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null;
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null;
+
+-- play it safe and do not introduce antijoin for filters combining LHS and RHS columns
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null;
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null;
+
+-- selecting constants does not prevent an anti-join (HIVE-29164)
+explain cbo select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null;
+select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null;
+
+-- check whether nullability is propagated correctly
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+where t2notnull is null;
+
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+where t2notnull is null;
+
+explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+-- t3id is from the RHS of the left join, so it becomes nullable, so no antijoin
+where t3id is null;
+
+select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+-- t3id is from the RHS of the left join, so it becomes nullable, so no antijoin
+where t3id is null;
+
diff --git a/ql/src/test/results/clientpositive/llap/antijoin3.q.out b/ql/src/test/results/clientpositive/llap/antijoin3.q.out
index b12a4fe603f..1451f519e47 100644
--- a/ql/src/test/results/clientpositive/llap/antijoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/antijoin3.q.out
@@ -177,7 +177,7 @@ POSTHOOK: Input: default@tabb
 POSTHOOK: Input: default@tabc
 #### A masked pattern was here ####
 CBO PLAN:
-HiveProject(id=[$1], id1=[null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id2=[$0])
+HiveProject(a.id=[$1], b.id=[null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], c.id=[$0])
   HiveJoin(condition=[=($0, $1)], joinType=[right], algorithm=[none], cost=[not available])
     HiveProject(id=[$0])
       HiveFilter(condition=[IS NOT NULL($0)])
diff --git a/ql/src/test/results/clientpositive/llap/antijoin4.q.out b/ql/src/test/results/clientpositive/llap/antijoin4.q.out
new file mode 100644
index 00000000000..62cbf85908d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/antijoin4.q.out
@@ -0,0 +1,294 @@
+PREHOOK: query: create table antijoin3_t1 (t1id int not null, t1notnull string not null, t1nullable string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@antijoin3_t1
+POSTHOOK: query: create table antijoin3_t1 (t1id int not null, t1notnull string not null, t1nullable string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@antijoin3_t1
+PREHOOK: query: create table antijoin3_t2 (t2id int not null, t2notnull string not null, t2nullable string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@antijoin3_t2
+POSTHOOK: query: create table antijoin3_t2 (t2id int not null, t2notnull string not null, t2nullable string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@antijoin3_t2
+PREHOOK: query: create table antijoin3_t3 (t3id int not null)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@antijoin3_t3
+POSTHOOK: query: create table antijoin3_t3 (t3id int not null)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@antijoin3_t3
+PREHOOK: query: insert into antijoin3_t1 values
+(0, "val_0", null),
+(1, "val_1", null),
+(2, "val_2", "val_2"),
+(3, "val_3", "val_3"),
+(4, "val_4", "val_4")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@antijoin3_t1
+POSTHOOK: query: insert into antijoin3_t1 values
+(0, "val_0", null),
+(1, "val_1", null),
+(2, "val_2", "val_2"),
+(3, "val_3", "val_3"),
+(4, "val_4", "val_4")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@antijoin3_t1
+POSTHOOK: Lineage: antijoin3_t1.t1id SCRIPT []
+POSTHOOK: Lineage: antijoin3_t1.t1notnull SCRIPT []
+POSTHOOK: Lineage: antijoin3_t1.t1nullable SCRIPT []
+PREHOOK: query: insert into antijoin3_t2 values
+(0, "val_0", null),
+(1, "val_1", null),
+(4, "val_4", "val_4"),
+(5, "val_5", "val_5")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@antijoin3_t2
+POSTHOOK: query: insert into antijoin3_t2 values
+(0, "val_0", null),
+(1, "val_1", null),
+(4, "val_4", "val_4"),
+(5, "val_5", "val_5")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@antijoin3_t2
+POSTHOOK: Lineage: antijoin3_t2.t2id SCRIPT []
+POSTHOOK: Lineage: antijoin3_t2.t2notnull SCRIPT []
+POSTHOOK: Lineage: antijoin3_t2.t2nullable SCRIPT []
+PREHOOK: query: insert into antijoin3_t3 values (0), (4), (6)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@antijoin3_t3
+POSTHOOK: query: insert into antijoin3_t3 values (0), (4), (6)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@antijoin3_t3
+POSTHOOK: Lineage: antijoin3_t3.t3id SCRIPT []
+PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+  HiveFilter(condition=[IS NULL($4)])
+    HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available])
+      HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+        HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1])
+      HiveProject(t2id=[$0], t2nullable=[$2])
+        HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[t2])
+
+PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2nullable is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+0      val_0   NULL
+1      val_1   NULL
+2      val_2   val_2
+3      val_3   val_3
+PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+  HiveAntiJoin(condition=[=($0, $3)], joinType=[anti])
+    HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+      HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1])
+    HiveProject(t2id=[$0], t2notnull=[$1])
+      HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[t2])
+
+PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+2      val_2   val_2
+3      val_3   val_3
+PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveValues(tuples=[[]])
+
+PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where (coalesce(t1notnull,t2notnull)) is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2], _c3=[_UTF-16LE'foo':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"])
+  HiveAntiJoin(condition=[=($0, $3)], joinType=[anti])
+    HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+      HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1])
+    HiveProject(t2id=[$0], t2notnull=[$1])
+      HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[t2])
+
+PREHOOK: query: select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 
left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1id, t1notnull, t1nullable, "foo" from antijoin3_t1 t1 left join antijoin3_t2 t2 on t1id=t2id where t2notnull is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+#### A masked pattern was here ####
+2      val_2   val_2   foo
+3      val_3   val_3   foo
+PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+where t2notnull is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+PREHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+where t2notnull is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+POSTHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+  HiveAntiJoin(condition=[=($0, $3)], joinType=[anti])
+    HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+      HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1])
+    HiveProject(t2id=[$0], t2notnull=[$1], t3id=[$2])
+      HiveJoin(condition=[=($0, $2)], joinType=[left], algorithm=[none], cost=[not available])
+        HiveProject(t2id=[$0], t2notnull=[$1])
+          HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[antijoin3_t2])
+        HiveProject(t3id=[$0])
+          HiveTableScan(table=[[default, antijoin3_t3]], table:alias=[antijoin3_t3])
+
+PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+where t2notnull is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+PREHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+where t2notnull is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+POSTHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+2      val_2   val_2
+3      val_3   val_3
+PREHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+
+where t3id is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+PREHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+
+where t3id is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+POSTHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+  HiveFilter(condition=[IS NULL($4)])
+    HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available])
+      HiveProject(t1id=[$0], t1notnull=[$1], t1nullable=[$2])
+        HiveTableScan(table=[[default, antijoin3_t1]], table:alias=[t1])
+      HiveJoin(condition=[=($0, $1)], joinType=[left], algorithm=[none], cost=[not available])
+        HiveProject(t2id=[$0])
+          HiveTableScan(table=[[default, antijoin3_t2]], table:alias=[antijoin3_t2])
+        HiveProject(t3id=[$0])
+          HiveTableScan(table=[[default, antijoin3_t3]], table:alias=[antijoin3_t3])
+
+PREHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+
+where t3id is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@antijoin3_t1
+PREHOOK: Input: default@antijoin3_t2
+PREHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select t1id, t1notnull, t1nullable from antijoin3_t1 t1
+left join (select * from antijoin3_t2 left join antijoin3_t3 on t2id=t3id) sq
+on t1id=t2id
+
+where t3id is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@antijoin3_t1
+POSTHOOK: Input: default@antijoin3_t2
+POSTHOOK: Input: default@antijoin3_t3
+#### A masked pattern was here ####
+1      val_1   NULL
+2      val_2   val_2
+3      val_3   val_3
diff --git a/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out b/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out
index f9f128b394c..ba1d7fdde05 100644
--- a/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_smb_ptf.q.out
@@ -586,12 +586,18 @@ STAGE PLANS:
                   expressions: _col0 (type: int)
                   outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    null sort order: z
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
+                  Group By Operator
+                    keys: _col0 (type: int)
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -663,8 +669,8 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
                 Statistics: Num rows: 1 Data size: 304 Basic stats: PARTIAL Column stats: NONE
                 Select Operator
-                  expressions: _col7 (type: int), _col0 (type: smallint), _col1 (type: string), _col2 (type: smallint), _col3 (type: string), _col5 (type: smallint), _col6 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col10 (type: string), _col11 (type: string), _col12 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  expressions: _col7 (type: int), _col0 (type: smallint), _col1 (type: string), _col2 (type: smallint), _col3 (type: string), _col5 (type: smallint), _col6 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col11 (type: string), _col12 (type: string), _col10 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
                  Statistics: Num rows: 1 Data size: 304 Basic stats: PARTIAL Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
@@ -672,36 +678,29 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: _col0 (type: int)
                    Statistics: Num rows: 1 Data size: 304 Basic stats: PARTIAL Column stats: NONE
-                    value expressions: _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col6 (type: smallint), _col7 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col10 (type: string), _col11 (type: string), _col12 (type: string)
+                    value expressions: _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string)
         Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Anti Join 0 to 1
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col14
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
                Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col6 (type: smallint), _col7 (type: string), _col8 (type: smallint), _col9 (type: smallint), _col11 (type: string), _col12 (type: string), _col10 (type: string), _col14 (type: int)
+                  expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string), '201611160940' (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
                  Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: _col12 is null (type: boolean)
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: string), _col3 (type: smallint), _col4 (type: string), _col5 (type: smallint), _col6 (type: string), _col7 (type: smallint), _col8 (type: smallint), _col9 (type: string), _col10 (type: string), _col11 (type: string), '201611160940' (type: string)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-                      Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 1 Data size: 334 Basic stats: PARTIAL Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 7 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
