Re: [PR] HIVE-29479: Improve histogram-based selectivity estimation for two-sided range predicates [hive]

via GitHub Fri, 15 May 2026 13:06:59 -0700


rubenada commented on code in PR #6477:
URL: https://github.com/apache/hive/pull/6477#discussion_r3250704963



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java:
##########
@@ -603,6 +605,151 @@ private Optional<Float> extractLiteral(SqlTypeName 
typeName, Object boundValueOb
     return Optional.of(value);
   }
 
+  private double computeSearchSelectivity(RexCall search) {
+    return new SearchSelectivityHelper<>(search).compute();
+  }
+
+  /**
+   * Similar to {@link SearchTransformer}, but computing the selectivity of 
the expression.
+   */
+  private final class SearchSelectivityHelper<C extends Comparable<C>> {
+    private final RexNode ref;
+    private final Sarg<C> sarg;
+    private final RelDataType operandType;
+
+    private SearchSelectivityHelper(RexCall search) {
+      ref = search.getOperands().get(0);
+      RexLiteral literal = (RexLiteral) search.operands.get(1);
+      sarg = Objects.requireNonNull(literal.getValueAs(Sarg.class), "Sarg");
+      operandType = literal.getType();
+    }
+
+    private RexNode makeLiteral(C value) {
+      return rexBuilder.makeLiteral(value, operandType, true, true);
+    }
+
+    private double compute() {
+      final List<Double> selectivityList = new ArrayList<>();
+      final List<RexNode> inLiterals = new ArrayList<>();
+
+      if (sarg.nullAs == RexUnknownAs.TRUE) {
+        selectivityList.add(
+            rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, 
ref).accept(FilterSelectivityEstimator.this));
+      }
+
+      RangeSets.forEach(sarg.rangeSet, new RangeSets.Consumer<C>() {
+        @Override
+        public void all() {
+          selectivityList.add(1.0);
+        }
+
+        @Override
+        public void singleton(C value) {
+          inLiterals.add(rexBuilder.makeLiteral(value, operandType, true, 
true));
+        }
+
+        @Override
+        public void atLeast(C lower) {
+          Optional<Float> lowerLiteral = extractLiteral(makeLiteral(lower));
+          if (lowerLiteral.isEmpty()) {
+            selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+          } else {
+            processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+                Range.range(lowerLiteral.get(), BoundType.CLOSED, 
Float.POSITIVE_INFINITY, BoundType.CLOSED));
+          }
+        }
+
+        @Override
+        public void atMost(C upper) {
+          Optional<Float> upperLiteral = extractLiteral(makeLiteral(upper));
+          if (upperLiteral.isEmpty()) {
+            selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+          } else {
+            processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+                Range.range(Float.NEGATIVE_INFINITY, BoundType.CLOSED, 
upperLiteral.get(), BoundType.CLOSED));
+          }
+        }
+
+        @Override
+        public void greaterThan(C lower) {
+          Optional<Float> lowerLiteral = extractLiteral(makeLiteral(lower));
+          if (lowerLiteral.isEmpty()) {
+            selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+          } else {
+            processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+                Range.range(lowerLiteral.get(), BoundType.OPEN, 
Float.POSITIVE_INFINITY, BoundType.CLOSED));
+          }
+        }
+
+        @Override
+        public void lessThan(C upper) {
+          Optional<Float> upperLiteral = extractLiteral(makeLiteral(upper));
+          if (upperLiteral.isEmpty()) {
+            selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+          } else {
+            processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+                Range.range(Float.NEGATIVE_INFINITY, BoundType.CLOSED, 
upperLiteral.get(), BoundType.OPEN));
+          }
+        }
+
+        @Override
+        public void closed(C lower, C upper) {
+          processRange(lower, BoundType.CLOSED, upper, BoundType.CLOSED);
+        }
+
+        @Override
+        public void closedOpen(C lower, C upper) {
+          processRange(lower, BoundType.CLOSED, upper, BoundType.OPEN);
+        }
+
+        @Override
+        public void openClosed(C lower, C upper) {
+          processRange(lower, BoundType.OPEN, upper, BoundType.CLOSED);
+        }
+
+        @Override
+        public void open(C lower, C upper) {
+          processRange(lower, BoundType.OPEN, upper, BoundType.OPEN);
+        }
+
+        private void processRange(C lower, BoundType lowerBoundType, C upper, 
BoundType upperBoundType) {
+          RexNode lowerRexLiteral = makeLiteral(lower);
+          RexNode upperRexLiteral = makeLiteral(upper);
+          Supplier<Double> defaultSelectivity =
+              () -> computeFunctionSelectivity(List.of(ref, lowerRexLiteral, 
upperRexLiteral));
+          Optional<Float> lowerLiteral = extractLiteral(lowerRexLiteral);
+          Optional<Float> upperLiteral = extractLiteral(upperRexLiteral);
+          if (lowerLiteral.isEmpty() || upperLiteral.isEmpty()) {
+            selectivityList.add(defaultSelectivity.get());
+          } else {
+            processRange(defaultSelectivity,
+                Range.range(lowerLiteral.get(), lowerBoundType, 
upperLiteral.get(), upperBoundType));
+          }
+        }
+
+        private void processRange(Supplier<Double> defaultSelectivity, 
Range<Float> boundaries) {
+          
selectivityList.add(computeRangePredicateSelectivity(defaultSelectivity, ref, 
boundaries));
+        }
+      });
+
+      switch (inLiterals.size()) {
+      case 0:
+        break;
+      case 1:
+        selectivityList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, 
ref, inLiterals.get(0))
+            .accept(FilterSelectivityEstimator.this));
+        break;
+      default:
+        List<RexNode> operands = new ArrayList<>(inLiterals.size() + 1);
+        operands.add(ref);
+        operands.addAll(inLiterals);
+        selectivityList.add(rexBuilder.makeCall(HiveIn.INSTANCE, 
operands).accept(FilterSelectivityEstimator.this));
+      }
+
+      return selectivityList.size() == 1 ? selectivityList.get(0) : 
computeDisjunctionSelectivity(selectivityList);

Review Comment:
   Indeed. I have applied the change to add up the range selectivities (and it 
gives better results in certain tests, as expected).
   I have kept the "disjunction logic" for the combination with the other 
expressions (EQ/IN, IS_NULL), since it seems more aligned with how this class 
works in general when computing OR-combined RexNodes. But I agree this can be 
revisited in the future.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] HIVE-29479: Improve histogram-based selectivity estimation for two-sided range predicates [hive]

Reply via email to