rubenada commented on code in PR #6477:
URL: https://github.com/apache/hive/pull/6477#discussion_r3250704963
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java:
##########
@@ -603,6 +605,151 @@ private Optional<Float> extractLiteral(SqlTypeName
typeName, Object boundValueOb
return Optional.of(value);
}
+ private double computeSearchSelectivity(RexCall search) {
+ return new SearchSelectivityHelper<>(search).compute();
+ }
+
+ /**
+ * Similar to {@link SearchTransformer}, but computing the selectivity of
the expression.
+ */
+ private final class SearchSelectivityHelper<C extends Comparable<C>> {
+ private final RexNode ref;
+ private final Sarg<C> sarg;
+ private final RelDataType operandType;
+
+ private SearchSelectivityHelper(RexCall search) {
+ ref = search.getOperands().get(0);
+ RexLiteral literal = (RexLiteral) search.operands.get(1);
+ sarg = Objects.requireNonNull(literal.getValueAs(Sarg.class), "Sarg");
+ operandType = literal.getType();
+ }
+
+ private RexNode makeLiteral(C value) {
+ return rexBuilder.makeLiteral(value, operandType, true, true);
+ }
+
+ private double compute() {
+ final List<Double> selectivityList = new ArrayList<>();
+ final List<RexNode> inLiterals = new ArrayList<>();
+
+ if (sarg.nullAs == RexUnknownAs.TRUE) {
+ selectivityList.add(
+ rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL,
ref).accept(FilterSelectivityEstimator.this));
+ }
+
+ RangeSets.forEach(sarg.rangeSet, new RangeSets.Consumer<C>() {
+ @Override
+ public void all() {
+ selectivityList.add(1.0);
+ }
+
+ @Override
+ public void singleton(C value) {
+ inLiterals.add(rexBuilder.makeLiteral(value, operandType, true,
true));
+ }
+
+ @Override
+ public void atLeast(C lower) {
+ Optional<Float> lowerLiteral = extractLiteral(makeLiteral(lower));
+ if (lowerLiteral.isEmpty()) {
+ selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+ } else {
+ processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+ Range.range(lowerLiteral.get(), BoundType.CLOSED,
Float.POSITIVE_INFINITY, BoundType.CLOSED));
+ }
+ }
+
+ @Override
+ public void atMost(C upper) {
+ Optional<Float> upperLiteral = extractLiteral(makeLiteral(upper));
+ if (upperLiteral.isEmpty()) {
+ selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+ } else {
+ processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+ Range.range(Float.NEGATIVE_INFINITY, BoundType.CLOSED,
upperLiteral.get(), BoundType.CLOSED));
+ }
+ }
+
+ @Override
+ public void greaterThan(C lower) {
+ Optional<Float> lowerLiteral = extractLiteral(makeLiteral(lower));
+ if (lowerLiteral.isEmpty()) {
+ selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+ } else {
+ processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+ Range.range(lowerLiteral.get(), BoundType.OPEN,
Float.POSITIVE_INFINITY, BoundType.CLOSED));
+ }
+ }
+
+ @Override
+ public void lessThan(C upper) {
+ Optional<Float> upperLiteral = extractLiteral(makeLiteral(upper));
+ if (upperLiteral.isEmpty()) {
+ selectivityList.add(DEFAULT_COMPARISON_SELECTIVITY);
+ } else {
+ processRange(() -> DEFAULT_COMPARISON_SELECTIVITY,
+ Range.range(Float.NEGATIVE_INFINITY, BoundType.CLOSED,
upperLiteral.get(), BoundType.OPEN));
+ }
+ }
+
+ @Override
+ public void closed(C lower, C upper) {
+ processRange(lower, BoundType.CLOSED, upper, BoundType.CLOSED);
+ }
+
+ @Override
+ public void closedOpen(C lower, C upper) {
+ processRange(lower, BoundType.CLOSED, upper, BoundType.OPEN);
+ }
+
+ @Override
+ public void openClosed(C lower, C upper) {
+ processRange(lower, BoundType.OPEN, upper, BoundType.CLOSED);
+ }
+
+ @Override
+ public void open(C lower, C upper) {
+ processRange(lower, BoundType.OPEN, upper, BoundType.OPEN);
+ }
+
+ private void processRange(C lower, BoundType lowerBoundType, C upper,
BoundType upperBoundType) {
+ RexNode lowerRexLiteral = makeLiteral(lower);
+ RexNode upperRexLiteral = makeLiteral(upper);
+ Supplier<Double> defaultSelectivity =
+ () -> computeFunctionSelectivity(List.of(ref, lowerRexLiteral,
upperRexLiteral));
+ Optional<Float> lowerLiteral = extractLiteral(lowerRexLiteral);
+ Optional<Float> upperLiteral = extractLiteral(upperRexLiteral);
+ if (lowerLiteral.isEmpty() || upperLiteral.isEmpty()) {
+ selectivityList.add(defaultSelectivity.get());
+ } else {
+ processRange(defaultSelectivity,
+ Range.range(lowerLiteral.get(), lowerBoundType,
upperLiteral.get(), upperBoundType));
+ }
+ }
+
+ private void processRange(Supplier<Double> defaultSelectivity,
Range<Float> boundaries) {
+
selectivityList.add(computeRangePredicateSelectivity(defaultSelectivity, ref,
boundaries));
+ }
+ });
+
+ switch (inLiterals.size()) {
+ case 0:
+ break;
+ case 1:
+ selectivityList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
ref, inLiterals.get(0))
+ .accept(FilterSelectivityEstimator.this));
+ break;
+ default:
+ List<RexNode> operands = new ArrayList<>(inLiterals.size() + 1);
+ operands.add(ref);
+ operands.addAll(inLiterals);
+ selectivityList.add(rexBuilder.makeCall(HiveIn.INSTANCE,
operands).accept(FilterSelectivityEstimator.this));
+ }
+
+ return selectivityList.size() == 1 ? selectivityList.get(0) :
computeDisjunctionSelectivity(selectivityList);
Review Comment:
Indeed. I have applied the change to add up the range selectivities (and it
gives better results in certain tests, as expected).
I have kept the "disjunction logic" for the combination with the other
expressions (EQ/IN, IS_NULL), since it seems more aligned with how this class
works in general when computing OR-combined RexNodes. But I agree this can be
revisited in the future.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]