thomasrebele commented on code in PR #6293:
URL: https://github.com/apache/hive/pull/6293#discussion_r2872837427
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java:
##########
@@ -295,21 +314,54 @@ private static Range<Float>
getRangeOfDecimalType(RelDataType type, BoundType lo
* @param typeRange the boundaries of the type range
* @return the adjusted boundary
*/
- private static Range<Float> adjustRangeToDecimalType(Range<Float>
predicateRange, RelDataType type,
+ private static Range<Float> adjustRangeToType(Range<Float> predicateRange,
RelDataType type,
Range<Float> typeRange) {
- float adjust = (float) (5 * Math.pow(10, -(type.getScale() + 1)));
- // the resulting value of +- adjust would be rounded up, so in some cases
we need to use Math.nextDown
boolean lowerInclusive =
BoundType.CLOSED.equals(predicateRange.lowerBoundType());
boolean upperInclusive =
BoundType.CLOSED.equals(predicateRange.upperBoundType());
- float adjusted1 = lowerInclusive ? predicateRange.lowerEndpoint() - adjust
- : Math.nextDown(predicateRange.lowerEndpoint() + adjust);
- float adjusted2 = upperInclusive ?
Math.nextDown(predicateRange.upperEndpoint() + adjust)
- : predicateRange.upperEndpoint() - adjust;
- float lower = Math.max(adjusted1, typeRange.lowerEndpoint());
- float upper = Math.min(adjusted2, typeRange.upperEndpoint());
- // the boundaries might result in an invalid range (e.g., left > right)
- // in that case the predicate does not select anything, and we return an
empty range
- return makeRange(lower, predicateRange.lowerBoundType(), upper,
predicateRange.upperBoundType());
+ switch (type.getSqlTypeName()) {
+ case TINYINT, SMALLINT, INTEGER, BIGINT: {
+ // when casting a floating point, its values are rounded towards 0
+ // i.e, 10.99 is rounded to 10, and -10.99 is rounded to -10
+ // to take this into account, the predicate range is transformed in the
following ways
+ // [10.0, 15.0] -> [10, 15.99999]
+ // (10.0, 15.0) -> [11, 14.99999]
+ // [10.2, 15.2] -> [11, 15.99999]
+ // (10.2, 15.2) -> [11, 15.99999]
+
+ // [-15.0, -10.0] -> [-15.9999, -10]
+ // (-15.0, -10.0) -> [-14.9999, -11]
+ // [-15.2, -10.2] -> [-15.9999, -11]
+ // (-15.2, -10.2) -> [-15.9999, -11]
+
+ // normalize the range to make the formulas easier
+ Range<Float> range = convertRangeToClosedOpen(predicateRange);
+ Range<Float> typeClosedOpen = convertRangeToClosedOpen(typeRange);
+ float rangeLower = (range.lowerEndpoint() >= 0 ? (float)
Math.ceil(range.lowerEndpoint())
+ : Math.nextUp(-(float)
Math.ceil(Math.nextUp(-range.lowerEndpoint()))));
+ float rangeUpper = range.upperEndpoint() >= 0 ? Math.nextDown((float)
Math.ceil(range.upperEndpoint()))
+ : Math.nextUp((float) -Math.ceil(-range.upperEndpoint()));
+ float lower = Math.max(typeClosedOpen.lowerEndpoint(), rangeLower);
+ float upper = Math.min(typeClosedOpen.upperEndpoint(), rangeUpper);
+ return makeRange(lower, BoundType.CLOSED, upper, BoundType.OPEN);
+ }
+ case DECIMAL: {
+ float adjust = (float) (5 * Math.pow(10, -(type.getScale() + 1)));
+ // the resulting value of +- adjust would be rounded up, so in some
cases we need to use Math.nextDown
+ float adjusted1 = lowerInclusive ? predicateRange.lowerEndpoint() -
adjust
+ : Math.nextDown(predicateRange.lowerEndpoint() + adjust);
+ float adjusted2 = upperInclusive ?
Math.nextDown(predicateRange.upperEndpoint() + adjust)
+ : predicateRange.upperEndpoint() - adjust;
+ float lower = Math.max(adjusted1, typeRange.lowerEndpoint());
+ float upper = Math.min(adjusted2, typeRange.upperEndpoint());
+ // the boundaries might result in an invalid range (e.g., left > right)
+ // in that case the predicate does not select anything, and we return an
empty range
+ return makeRange(lower, predicateRange.lowerBoundType(), upper,
predicateRange.upperBoundType());
+ }
+ case TIMESTAMP, DATE:
+ return predicateRange;
+ default:
+ return typeRange.intersection(predicateRange);
Review Comment:
I'll add an `isConnected` check before calling `intersection`, to ensure that no exception is thrown when the type range and the predicate range are not connected.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java:
##########
@@ -367,11 +419,9 @@ private double computeRangePredicateSelectivity(RexCall
call, SqlKind op) {
int inputRefOpIndex = 1 - literalOpIdx;
RexNode node = operands.get(inputRefOpIndex);
if (isRemovableCast(node, scan)) {
- if (node.getType().getSqlTypeName() == SqlTypeName.DECIMAL) {
- Range<Float> rangeOfDecimalType =
- getRangeOfDecimalType(node.getType(), boundaries.lowerBoundType(),
boundaries.upperBoundType());
- boundaries = adjustRangeToDecimalType(boundaries, node.getType(),
rangeOfDecimalType);
- }
+ Range<Float> rangeOfDecimalType =
Review Comment:
I'll rename `rangeOfDecimalType`, since the range is no longer computed only for DECIMAL types.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]