This is an automated email from the ASF dual-hosted git repository.
soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 6d9d9b4710a HIVE-29479: Improve histogram-based selectivity estimation
for two-sided range predicates (#6477)
6d9d9b4710a is described below
commit 6d9d9b4710a7270028ef74938839e7934e5fbcc4
Author: Ruben Quesada Lopez <[email protected]>
AuthorDate: Thu May 21 04:16:49 2026 +0100
HIVE-29479: Improve histogram-based selectivity estimation for two-sided
range predicates (#6477)
---
.../calcite/stats/FilterSelectivityEstimator.java | 215 ++++++++++++++-----
.../stats/TestFilterSelectivityEstimator.java | 209 ++++++++++++++++++-
.../llap/materialized_view_rewrite_7.q.out | 230 ++++++++++-----------
3 files changed, 487 insertions(+), 167 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index e0f8eb41bf3..477e7fca984 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -25,6 +25,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Supplier;
import com.google.common.collect.BoundType;
import com.google.common.collect.Range;
@@ -44,16 +45,17 @@
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Sarg;
import org.apache.datasketches.kll.KllFloatsSketch;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
-import org.apache.hadoop.hive.ql.optimizer.calcite.SearchTransformer;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
@@ -65,6 +67,8 @@ public class FilterSelectivityEstimator extends
RexVisitorImpl<Double> {
protected static final Logger LOG =
LoggerFactory.getLogger(FilterSelectivityEstimator.class);
+ private static final double DEFAULT_COMPARISON_SELECTIVITY = 1.0 / 3.0;
+
private final RelNode childRel;
private final double childCardinality;
private final RelMetadataQuery mq;
@@ -114,7 +118,8 @@ public Double visitCall(RexCall call) {
break;
}
case SEARCH:
- return new SearchTransformer<>(rexBuilder, call,
RexUnknownAs.FALSE).transform().accept(this);
+ selectivity = computeSearchSelectivity(call);
+ break;
case OR: {
selectivity = computeDisjunctionSelectivity(call);
break;
@@ -159,7 +164,7 @@ public Double visitCall(RexCall call) {
case GREATER_THAN_OR_EQUAL:
case LESS_THAN:
case GREATER_THAN: {
- selectivity = computeRangePredicateSelectivity(call, call.getKind());
+ selectivity = computeComparisonPredicateSelectivity(call,
call.getKind());
break;
}
@@ -405,8 +410,8 @@ private static Range<Float> makeRange(float lower, float
upper, BoundType upperT
return lower > upper ? Range.closedOpen(0f, 0f) : Range.range(lower,
BoundType.CLOSED, upper, upperType);
}
- private double computeRangePredicateSelectivity(RexCall call, SqlKind op) {
- double defaultSelectivity = ((double) 1 / (double) 3);
+ private double computeComparisonPredicateSelectivity(RexCall call, SqlKind
op) {
+ double defaultSelectivity = DEFAULT_COMPARISON_SELECTIVITY;
if (!(childRel instanceof HiveTableScan)) {
return defaultSelectivity;
}
@@ -440,34 +445,56 @@ private double computeRangePredicateSelectivity(RexCall
call, SqlKind op) {
boundaryValues[boundaryIdx] = value;
inclusive[boundaryIdx] = openBound ? BoundType.OPEN : BoundType.CLOSED;
Range<Float> boundaries = Range.range(boundaryValues[0], inclusive[0],
boundaryValues[1], inclusive[1]);
-
- // extract the column index from the other operator
- final HiveTableScan scan = (HiveTableScan) childRel;
int inputRefOpIndex = 1 - literalOpIdx;
RexNode node = operands.get(inputRefOpIndex);
- if (isRemovableCast(node, scan)) {
- Range<Float> typeRange = getRangeOfType(node.getType());
- boundaries = adjustRangeToType(boundaries, node.getType(), typeRange);
+ return computeRangePredicateSelectivity(() -> defaultSelectivity, node,
boundaries);
+ }
+
+ private Double computeRangePredicateSelectivity(Supplier<Double>
defaultSelectivity, RexNode operand,
+ Range<Float> boundaries) {
+ return computeRangePredicateSelectivity(defaultSelectivity, operand,
boundaries, false);
+ }
+
+ /**
+ * Computes the selectivity of an operand in a certain range trying to
leverage the histogram information.
+ * Returns the default selectivity if the histogram is not available.
+ */
+ private Double computeRangePredicateSelectivity(Supplier<Double>
defaultSelectivity, RexNode operand,
+ Range<Float> boundaries, boolean inverseBool /* true only for
NOT_BETWEEN */) {
+ if (!(childRel instanceof HiveTableScan)) {
+ return defaultSelectivity.get();
+ }
- node = RexUtil.removeCast(node);
+ final HiveTableScan scan = (HiveTableScan) childRel;
+ Range<Float> typeRange = inverseBool ?
Range.closed(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY) : null;
+ if (isRemovableCast(operand, scan)) {
+ typeRange = getRangeOfType(operand.getType());
+ boundaries = adjustRangeToType(boundaries, operand.getType(), typeRange);
+ operand = RexUtil.removeCast(operand);
}
int inputRefIndex = -1;
- if (node.getKind().equals(SqlKind.INPUT_REF)) {
- inputRefIndex = ((RexInputRef) node).getIndex();
+ if (operand.getKind().equals(SqlKind.INPUT_REF)) {
+ inputRefIndex = ((RexInputRef) operand).getIndex();
}
if (inputRefIndex < 0) {
- return defaultSelectivity;
+ return defaultSelectivity.get();
}
final List<ColStatistics> colStats =
scan.getColStat(Collections.singletonList(inputRefIndex));
if (colStats.isEmpty() || !isHistogramAvailable(colStats.get(0))) {
- return defaultSelectivity;
+ return defaultSelectivity.get();
}
final KllFloatsSketch kll =
KllFloatsSketch.heapify(Memory.wrap(colStats.get(0).getHistogram()));
double rawSelectivity = rangedSelectivity(kll, boundaries);
+ if (inverseBool) {
+ // when inverseBool == true, this is a NOT_BETWEEN and selectivity must
be inverted
+ // if there's a cast, the inversion is with respect to its codomain
(range of the values of the cast)
+ double typeRangeSelectivity = rangedSelectivity(kll, typeRange);
+ rawSelectivity = typeRangeSelectivity - rawSelectivity;
+ }
return scaleSelectivityToNullableValues(kll, rawSelectivity, scan);
}
@@ -511,7 +538,6 @@ private Double computeBetweenPredicateSelectivity(RexCall
call) {
Optional<Float> rightLiteral = extractLiteral(operands.get(3));
if (hasLiteralBool && leftLiteral.isPresent() && rightLiteral.isPresent())
{
- final HiveTableScan scan = (HiveTableScan) childRel;
float leftValue = leftLiteral.get();
float rightValue = rightLiteral.get();
@@ -522,36 +548,9 @@ private Double computeBetweenPredicateSelectivity(RexCall
call) {
}
Range<Float> rangeBoundaries = makeRange(leftValue, rightValue,
BoundType.CLOSED);
- Range<Float> typeBoundaries = inverseBool ?
Range.closed(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY) : null;
-
RexNode expr = operands.get(1); // expr to be checked by the BETWEEN
- if (isRemovableCast(expr, scan)) {
- typeBoundaries = getRangeOfType(expr.getType());
- rangeBoundaries = adjustRangeToType(rangeBoundaries, expr.getType(),
typeBoundaries);
- expr = RexUtil.removeCast(expr);
- }
-
- int inputRefIndex = -1;
- if (expr.getKind().equals(SqlKind.INPUT_REF)) {
- inputRefIndex = ((RexInputRef) expr).getIndex();
- }
-
- if (inputRefIndex < 0) {
- return computeFunctionSelectivity(call);
- }
-
- final List<ColStatistics> colStats =
scan.getColStat(Collections.singletonList(inputRefIndex));
- if (!colStats.isEmpty() && isHistogramAvailable(colStats.get(0))) {
- final KllFloatsSketch kll =
KllFloatsSketch.heapify(Memory.wrap(colStats.get(0).getHistogram()));
- double rawSelectivity = rangedSelectivity(kll, rangeBoundaries);
- if (inverseBool) {
- // when inverseBool == true, this is a NOT_BETWEEN and selectivity
must be inverted
- // if there's a cast, the inversion is with respect to its codomain
(range of the values of the cast)
- double typeRangeSelectivity = rangedSelectivity(kll, typeBoundaries);
- rawSelectivity = typeRangeSelectivity - rawSelectivity;
- }
- return scaleSelectivityToNullableValues(kll, rawSelectivity, scan);
- }
+ return computeRangePredicateSelectivity(() ->
computeFunctionSelectivity(call), expr, rangeBoundaries,
+ inverseBool);
}
return computeFunctionSelectivity(call);
}
@@ -603,6 +602,106 @@ private Optional<Float> extractLiteral(SqlTypeName
typeName, Object boundValueOb
return Optional.of(value);
}
+ private double computeSearchSelectivity(RexCall search) {
+ return new SearchSelectivityHelper<>(search).compute();
+ }
+
+ /**
+ * Auxiliary class to compute the selectivity of a SEARCH expression.
+ */
+ private final class SearchSelectivityHelper<C extends Comparable<C>> {
+ private final RexNode ref;
+ private final Sarg<C> sarg;
+ private final RelDataType operandType;
+
+ private SearchSelectivityHelper(RexCall search) {
+ ref = search.getOperands().get(0);
+ RexLiteral literal = (RexLiteral) search.operands.get(1);
+ sarg = Objects.requireNonNull(literal.getValueAs(Sarg.class), "Sarg");
+ operandType = literal.getType();
+ }
+
+ private RexNode makeLiteral(C value) {
+ return rexBuilder.makeLiteral(value, operandType, true, true);
+ }
+
+ private double compute() {
+ final List<RexNode> inLiterals = new ArrayList<>();
+ final List<Double> rangeSelectivities = new ArrayList<>();
+ for (Range<C> range : sarg.rangeSet.asRanges()) {
+ if (!range.hasLowerBound() && !range.hasUpperBound()) {
+ return 1.0; // "all" range
+ }
+ processRangeSelectivity(range, rangeSelectivities, inLiterals);
+ }
+
+ final List<Double> searchSelectivities = new ArrayList<>();
+ if (!rangeSelectivities.isEmpty() &&
rangeSelectivities.stream().noneMatch(Objects::isNull)) {
+ // Aggregate all ranges selectivity, respecting the max value of 1
+ double total = Math.min(1.0,
rangeSelectivities.stream().mapToDouble(Double::doubleValue).sum());
+ if (total == 1.0) {
+ return 1.0;
+ }
+ searchSelectivities.add(total);
+ } else {
+ searchSelectivities.addAll(rangeSelectivities);
+ }
+
+ if (!inLiterals.isEmpty()) {
+ if (inLiterals.size() == 1) {
+
searchSelectivities.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref,
inLiterals.get(0))
+ .accept(FilterSelectivityEstimator.this));
+ } else {
+ List<RexNode> operands = new ArrayList<>(inLiterals.size() + 1);
+ operands.add(ref);
+ operands.addAll(inLiterals);
+ searchSelectivities.add(rexBuilder.makeCall(HiveIn.INSTANCE,
operands).accept(FilterSelectivityEstimator.this));
+ }
+ }
+
+ if (sarg.nullAs == RexUnknownAs.TRUE) {
+ searchSelectivities.add(
+ rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL,
ref).accept(FilterSelectivityEstimator.this));
+ }
+
+ return searchSelectivities.size() == 1 ? searchSelectivities.get(0) :
computeDisjunctionSelectivity(searchSelectivities);
+ }
+
+ private void processRangeSelectivity(Range<C> range, List<Double>
rangeSelectivities, List<RexNode> inLiterals) {
+ final boolean hasLower = range.hasLowerBound();
+ final boolean hasUpper = range.hasUpperBound();
+
+ final BoundType lowerBoundType = hasLower ? range.lowerBoundType() :
BoundType.CLOSED;
+ final BoundType upperBoundType = hasUpper ? range.upperBoundType() :
BoundType.CLOSED;
+
+ final RexNode lowerRex = hasLower ? makeLiteral(range.lowerEndpoint()) :
null;
+ final RexNode upperRex = hasUpper ? makeLiteral(range.upperEndpoint()) :
null;
+
+ // map missing bounds to infinity
+ final Optional<Float> lowerLiteral = hasLower ? extractLiteral(lowerRex)
: Optional.of(Float.NEGATIVE_INFINITY);
+ final Optional<Float> upperLiteral = hasUpper ? extractLiteral(upperRex)
: Optional.of(Float.POSITIVE_INFINITY);
+
+ // check for single value ranges
+ if (hasLower && hasUpper && lowerBoundType == BoundType.CLOSED &&
upperBoundType == BoundType.CLOSED
+ && lowerLiteral.equals(upperLiteral)) {
+ inLiterals.add(lowerRex);
+ return;
+ }
+
+ // map the range to a selectivity
+ final Supplier<Double> defaultSelectivity =
+ hasLower && hasUpper ? () -> computeFunctionSelectivity(List.of(ref,
lowerRex, upperRex))
+ : () -> DEFAULT_COMPARISON_SELECTIVITY;
+
+ if (lowerLiteral.isEmpty() || upperLiteral.isEmpty()) {
+ rangeSelectivities.add(defaultSelectivity.get());
+ } else {
+
rangeSelectivities.add(computeRangePredicateSelectivity(defaultSelectivity, ref,
+ Range.range(lowerLiteral.get(), lowerBoundType,
upperLiteral.get(), upperBoundType)));
+ }
+ }
+ }
+
/**
* NDV of "f1(x, y, z) != f2(p, q, r)" ->
* "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
@@ -633,7 +732,11 @@ private Double computeNotEqualitySelectivity(RexCall call)
{
* @return
*/
private Double computeFunctionSelectivity(RexCall call) {
- Double tmpNDV = getMaxNDV(call);
+ return computeFunctionSelectivity(call.getOperands());
+ }
+
+ private Double computeFunctionSelectivity(List<RexNode> operands) {
+ Double tmpNDV = getMaxNDV(operands);
if (tmpNDV == null) {
// Could not be computed
return null;
@@ -653,12 +756,20 @@ private Double computeFunctionSelectivity(RexCall call) {
* @return
*/
private Double computeDisjunctionSelectivity(RexCall call) {
+ List<Double> selectivityList = new ArrayList<>(call.getOperands().size());
+ for (RexNode dje : call.getOperands()) {
+ selectivityList.add(dje.accept(this));
+ }
+ return computeDisjunctionSelectivity(selectivityList);
+ }
+
+ private double computeDisjunctionSelectivity(List<Double> selectivityList) {
Double tmpCardinality;
Double tmpSelectivity;
double selectivity = 1;
- for (RexNode dje : call.getOperands()) {
- tmpSelectivity = dje.accept(this);
+ for (Double sel : selectivityList) {
+ tmpSelectivity = sel;
if (tmpSelectivity == null) {
tmpSelectivity = 0.99;
}
@@ -729,10 +840,14 @@ private long getMaxNulls(RexCall call, HiveTableScan t) {
}
private Double getMaxNDV(RexCall call) {
+ return getMaxNDV(call.getOperands());
+ }
+
+ private Double getMaxNDV(List<RexNode> operands) {
Double tmpNDV;
double maxNDV = 1.0;
InputReferencedVisitor irv;
- for (RexNode op : call.getOperands()) {
+ for (RexNode op : operands) {
if (op instanceof RexInputRef) {
tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
mq,
((RexInputRef) op).getIndex());
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
index 28dc2e1ec34..39c6ca8f80c 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java
@@ -20,6 +20,7 @@
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptPredicateList;
import org.apache.calcite.plan.RelOptSchema;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
@@ -30,7 +31,9 @@
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexSimplify;
import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;
@@ -62,7 +65,9 @@
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import static org.apache.calcite.sql.type.SqlTypeName.BIGINT;
import static org.apache.calcite.sql.type.SqlTypeName.INTEGER;
@@ -135,12 +140,14 @@ public class TestFilterSelectivityEstimator {
private static RexNode int3;
private static RexNode int4;
private static RexNode int5;
+ private static RexNode int6;
private static RexNode int7;
private static RexNode int8;
private static RexNode int10;
private static RexNode int11;
private static RelDataType tableType;
private static RexNode inputRef0;
+ private static RexNode inputRef10;
private static RexNode boolFalse;
private static RexNode boolTrue;
@@ -166,6 +173,7 @@ public static void beforeClass() {
int3 = REX_BUILDER.makeLiteral(3, integerType, true);
int4 = REX_BUILDER.makeLiteral(4, integerType, true);
int5 = REX_BUILDER.makeLiteral(5, integerType, true);
+ int6 = REX_BUILDER.makeLiteral(6, integerType, true);
int7 = REX_BUILDER.makeLiteral(7, integerType, true);
int8 = REX_BUILDER.makeLiteral(8, integerType, true);
int10 = REX_BUILDER.makeLiteral(10, integerType, true);
@@ -183,6 +191,7 @@ public static void beforeClass() {
b.add("f_bigint", TYPE_FACTORY.createSqlType(BIGINT));
b.add("f_timestamp", SqlTypeName.TIMESTAMP);
b.add("f_date", SqlTypeName.DATE).build();
+ b.add("f_numeric_nullable",
TYPE_FACTORY.createTypeWithNullability(decimalType(38, 25), true));
tableType = b.build();
RelOptPlanner planner = CalcitePlanner.createPlanner(new HiveConf());
@@ -203,13 +212,22 @@ public void before() {
currentValuesSize = VALUES.length;
doReturn(tableType).when(tableMock).getRowType();
when(tableMock.getRowCount()).thenAnswer(a -> (double) currentValuesSize);
+ Set<Float> set = new HashSet<>();
+ for (float f : VALUES) {
+ set.add(f);
+ }
+ double distinctRowCount = set.size();
RelBuilder relBuilder =
HiveRelFactories.HIVE_BUILDER.create(relOptCluster, schemaMock);
HiveTableScan tableScan =
new HiveTableScan(relOptCluster,
relOptCluster.traitSetOf(HiveRelNode.CONVENTION), tableMock, "table", null,
false, false);
scan = relBuilder.push(tableScan).build();
+ when(mq.getRowCount(scan)).thenAnswer(a -> (double) currentValuesSize);
+ when(mq.getDistinctRowCount(scan, ImmutableBitSet.of(0),
REX_BUILDER.makeLiteral(true))).thenAnswer(
+ a -> distinctRowCount);
inputRef0 = REX_BUILDER.makeInputRef(scan, 0);
+ inputRef10 = REX_BUILDER.makeInputRef(scan, 10);
currentInputRef = inputRef0;
stats = new ColStatistics();
@@ -457,6 +475,129 @@ public void
testComputeRangePredicateSelectivityGreaterThanOrEqualWhenHigherThan
Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA);
}
+ private RexNode simplify(RexNode e) {
+ return new RexSimplify(REX_BUILDER, RelOptPredicateList.EMPTY,
RexUtil.EXECUTOR).simplify(e);
+ }
+
+ @Test
+ public void testComputeRangePredicateSelectivityOpenOpenWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int1),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int4));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6153846153846154,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeRangePredicateSelectivityClosedOpenWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
inputRef0, int2),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int4));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6153846153846154,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeRangePredicateSelectivityOpenClosedWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int1),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
inputRef0, int3));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6153846153846154,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeRangePredicateSelectivityClosedClosedWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
inputRef0, int2),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
inputRef0, int3));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6153846153846154,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeSeveralRangesPredicateSelectivityWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.OR,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int1),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0,
int4)),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int6),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0,
int10)));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6923076923076923,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void
testComputeSeveralSingleSideRangesPredicateSelectivityWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.OR,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int2),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int4));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.30769230769230765,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeRangeOrPointPredicateSelectivityWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.OR,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.EQUALS, inputRef0, int6),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int1),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
inputRef0, int3)));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6703296703296704,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeRangeOrPointsPredicateSelectivityWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.OR,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.EQUALS, inputRef0, int6),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.EQUALS, inputRef0, int7),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef0,
int1),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
inputRef0, int3)));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.7252747252747254,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
+ @Test
+ public void testComputeRangeOrIsNullPredicateSelectivityWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(10));
+ RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.OR,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.IS_NULL, inputRef10),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.AND,
+ REX_BUILDER.makeCall(SqlStdOperatorTable.GREATER_THAN, inputRef10,
int1),
+ REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
inputRef10, int3)));
+ filter = simplify(filter);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6153846153846154,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityBetween() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -465,6 +606,15 @@ public void testComputeRangePredicateSelectivityBetween() {
Assert.assertEquals(0.6923076923076923,
estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void testComputeRangePredicateSelectivityBetweenWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, int1, int3);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6923076923076923,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityBetweenFromMinToMax() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -473,6 +623,15 @@ public void
testComputeRangePredicateSelectivityBetweenFromMinToMax() {
Assert.assertEquals(1, estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void
testComputeRangePredicateSelectivityBetweenFromMinToMaxWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, int1, int7);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(1, estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void
testComputeRangePredicateSelectivityBetweenFromLowerThanMinToHigherThanMax() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -481,6 +640,15 @@ public void
testComputeRangePredicateSelectivityBetweenFromLowerThanMinToHigherT
Assert.assertEquals(1, estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void
testComputeRangePredicateSelectivityBetweenFromLowerThanMinToHigherThanMaxWithSearch()
{
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, int0, int8);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(1, estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityBetweenLeftLowerThanMin() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -489,6 +657,15 @@ public void
testComputeRangePredicateSelectivityBetweenLeftLowerThanMin() {
Assert.assertEquals(0.6923076923076923,
estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void
testComputeRangePredicateSelectivityBetweenLeftLowerThanMinWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, int0, int3);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0.6923076923076923,
estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityBetweenRightLowerThanMin() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -497,6 +674,15 @@ public void
testComputeRangePredicateSelectivityBetweenRightLowerThanMin() {
Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void
testComputeRangePredicateSelectivityBetweenRightLowerThanMinWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, intMinus1, int0);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityBetweenLeftHigherThanMax() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -505,6 +691,15 @@ public void
testComputeRangePredicateSelectivityBetweenLeftHigherThanMax() {
Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void
testComputeRangePredicateSelectivityBetweenLeftHigherThanMaxWithSearch() {
+
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, int10, int11);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityBetweenLeftLowerThanRight() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -523,6 +718,17 @@ public void
testComputeRangePredicateSelectivityBetweenLeftEqualsRight() {
Assert.assertEquals(0.1, estimator.estimateSelectivity(filter), DELTA);
}
+ @Test
+ public void
testComputeRangePredicateSelectivityBetweenLeftEqualsRightWithSearch() {
+ verify(tableMock, never()).getColStat(any());
+ doReturn(10.0).when(mq).getDistinctRowCount(scan, ImmutableBitSet.of(0),
REX_BUILDER.makeLiteral(true));
+ RexNode filter = REX_BUILDER.makeBetween(inputRef0, int3, int3);
+ Assert.assertEquals(SqlKind.SEARCH, filter.getKind());
+ FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
+ // this is what FilterSelectivityEstimator returns for a generic
"function" based on NDV values, in this case 1 / 10
+ Assert.assertEquals(0.1, estimator.estimateSelectivity(filter), DELTA);
+ }
+
@Test
public void testComputeRangePredicateSelectivityNotBetween() {
doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));
@@ -552,7 +758,7 @@ public void
testComputeRangePredicateSelectivityNotBetweenLeftEqualsRight() {
verify(tableMock, never()).getColStat(any());
RexNode filter = REX_BUILDER.makeCall(HiveBetween.INSTANCE, boolTrue,
inputRef0, int3, int3);
FilterSelectivityEstimator estimator = new
FilterSelectivityEstimator(scan, mq);
- Assert.assertEquals(1, estimator.estimateSelectivity(filter), DELTA);
+ Assert.assertEquals(0.8571428571428571,
estimator.estimateSelectivity(filter), DELTA);
}
@Test
@@ -931,7 +1137,6 @@ private void checkBetweenSelectivity(float
expectedEntries, float universe, floa
Assert.assertEquals(invMessage, invExpectedSelectivity,
estimator.estimateSelectivity(invBetween), DELTA);
}
-
private RexNode cast(String fieldname, SqlTypeName typeName) {
return cast(fieldname, type(typeName));
}
diff --git
a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
index 06a4b05a6f8..f32f4e940f8 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
@@ -276,22 +276,23 @@ STAGE PLANS:
Map 10
Map Operator Tree:
TableScan
- alias: emps_n8
- filterExpr: ((deptno > 10) and (deptno <= 11)) (type:
boolean)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: depts_n6
+ filterExpr: ((deptno > 10) and (deptno <= 11) and name is
not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((deptno > 10) and (deptno <= 11)) (type:
boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ predicate: ((deptno > 10) and (deptno <= 11) and name is
not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: deptno (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: deptno (type: int), name (type:
varchar(256))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: varchar(256))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 11
@@ -341,23 +342,22 @@ STAGE PLANS:
Map 8
Map Operator Tree:
TableScan
- alias: depts_n6
- filterExpr: ((deptno > 10) and (deptno <= 11) and name is
not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: emps_n8
+ filterExpr: ((deptno > 10) and (deptno <= 11)) (type:
boolean)
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((deptno > 10) and (deptno <= 11) and name is
not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicate: ((deptno > 10) and (deptno <= 11)) (type:
boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: deptno (type: int), name (type:
varchar(256))
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: deptno (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: varchar(256))
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -386,11 +386,11 @@ STAGE PLANS:
Inner Join 0 to 1
keys:
0 _col1 (type: varchar(256))
- 1 _col1 (type: varchar(256))
- outputColumnNames: _col0, _col3
+ 1 _col2 (type: varchar(256))
+ outputColumnNames: _col0, _col4
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int), _col3 (type: int)
+ keys: _col0 (type: int), _col4 (type: int)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1
@@ -445,15 +445,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: varchar(256))
+ key expressions: _col2 (type: varchar(256))
null sort order: z
sort order: +
- Map-reduce partition columns: _col1 (type: varchar(256))
+ Map-reduce partition columns: _col2 (type: varchar(256))
Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE
Column stats: COMPLETE
- value expressions: _col0 (type: int)
+ value expressions: _col1 (type: int)
Union 5
Vertex: Union 5
@@ -582,45 +582,44 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: dependents_n4
- filterExpr: name is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: emps_n8
+ filterExpr: (((deptno > 10) and (deptno <= 11)) or ((deptno
>= 19) and (deptno < 20))) (type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: name is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 188 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicate: (((deptno > 10) and (deptno <= 11)) or ((deptno
>= 19) and (deptno < 20))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: empid (type: int), name (type: varchar(256))
+ expressions: deptno (type: int), salary (type: float)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 188 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: varchar(256))
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
- Map-reduce partition columns: _col1 (type:
varchar(256))
- Statistics: Num rows: 2 Data size: 188 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: float)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 10
Map Operator Tree:
TableScan
- alias: depts_n6
- filterExpr: ((((deptno > 10) and (deptno <= 11)) or ((deptno
>= 19) and (deptno < 20))) and name is not null) (type: boolean)
- Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: locations_n4
+ filterExpr: name is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((((deptno > 10) and (deptno <= 11)) or
((deptno >= 19) and (deptno < 20))) and name is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicate: name is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: deptno (type: int), name (type:
varchar(256))
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: name (type: varchar(256))
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col0 (type: varchar(256))
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: varchar(256))
+ Map-reduce partition columns: _col0 (type:
varchar(256))
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 11
@@ -651,44 +650,45 @@ STAGE PLANS:
Map 7
Map Operator Tree:
TableScan
- alias: locations_n4
- filterExpr: name is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: depts_n6
+ filterExpr: ((((deptno > 10) and (deptno <= 11)) or ((deptno
>= 19) and (deptno < 20))) and name is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: name is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicate: ((((deptno > 10) and (deptno <= 11)) or
((deptno >= 19) and (deptno < 20))) and name is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: name (type: varchar(256))
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: deptno (type: int), name (type:
varchar(256))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: varchar(256))
+ key expressions: _col0 (type: int)
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type:
varchar(256))
- Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: varchar(256))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 8
Map Operator Tree:
TableScan
- alias: emps_n8
- filterExpr: (((deptno > 10) and (deptno <= 11)) or ((deptno
>= 19) and (deptno < 20))) (type: boolean)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: dependents_n4
+ filterExpr: name is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: (((deptno > 10) and (deptno <= 11)) or ((deptno
>= 19) and (deptno < 20))) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ predicate: name is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 188 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: deptno (type: int), salary (type: float)
+ expressions: empid (type: int), name (type: varchar(256))
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 188 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col1 (type: varchar(256))
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: float)
+ Map-reduce partition columns: _col1 (type:
varchar(256))
+ Statistics: Num rows: 2 Data size: 188 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 12
@@ -725,17 +725,17 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: varchar(256))
- 1 _col0 (type: varchar(256))
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE
Column stats: COMPLETE
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: varchar(256))
+ key expressions: _col3 (type: varchar(256))
null sort order: z
sort order: +
- Map-reduce partition columns: _col1 (type: varchar(256))
- Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE
Column stats: COMPLETE
- value expressions: _col0 (type: int)
+ Map-reduce partition columns: _col3 (type: varchar(256))
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col1 (type: float)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -743,13 +743,13 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: varchar(256))
- 1 _col3 (type: varchar(256))
- outputColumnNames: _col0, _col4
+ 0 _col3 (type: varchar(256))
+ 1 _col1 (type: varchar(256))
+ outputColumnNames: _col1, _col4
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
- aggregations: count(_col4)
- keys: _col0 (type: int)
+ aggregations: count(_col1)
+ keys: _col4 (type: int)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0, _col1
@@ -811,17 +811,17 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col3
- Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE
Column stats: COMPLETE
+ 0 _col1 (type: varchar(256))
+ 1 _col0 (type: varchar(256))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col3 (type: varchar(256))
+ key expressions: _col1 (type: varchar(256))
null sort order: z
sort order: +
- Map-reduce partition columns: _col3 (type: varchar(256))
- Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE
Column stats: COMPLETE
- value expressions: _col1 (type: float)
+ Map-reduce partition columns: _col1 (type: varchar(256))
+ Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Union 5
Vertex: Union 5
@@ -934,44 +934,44 @@ STAGE PLANS:
Map 1
Map Operator Tree:
TableScan
- alias: depts_n6
- filterExpr: ((deptno > 0) and (deptno < 10) and name is not
null) (type: boolean)
- Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: emps_n8
+ filterExpr: ((deptno > 0) and (deptno < 10)) (type: boolean)
+ Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((deptno > 0) and (deptno < 10) and name is not
null) (type: boolean)
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ predicate: ((deptno > 0) and (deptno < 10)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: deptno (type: int), name (type:
varchar(256))
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: deptno (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: varchar(256))
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 5
Map Operator Tree:
TableScan
- alias: emps_n8
- filterExpr: ((deptno > 0) and (deptno < 10)) (type: boolean)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ alias: depts_n6
+ filterExpr: ((deptno > 0) and (deptno < 10) and name is not
null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
- predicate: ((deptno > 0) and (deptno < 10)) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE
+ predicate: ((deptno > 0) and (deptno < 10) and name is not
null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: deptno (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: deptno (type: int), name (type:
varchar(256))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 93 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: varchar(256))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
@@ -1023,13 +1023,13 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col1
+ outputColumnNames: _col2
Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: varchar(256))
+ key expressions: _col2 (type: varchar(256))
null sort order: z
sort order: +
- Map-reduce partition columns: _col1 (type: varchar(256))
+ Map-reduce partition columns: _col2 (type: varchar(256))
Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE
Column stats: COMPLETE
Reducer 3
Execution mode: llap
@@ -1038,7 +1038,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: varchar(256))
+ 0 _col2 (type: varchar(256))
1 _col1 (type: varchar(256))
outputColumnNames: _col3
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: COMPLETE