korlov42 commented on a change in pull request #9276:
URL: https://github.com/apache/ignite/pull/9276#discussion_r698330664
##########
File path:
modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/metadata/IgniteMdSelectivity.java
##########
@@ -110,6 +122,654 @@ public Double getSelectivity(IgniteSortedIndexSpool rel,
RelMetadataQuery mq, Re
}
/** Estimates selectivity for a {@code RelSubset} by delegating to its best plan; uses the superclass estimate when {@code getBest()} returns {@code null}. */
+ public Double getSelectivity(RelSubset rel, RelMetadataQuery mq, RexNode
predicate) {
+ RelNode best = rel.getBest();
+ if (best == null)
+ return super.getSelectivity(rel, mq, predicate);
+
+ return getSelectivity(best, mq, predicate);
+ }
+
+ /**
+ * Converts the specified literal into a comparable {@link BigDecimal}.
+ * NUMERIC literals are read as {@code BigDecimal}, DATE and TIMESTAMP literals are
+ * converted to milliseconds since epoch, TIME literals to milliseconds of day and
+ * BOOLEAN literals to {@code BigDecimal.ONE} or {@code BigDecimal.ZERO}.
+ *
+ * @param val Value to convert to comparable form.
+ * @return Comparable form of value, or {@code null} if the literal type is not a
+ * {@code BasicSqlType}, belongs to the NULL family or to any other family not listed above.
+ */
+ private BigDecimal toComparableValue(RexLiteral val) {
+ RelDataType type = val.getType();
+
+ if (type instanceof BasicSqlType) {
+ BasicSqlType bType = (BasicSqlType)type;
+
+ switch ((SqlTypeFamily)bType.getFamily()) {
+ case NULL:
+ return null;
+
+ case NUMERIC:
+ return val.getValueAs(BigDecimal.class);
+
+ case DATE:
+ return new
BigDecimal(val.getValueAs(DateString.class).getMillisSinceEpoch());
+
+ case TIME:
+ return new
BigDecimal(val.getValueAs(TimeString.class).getMillisOfDay());
+
+ case TIMESTAMP:
+ return new
BigDecimal(val.getValueAs(TimestampString.class).getMillisSinceEpoch());
+
+ case BOOLEAN:
+ return (val.getValueAs(Boolean.class)) ? BigDecimal.ONE :
BigDecimal.ZERO;
+
+ default:
+ return null;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Converts the specified H2 value into a comparable {@link BigDecimal}.
+ * Booleans map to {@code BigDecimal.ONE} or {@code BigDecimal.ZERO}, numeric types to their
+ * numeric value, DATE, TIME and TIMESTAMP to the result of {@code getTime()}, and BYTES and
+ * UUID to the non-negative integer built from the value bytes.
+ *
+ * @param val Value to convert to comparable form.
+ * @return Comparable form of value, or {@code null} if {@code val} is {@code null} or has a
+ * string, array, Java object or geometry type.
+ * @throws IllegalArgumentException If the value has type {@code Value.NULL}.
+ * @throws IllegalStateException If the value type is not handled by this method.
+ */
+ private BigDecimal toComparableValue(Value val) {
+ if (val == null)
+ return null;
+
+ switch (val.getType()) {
+ case Value.NULL:
+ throw new IllegalArgumentException("Can't compare null
values");
+
+ case Value.BOOLEAN:
+ return (val.getBoolean()) ? BigDecimal.ONE : BigDecimal.ZERO;
+
+ case Value.BYTE:
+ return new BigDecimal(val.getByte());
+
+ case Value.SHORT:
+ return new BigDecimal(val.getShort());
+
+ case Value.INT:
+ return new BigDecimal(val.getInt());
+
+ case Value.LONG:
+ return new BigDecimal(val.getLong());
+
+ case Value.DECIMAL:
+ return val.getBigDecimal();
+
+ case Value.DOUBLE:
+ return BigDecimal.valueOf(val.getDouble());
+
+ case Value.FLOAT:
+ return BigDecimal.valueOf(val.getFloat());
+
+ case Value.DATE:
+ return BigDecimal.valueOf(val.getDate().getTime());
+
+ case Value.TIME:
+ return BigDecimal.valueOf(val.getTime().getTime());
+
+ case Value.TIMESTAMP:
+ return BigDecimal.valueOf(val.getTimestamp().getTime());
+
+ case Value.BYTES:
+ BigInteger bigInteger = new BigInteger(1, val.getBytes());
+ return new BigDecimal(bigInteger);
+
+ case Value.STRING:
+ case Value.STRING_FIXED:
+ case Value.STRING_IGNORECASE:
+ case Value.ARRAY:
+ case Value.JAVA_OBJECT:
+ case Value.GEOMETRY:
+ return null;
+
+ case Value.UUID:
+ BigInteger bigInt = new BigInteger(1, val.getBytes());
+ return new BigDecimal(bigInt);
+
+ default:
+ throw new IllegalStateException("Unsupported H2 type: " +
val.getType());
+ }
+ }
+
+ /**
+ * Predicate based selectivity for table. Estimates the condition on each
+ * column, taking its statistics into account.
+ * <p>
+ * The predicate is split into conjunctions and their selectivities are multiplied.
+ * An OR conjunct is estimated as one minus the product of {@code (1 - selectivity)} of its
+ * disjunctions, a NOT conjunct with a local ref operand as one minus the operand selectivity,
+ * and predicate kinds that are not handled explicitly contribute a fixed factor of {@code 0.25}.
+ * Operands flagged with {@code TRUE} in the "add not null" map are additionally multiplied by
+ * their "not null" selectivity ({@code NOT_NULL_SELECTIVITY} when no column statistics exist).
+ *
+ * @param rel Original rel node to fallback calculation by.
+ * @param tbl Underlying IgniteTable. If {@code null}, {@code RelMdUtil.guessSelectivity} is used.
+ * @param mq RelMetadataQuery.
+ * @param predicate Predicate to estimate selectivity by.
+ * @return Selectivity.
+ */
+ private double getTablePredicateBasedSelectivity(
+ RelNode rel,
+ IgniteTable tbl,
+ RelMetadataQuery mq,
+ RexNode predicate
+ ) {
+ if (tbl == null)
+ return RelMdUtil.guessSelectivity(predicate);
+
+ double sel = 1.0;
+
+ Map<RexSlot, Boolean> addNotNull = new HashMap<>();
+
+ for (RexNode pred : RelOptUtil.conjunctions(predicate)) {
+ SqlKind predKind = pred.getKind();
+ RexLocalRef op = getOperand(pred, RexLocalRef.class);
+
+ if (predKind == SqlKind.OR) {
+ double orSelTotal = 1;
+
+ for (RexNode orPred : RelOptUtil.disjunctions(pred))
+ orSelTotal *= 1 - getTablePredicateBasedSelectivity(rel,
tbl, mq, orPred);
+
+ sel *= 1 - orSelTotal;
+ }
+ else if (predKind == SqlKind.NOT) {
+ if (op == null)
+ sel *= guessSelectivity(pred);
+ else {
+ tryAddNotNull(addNotNull, tbl, op);
+
+ sel *= 1 - getTablePredicateBasedSelectivity(rel, tbl, mq,
op);
+ }
+ }
+ else if (predKind == SqlKind.LOCAL_REF) {
+ if (op != null)
+ addNotNull.put(op, Boolean.TRUE);
+
+ sel *= estimateRefSelectivity(rel, mq, tbl, (RexLocalRef)pred);
+ } else if (predKind == SqlKind.IS_NULL) {
+ if (op != null)
+ addNotNull.put(op, Boolean.FALSE);
+
+ sel *= estimateIsNullSelectivity(rel, mq, tbl, pred);
+
+ } else if (predKind == SqlKind.IS_NOT_NULL) {
+ if (op != null)
+ addNotNull.put(op, Boolean.FALSE);
+
+ sel *= estimateIsNotNullSelectivity(rel, mq, tbl, pred);
+ } else if (predKind == SqlKind.EQUALS) {
+ if (op != null)
+ addNotNull.put(op, Boolean.TRUE);
+
+ sel *= estimateEqualsSelectivity(rel, mq, tbl, pred);
+ } else if (predKind.belongsTo(SqlKind.COMPARISON)) {
+ if (op != null)
+ addNotNull.put(op, Boolean.TRUE);
+
+ sel *= estimateComparisonSelectivity(rel, mq, tbl, pred);
+ } else
+ sel *= .25;
+ }
+
+ // Estimate not null selectivity in addition to comparison.
+ for (Map.Entry<RexSlot, Boolean> colAddNotNull :
addNotNull.entrySet()) {
+ if (colAddNotNull.getValue()) {
+ ColumnStatistics colStat = getColStatBySlot(rel, mq, tbl,
colAddNotNull.getKey());
+
+ sel *= (colStat == null) ? NOT_NULL_SELECTIVITY :
estimateNotNullSelectivity(colStat);
+ }
+ }
+
+ return sel;
+ }
+
+ /**
+ * Tries to set the "not null" flag for the operand: the flag becomes {@code true} when the
+ * operand has no flag yet, otherwise the existing flag value is kept (a {@code false} flag
+ * stays {@code false}).
+ *
+ * @param addNotNull Map with "add not null" flags for operands.
+ * @param tbl IgniteTable (not used by this method).
+ * @param op RexSlot to add operand by.
+ */
+ private void tryAddNotNull(Map<RexSlot, Boolean> addNotNull, IgniteTable
tbl, RexSlot op) {
+ Boolean colNotNull = addNotNull.get(op);
+
+ addNotNull.put(op, (colNotNull == null) || colNotNull);
+ }
+
+ /**
+ * Estimates local ref selectivity (i.e. selectivity of the "is true" condition).
+ * Returns {@code 0.33} when there are no column statistics or the maximum is absent or not
+ * a boolean, {@code 0} when the maximum is {@code false}, the "not null" selectivity when both
+ * minimum and maximum are {@code true}, and half of the "not null" selectivity otherwise.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable.
+ * @param ref RexLocalRef.
+ * @return Selectivity estimation.
+ */
+ private double estimateRefSelectivity(RelNode rel, RelMetadataQuery mq,
IgniteTable tbl, RexLocalRef ref) {
+ ColumnStatistics colStat = getColStatBySlot(rel, mq, tbl, ref);
+ double res = 0.33;
+ if (colStat == null)
+ // true, false and null with equal probability
+ return res;
+
+ if (colStat.max() == null || colStat.max().getType() != Value.BOOLEAN)
+ return res;
+
+ Boolean min = colStat.min().getBoolean();
+ Boolean max = colStat.max().getBoolean();
+
+ if (!max)
+ return 0;
+
+ double notNullSel = estimateNotNullSelectivity(colStat);
+
+ return (max && min) ? notNullSel : notNullSel / 2;
+ }
+
+ /**
+ * Computes selectivity for the "is null" condition. Falls back to
+ * {@code guessSelectivity} when no column statistics can be found for the predicate.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable.
+ * @param pred RexNode.
+ * @return Selectivity estimation.
+ */
+ private double estimateIsNullSelectivity(RelNode rel, RelMetadataQuery mq,
IgniteTable tbl, RexNode pred) {
+ ColumnStatistics colStat = getColStat(rel, mq, tbl, pred);
+
+ if (colStat == null)
+ return guessSelectivity(pred);
+
+ return estimateNullSelectivity(colStat);
+ }
+
+ /**
+ * Estimates selectivity for an equals predicate. Falls back to {@code guessSelectivity}
+ * when there are no column statistics, the predicate has no literal operand, or the literal
+ * cannot be converted to a comparable value.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable.
+ * @param pred RexNode with predicate.
+ *
+ * @return Selectivity.
+ */
+ private double estimateEqualsSelectivity(
+ RelNode rel,
+ RelMetadataQuery mq,
+ IgniteTable tbl,
+ RexNode pred) {
+ ColumnStatistics colStat = getColStat(rel, mq, tbl, pred);
+
+ if (colStat == null)
+ return guessSelectivity(pred);
+
+ RexLiteral val = getOperand(pred, RexLiteral.class);
+
+ if (val == null)
+ return guessSelectivity(pred);
+
+ BigDecimal comparableVal = toComparableValue(val);
+
+ if (comparableVal == null)
+ return guessSelectivity(pred);
+
+ return estimateEqualsSelectivity(colStat, comparableVal);
+ }
+
+ /**
+ * Computes selectivity for the "is not null" condition. Falls back to
+ * {@code guessSelectivity} when no column statistics can be found for the predicate.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable.
+ * @param pred RexNode.
+ * @return Selectivity estimation.
+ */
+ private double estimateIsNotNullSelectivity(RelNode rel, RelMetadataQuery
mq, IgniteTable tbl, RexNode pred) {
+ ColumnStatistics colStat = getColStat(rel, mq, tbl, pred);
+
+ if (colStat == null)
+ return guessSelectivity(pred);
+
+ return estimateNotNullSelectivity(colStat);
+ }
+
+ /**
+ * Estimates selectivity for a comparison predicate. Falls back to
+ * {@code guessSelectivity} when no column statistics can be found for the predicate,
+ * otherwise delegates to the range selectivity estimation.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable.
+ * @param pred RexNode.
+ * @return Selectivity.
+ */
+ private double estimateComparisonSelectivity(RelNode rel, RelMetadataQuery
mq, IgniteTable tbl, RexNode pred) {
+ ColumnStatistics colStat = getColStat(rel, mq, tbl, pred);
+
+ if (colStat == null)
+ return guessSelectivity(pred);
+
+ return estimateRangeSelectivity(colStat, pred);
+ }
+
+ /**
+ * Gets column statistics for the column referenced by the predicate.
+ * Only IS_NULL, IS_NOT_NULL, LOCAL_REF, NOT and comparison predicates are supported.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable to get statistics from.
+ * @param pred Predicate to get statistics by related column.
+ * @return ColumnStatistics or {@code null} if the predicate kind is not supported, the
+ * predicate has no {@code RexSlot} operand, or no statistics are found for the slot.
+ */
+ private ColumnStatistics getColStat(RelNode rel, RelMetadataQuery mq,
IgniteTable tbl, RexNode pred) {
+ SqlKind predKind = pred.getKind();
+
+ if (predKind != SqlKind.IS_NULL && predKind != SqlKind.IS_NOT_NULL &&
predKind != SqlKind.LOCAL_REF &&
+ predKind != SqlKind.NOT && !predKind.belongsTo(SqlKind.COMPARISON))
+ return null;
+
+ RexSlot operand = getOperand(pred, RexSlot.class);
+
+ if (operand == null)
+ return null;
+
+ return getColStatBySlot(rel, mq, tbl, operand);
+ }
+
+ /**
+ * Gets column statistics for the column referenced by the slot.
+ * For a {@code RexLocalRef} the column origin is resolved only when {@code rel} is a
+ * {@code ProjectableFilterableTableScan}; for other slots it is taken from
+ * {@code mq.getColumnOrigins}. Only the first origin is used. A column named
+ * {@code QueryUtils.KEY_FIELD_NAME} is looked up by the key field name of the table type.
+ *
+ * @param rel RelNode.
+ * @param mq RelMetadataQuery.
+ * @param tbl IgniteTable to get statistics from.
+ * @param pred RexSlot to get statistics by related column.
+ * @return ColumnStatistics or {@code null} if the origin is unknown or derived, or the table
+ * has no statistic.
+ */
+ private ColumnStatistics getColStatBySlot(RelNode rel, RelMetadataQuery
mq, IgniteTable tbl, RexSlot pred) {
+ Set<RelColumnOrigin> origins = null;
+
+ if (pred instanceof RexLocalRef) {
+ if (rel instanceof ProjectableFilterableTableScan) {
+ origins = Collections.singleton(
+
((ProjectableFilterableTableScan)rel).columnOriginsByRelLocalRef(pred.getIndex()));
+ }
+ }
+ else
+ origins = mq.getColumnOrigins(rel, pred.getIndex());
+
+ if (origins == null || origins.isEmpty() ||
origins.iterator().next().isDerived())
+ return null;
+
+ IgniteTypeFactory typeFactory = Commons.typeFactory(rel);
+
+ List<String> columns = tbl.getRowType(typeFactory).getFieldNames();
+
+ String colName =
columns.get(origins.iterator().next().getOriginColumnOrdinal());
+
+ if (QueryUtils.KEY_FIELD_NAME.equals(colName))
+ colName = tbl.descriptor().typeDescription().keyFieldName();
+
+ Statistic stat = tbl.getStatistic();
+
+ if (stat == null)
+ return null;
+
+ return ((IgniteStatisticsImpl)stat).getColumnStatistics(colName);
+ }
+
+ /**
+ * Estimates range selectivity based on the predicate.
+ * Returns {@code 1} when the predicate is not a {@code RexCall} or has no literal operand;
+ * otherwise the literal is converted to a comparable value and the estimation is delegated
+ * to {@code estimateSelectivity}.
+ *
+ * @param colStat Column statistics to use.
+ * @param pred Condition.
+ * @return Selectivity.
+ */
+ private double estimateRangeSelectivity(ColumnStatistics colStat, RexNode
pred) {
+ if (pred instanceof RexCall) {
+ RexLiteral literal = getOperand(pred, RexLiteral.class);
+
+ if (literal == null)
+ return 1.;
+
+ BigDecimal val = toComparableValue(literal);
+
+ return estimateSelectivity(colStat, val, pred);
+ }
+
+ return 1.;
+ }
+
+ /**
+ * Estimate range selectivity based on predicate, condition and column
statistics.
+ *
+ * @param colStat Column statistics to use.
+ * @param val Condition value.
+ * @param pred Condition.
+ * @return Selectivity.
+ */
+ private double estimateSelectivity(ColumnStatistics colStat, BigDecimal
val, RexNode pred) {
+ // Without value or statistics we can only guess.
+ if (val == null)
+ return guessSelectivity(pred);
+
+ SqlOperator op = ((RexCall)pred).op;
+
+ BigDecimal min = toComparableValue(colStat.min());
+ BigDecimal max = toComparableValue(colStat.max());
+ BigDecimal total = (min == null || max == null) ? null :
max.subtract(min).abs();
+
+ if (total == null)
+ // No min/max mean that all values are null for coumn.
Review comment:
`toComparableValue` returns NULL for string and byte array values,
causing a string index to be chosen over any other. We need to fix this and add a
corresponding test for this case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]