This is an automated email from the ASF dual-hosted git repository.
preetham02 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 7c322f49e0 [ASTERIXDB-3571][COMP] Infer Selection predicates from join
predicates
7c322f49e0 is described below
commit 7c322f49e047a6edffaf3dd8b5125180ce4d309f
Author: murali4104 <[email protected]>
AuthorDate: Tue Jun 17 08:13:26 2025 -0700
[ASTERIXDB-3571][COMP] Infer Selection predicates from join predicates
Ext-ref: MB-65670
Change-Id: I7766e107bfadd915b6d760fc1ab74a6651c49db6
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19943
Integration-Tests: Jenkins <[email protected]>
Reviewed-by: <[email protected]>
Tested-by: Jenkins <[email protected]>
---
.../asterix/optimizer/rules/cbo/JoinCondition.java | 3 ++
.../asterix/optimizer/rules/cbo/JoinEnum.java | 51 ++++++++++++++++++----
.../asterix/optimizer/rules/cbo/JoinNode.java | 1 +
.../apache/asterix/optimizer/rules/cbo/Stats.java | 10 +----
4 files changed, 47 insertions(+), 18 deletions(-)
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
index 0f4cc82f2f..a75a90b43c 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinCondition.java
@@ -19,10 +19,12 @@
package org.apache.asterix.optimizer.rules.cbo;
+import java.util.ArrayList;
import java.util.List;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import
org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
public class JoinCondition {
@@ -47,6 +49,7 @@ public class JoinCondition {
protected comparisonOp comparisonType;
protected JoinOperator joinOp = null;
protected List<LogicalVariable> usedVars = null;
+ protected List<SelectOperator> derivedSelOps = new ArrayList<>(); // only
one of them will be regarded as original
protected enum comparisonOp {
OP_EQ,
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
index 73be2982c4..16518c2a55 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java
@@ -1334,6 +1334,7 @@ public class JoinEnum {
findJoinConditionsAndDoTC();
addTCSelectionPredicates();
+ keepOnlyOneSelectivityHint();
int lastBaseLevelJnNum = enumerateBaseLevelJoinNodes();
if (lastBaseLevelJnNum == PlanNode.NO_PLAN) {
return PlanNode.NO_PLAN;
@@ -1375,6 +1376,26 @@ public class JoinEnum {
return lastJn.cheapestPlanIndex;
}
+ /**
+  * For every join condition, makes sure that at most one of the select operators recorded in
+  * {@code jc.derivedSelOps} still carries a {@code PredicateCardinalityAnnotation}.
+  *
+  * Example: with R.a = S.a and R.a < 1 and S.a < 1 the user typed in both selection predicates,
+  * so each one looks derived from the other and both end up annotated. In that case the
+  * annotation is removed from the last one.
+  */
+ private void keepOnlyOneSelectivityHint() {
+     for (JoinCondition jc : joinConditions) {
+         int n = 0; // number of derived select operators that carry the annotation
+         for (SelectOperator selOp : jc.derivedSelOps) {
+             AbstractFunctionCallExpression afce =
+                     (AbstractFunctionCallExpression) selOp.getCondition().getValue();
+             if (afce.hasAnnotation(PredicateCardinalityAnnotation.class)) {
+                 n++;
+             }
+         }
+         if (n <= 1) { // R.a = S.a and R.a < 1
+             // Perfect: at most one predicate has the annotation. Move on to the next join
+             // condition; a return here would leave all remaining join conditions unchecked.
+             continue;
+         }
+         // n == 2: both of them have it, so remove it from the last one.
+         // R.a = S.a and R.a < 1 and S.a < 1; user typed in both predicates, so each one looks derived.
+         // NOTE(review): only the entry at index n - 1 is cleared; confirm n cannot exceed 2 here.
+         AbstractFunctionCallExpression last =
+                 (AbstractFunctionCallExpression) jc.derivedSelOps.get(n - 1).getCondition().getValue();
+         last.removeAnnotation(PredicateCardinalityAnnotation.class);
+     }
+ }
+
// R.a = S.a and R.a op operand ==> S.a op operand
private void addTCSelectionPredicates() throws AlgebricksException {
List<SelectOperator> existingSelOps = new ArrayList<>();
@@ -1396,33 +1417,33 @@ public class JoinEnum {
List<JoinCondition> jcs = findVarinJoinPreds(var);
for (JoinCondition jc : jcs) { // join predicate can be R.a = S.a
or S.a = R.a. Check for both cases
if (var == jc.usedVars.get(0)) { // R.a
- newSelOp = makeNewSelOper(existingSelOps,
jc.usedVars.get(1), // == S.a
+ newSelOp = makeNewSelOper(jc, existingSelOps,
jc.usedVars.get(1), // == S.a
((AbstractFunctionCallExpression)
selOp.getCondition().getValue()).getFunctionInfo(), // op
exp.getArguments().get(1)); // operand
if (newSelOp != null) { // does not already exist
- addSelOpToLeafInput(jc.usedVars.get(1), newSelOp);
+ addSelOpToLeafInput(jc, jc.usedVars.get(1), newSelOp);
}
} else if (var == jc.usedVars.get(1)) { // R.a
- newSelOp = makeNewSelOper(existingSelOps,
jc.usedVars.get(0), // == S.a
+ newSelOp = makeNewSelOper(jc, existingSelOps,
jc.usedVars.get(0), // == S.a
((AbstractFunctionCallExpression)
selOp.getCondition().getValue()).getFunctionInfo(), // op
exp.getArguments().get(1)); // operand
if (newSelOp != null) {
- addSelOpToLeafInput(jc.usedVars.get(0), newSelOp);
+ addSelOpToLeafInput(jc, jc.usedVars.get(0), newSelOp);
}
}
}
}
}
- private SelectOperator makeNewSelOper(List<SelectOperator> existingSelOps,
LogicalVariable var, IFunctionInfo tag,
- Mutable<ILogicalExpression> arg) throws AlgebricksException {
+ private SelectOperator makeNewSelOper(JoinCondition jc,
List<SelectOperator> existingSelOps, LogicalVariable var,
+ IFunctionInfo tag, Mutable<ILogicalExpression> arg) throws
AlgebricksException {
List<Mutable<ILogicalExpression>> arguments = new ArrayList<>();
VariableReferenceExpression e1 = new VariableReferenceExpression(var);
arguments.add(new MutableObject<>(e1)); // S.a
arguments.add(new MutableObject<>(arg.getValue())); // this will be
the operand
ScalarFunctionCallExpression expr = new
ScalarFunctionCallExpression(tag, arguments); //S.a op operand
SelectOperator newsel = new SelectOperator(new MutableObject<>(expr),
null, null);
- if (newSelNotPresent(newsel, existingSelOps)) {
+ if (newSelNotPresent(jc, newsel, existingSelOps)) {
LOGGER.info("adding newsel " + newsel.getCondition());
return newsel; // add since it does not exist
} else {
@@ -1430,21 +1451,33 @@ public class JoinEnum {
}
}
- private boolean newSelNotPresent(SelectOperator newsel,
List<SelectOperator> existingSelOps) {
+ private boolean newSelNotPresent(JoinCondition jc, SelectOperator newsel,
List<SelectOperator> existingSelOps) {
for (SelectOperator existingSelOp : existingSelOps) {
if (newsel.getCondition().equals(existingSelOp.getCondition())) {
+ PredicateCardinalityAnnotation anno = new
PredicateCardinalityAnnotation(0.9999); // cannot be 1.0 as check in
setCardsAndSizes will not work
+ AbstractFunctionCallExpression afce =
+ (AbstractFunctionCallExpression)
existingSelOp.getCondition().getValue();
+ afce.putAnnotation(anno);
+ jc.derivedSelOps.add(existingSelOp);
return false;
}
}
return true;
}
- private void addSelOpToLeafInput(LogicalVariable var, SelectOperator
newSelOp) throws AlgebricksException {
+ private void addSelOpToLeafInput(JoinCondition jc, LogicalVariable var,
SelectOperator newSelOp)
+ throws AlgebricksException {
int l = varLeafInputIds.get(var); // get the corresponding leafInput
using the map
ILogicalOperator parent = leafInputs.get(l - 1);
ILogicalOperator child = parent.getInputs().get(0).getValue();
parent.getInputs().get(0).setValue(newSelOp);
newSelOp.getInputs().add(new MutableObject<>(child));
+ // Add the selectivity annotation with selectivity 0.9999 (just under 1.0);
+ // Note the actual cardinality will be different; but all join
cardinalities should be ok.
+ PredicateCardinalityAnnotation anno = new
PredicateCardinalityAnnotation(0.9999);
+ AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression)
newSelOp.getCondition().getValue();
+ afce.putAnnotation(anno);
+ jc.derivedSelOps.add(newSelOp);
optCtx.computeAndSetTypeEnvironmentForOperator(newSelOp);
}
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
index 34519666e9..5cbb6f80e7 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java
@@ -371,6 +371,7 @@ public class JoinNode {
// There are predicates here. So skip the predicates and get the
original dataset card.
// Now apply all the predicates and get the card after all predicates
are applied.
+ // We call the sampling query even if a selectivity hint was provided
because we have to get the lengths of the variables.
result =
joinEnum.getStatsHandle().runSamplingQueryProjection(joinEnum.optCtx,
leafInput, jnArrayIndex,
primaryKey);
double predicateCardinalityFromSample =
joinEnum.getStatsHandle().findPredicateCardinality(result, true);
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
index 18bc53f0f5..ab57d3ff8d 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/Stats.java
@@ -153,15 +153,7 @@ public class Stats {
return 1.0;
}
double productivity = anno.getJoinProductivity();
- if (productivity <= 0) {
- IWarningCollector warningCollector =
joinEnum.optCtx.getWarningCollector();
- if (warningCollector.shouldWarn()) {
-
warningCollector.warn(Warning.of(joinExpr.getSourceLocation(),
ErrorCode.INAPPLICABLE_HINT,
- "productivity",
- "Productivity specified: " + productivity + ", has
to be a decimal value greater than 0"));
- }
- return 1.0;
- }
+
if (leftIndex == idx1) {
return productivity / card2;
} else {