tkhurana commented on code in PR #1701:
URL: https://github.com/apache/phoenix/pull/1701#discussion_r1372416468
##########
phoenix-core/src/main/java/org/apache/phoenix/compile/WhereCompiler.java:
##########
@@ -364,7 +401,495 @@ public Void visit(KeyValueColumnExpression expression) {
ScanUtil.andFilterAtBeginning(scan,
scanRanges.getSkipScanFilter());
}
}
-
+
+
+ public static Expression transformDNF(ParseNode where, StatementContext
statementContext)
+ throws SQLException {
+ if (where == null) {
+ return null;
+ }
+ StatementContext context = new StatementContext(statementContext);
+
context.setResolver(FromCompiler.getResolver(context.getCurrentTable()));
+ Expression expression = where.accept(new
WhereExpressionCompiler(context));
+ Expression dnf = expression.accept(new DNFExpressionRewriter());
+ return dnf;
+ }
+
+ /**
+ * Rewrites an expression in DNF (Disjunctive Normal Form). To do that
+ * (1) it transforms operators like RVC, IN, and BETWEEN to their AND/OR
equivalents,
+ * (2) eliminate double negations and apply DeMorgan rule, i.e.,
+ * NOT (A AND B) = NOT A OR NOT B and NOT (A OR B) = NOT A AND NOT
B, and
+ * (3) distributes AND over OR, i.e.,
+ * (A OR B) AND (C OR D) = (A AND C) OR (A AND D) OR (B AND C) OR (B
AND D).
+ */
+ public static class DNFExpressionRewriter extends
TraverseAllExpressionVisitor<Expression> {
+ /**
+ * Flattens nested AND expressions.
+ * For example A > 10 AND (B = 10 AND C > 0) is an AndExpression with
two children that are
+ * A > 10 and (B = 10 AND C > 0). Note the second child is another
AndExpression. This is
+ * flattened as an AndExpression ( A > 10 AND B = 10 AND C > 0) with
three
+ * children that are A > 10, B = 10, and C > 0.
+ *
+ */
+
+ private static AndExpression flattenAnd(List<Expression> l) {
+ for (Expression e : l) {
+ if (e instanceof AndExpression) {
+ List <Expression> flattenedList = new ArrayList<>(l.size()
+ + e.getChildren().size());
+ for (Expression child : l) {
+ if (child instanceof AndExpression) {
+ flattenedList.addAll(child.getChildren());
+ } else {
+ flattenedList.add(child);
+ }
+ }
+ return new AndExpression(flattenedList);
+ }
+ }
+ return new AndExpression(l);
+ }
+
+ /**
+ * Flattens nested OR expressions.
+ * For example A > 10 OR (B = 10 OR C > 0) is an OrExpression with two
children that are
+ * A > 10 and (B = 10 OR C > 0). Note the second child is another
OrExpression. This is
+ * flattened as an OrExpression ( A > 10 OR B = 10 OR C > 0) with
three
+ * children that are A > 10, B = 10, and C > 0.
+ *
+ */
+ private static OrExpression flattenOr(List<Expression> l) {
+ for (Expression e : l) {
+ if (e instanceof OrExpression) {
+ List <Expression> flattenedList = new ArrayList<>(l.size()
+ + e.getChildren().size());
+ for (Expression child : l) {
+ if (child instanceof OrExpression) {
+ flattenedList.addAll(child.getChildren());
+ } else {
+ flattenedList.add(child);
+ }
+ }
+ return new OrExpression(flattenedList);
+ }
+ }
+ return new OrExpression(l);
+ }
+
+ /**
+ * Flattens nested AND expressions and then distributes AND over OR.
+ *
+ */
+ @Override public Expression visitLeave(AndExpression node,
List<Expression> l) {
+ AndExpression andExpression = flattenAnd(l);
+
+ boolean foundOrChild = false;
+ int i;
+ Expression child = null;
+ List <Expression> andChildren = andExpression.getChildren();
+ for (i = 0; i < andChildren.size(); i++) {
+ child = andChildren.get(i);
+ if (child instanceof OrExpression) {
+ foundOrChild = true;
+ break;
+ }
+ }
+
+ if (foundOrChild) {
+ List <Expression> flattenedList = new
ArrayList<>(andChildren.size() - 1);
+ for (int j = 0; j < andChildren.size(); j++) {
+ if (i != j) {
+ flattenedList.add(andChildren.get(j));
+ }
+ }
+ List <Expression> orList = new
ArrayList<>(child.getChildren().size());
+ for (Expression grandChild : child.getChildren()) {
+ List <Expression> andList = new ArrayList<>(l.size());
+ andList.addAll(flattenedList);
+ andList.add(grandChild);
+ orList.add(visitLeave(new AndExpression(andList),
andList));
+ }
+ return visitLeave(new OrExpression(orList), orList);
+ }
+ return andExpression;
+ }
+ @Override public Expression visitLeave(OrExpression node,
List<Expression> l) {
+ return flattenOr(l);
+ }
+
+ @Override public Expression visitLeave(ScalarFunction node,
List<Expression> l) {
+ return node;
+ }
+
+ private static ComparisonExpression
createComparisonExpression(CompareOperator op, Expression lhs, Expression rhs) {
+ List <Expression> children = new ArrayList<>(2);
+ children.add(lhs);
+ children.add(rhs);
+ return new ComparisonExpression(children, op);
+ }
+ @Override public Expression visitLeave(ComparisonExpression node,
List<Expression> l) {
+ if (l == null || l.isEmpty()) {
+ return node;
+ }
+ Expression lhs = l.get(0);
+ Expression rhs = l.get(1);
+ if (!(lhs instanceof RowValueConstructorExpression) ||
+ !(rhs instanceof RowValueConstructorExpression)) {
+ return new ComparisonExpression(l, node.getFilterOp());
+ }
+
+ // Rewrite RVC in DNF (Disjunctive Normal Form)
+ // For example
+ // (A, B, C ) op (a, b, c) where op is == or != equals to
+ // (A != a and B != b and C != c)
+ // (A, B, C ) op (a, b, c) where op is <, <=, >, or >= is equals to
+ // (A == a and B == b and C op c) or (A == a and B op b) or A op c
+
+ int childCount = lhs.getChildren().size();
+ if (node.getFilterOp() == EQUAL ||
+ node.getFilterOp() == NOT_EQUAL) {
+ List <Expression> andList = new ArrayList<>(childCount);
+ for (int i = 0; i < childCount; i++) {
+ andList.add(createComparisonExpression(node.getFilterOp(),
lhs.getChildren().get(i),
+ rhs.getChildren().get(i)));
+ }
+ return new AndExpression(andList);
+ }
+ List <Expression> orList = new ArrayList<>(childCount);
+ for (int i = 0; i < childCount; i++) {
+ List <Expression> andList = new ArrayList<>(childCount);
+ int j;
+ for (j = 0; j < childCount - i - 1; j++) {
+ andList.add(createComparisonExpression(EQUAL,
lhs.getChildren().get(j),
+ rhs.getChildren().get(j)));
+ }
+ andList.add(createComparisonExpression(node.getFilterOp(),
lhs.getChildren().get(j),
+ rhs.getChildren().get(j)));
+ orList.add(new AndExpression(andList));
+ }
+ return new OrExpression(orList);
+ }
+
+ @Override public Expression visitLeave(LikeExpression node,
List<Expression> l) {
+ return node;
+ }
+
+ @Override public Expression visitLeave(SingleAggregateFunction node,
List<Expression> l) {
+ return node;
+ }
+
+ @Override public Expression visitLeave(CaseExpression node,
List<Expression> l) {
+ return node;
+ }
+
+ private static Expression negate(ComparisonExpression node) {
+ CompareOperator op = node.getFilterOp();
+ Expression lhs = node.getChildren().get(0);
+ Expression rhs = node.getChildren().get(1);
+ switch (op){
+ case LESS:
+ return createComparisonExpression(GREATER_OR_EQUAL, lhs, rhs);
+ case LESS_OR_EQUAL:
+ return createComparisonExpression(GREATER, lhs, rhs);
+ case EQUAL:
+ return createComparisonExpression(NOT_EQUAL, lhs, rhs);
+ case NOT_EQUAL:
+ return createComparisonExpression(EQUAL, lhs, rhs);
+ case GREATER_OR_EQUAL:
+ return createComparisonExpression(LESS, lhs, rhs);
+ case GREATER:
+ return createComparisonExpression(LESS_OR_EQUAL, lhs, rhs);
+ default:
+ throw new IllegalArgumentException("Unexpected CompareOp of "
+ op);
+ }
+ }
+ private static List<Expression> negateChildren(List<Expression>
children) {
+ List <Expression> list = new ArrayList<>(children.size());
+ for (Expression child : children) {
+ if (child instanceof ComparisonExpression) {
+ list.add(negate((ComparisonExpression) child));
+ } else if (child instanceof OrExpression) {
+ list.add(negate((OrExpression) child));
+ } else if (child instanceof AndExpression) {
+ list.add(negate((AndExpression) child));
+ } else if (child instanceof ColumnExpression) {
+ list.add(new NotExpression(child));
+ } else if (child instanceof NotExpression) {
+ list.add(child.getChildren().get(0));
+ } else {
+ throw new IllegalArgumentException("Unexpected Instance of
" + child);
+ }
+ }
+ return list;
+ }
+ private static Expression negate(OrExpression node) {
+ return new AndExpression(negateChildren(node.getChildren()));
+ }
+
+ private static Expression negate(AndExpression node) {
+ return new OrExpression(negateChildren(node.getChildren()));
+ }
+ @Override public Expression visitLeave(NotExpression node,
List<Expression> l) {
+ Expression child = l.get(0);
+ if (child instanceof OrExpression) {
+ return negate((OrExpression) child);
+ } else if (child instanceof AndExpression) {
+ return negate((AndExpression) child);
+ } else if (child instanceof ComparisonExpression) {
+ return negate((ComparisonExpression) child);
+ } else if (child instanceof NotExpression) {
+ return child.getChildren().get(0);
+ } else if (child instanceof IsNullExpression) {
+ return new
IsNullExpression(ImmutableList.of(l.get(0).getChildren().get(0)),
+ !((IsNullExpression) child).isNegate());
+ } else {
+ return new NotExpression(child);
+ }
+ }
+
+ private Expression transformInList(InListExpression node, boolean
negate, List<Expression> l) {
+ List<Expression> list = new
ArrayList<>(node.getKeyExpressions().size());
+ for (Expression element : node.getKeyExpressions()) {
+ if (negate) {
+ list.add(createComparisonExpression(NOT_EQUAL, l.get(0),
element));
Review Comment:
@kadirozde This doesn't seem to handle the case where the LHS of inlist can
be a RVC expression like `(col1, col2) IN ((2,3), (7,8))`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]