[
https://issues.apache.org/jira/browse/PHOENIX-7032?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17779698#comment-17779698
]
ASF GitHub Bot commented on PHOENIX-7032:
-----------------------------------------
tkhurana commented on code in PR #1701:
URL: https://github.com/apache/phoenix/pull/1701#discussion_r1372416468
##########
phoenix-core/src/main/java/org/apache/phoenix/compile/WhereCompiler.java:
##########
@@ -364,7 +401,495 @@ public Void visit(KeyValueColumnExpression expression) {
ScanUtil.andFilterAtBeginning(scan,
scanRanges.getSkipScanFilter());
}
}
-
+
+
+ public static Expression transformDNF(ParseNode where, StatementContext
statementContext)
+ throws SQLException {
+ if (where == null) {
+ return null;
+ }
+ StatementContext context = new StatementContext(statementContext);
+
context.setResolver(FromCompiler.getResolver(context.getCurrentTable()));
+ Expression expression = where.accept(new
WhereExpressionCompiler(context));
+ Expression dnf = expression.accept(new DNFExpressionRewriter());
+ return dnf;
+ }
+
+ /**
+ * Rewrites an expression in DNF (Disjunctive Normal Form). To do that
+ * (1) it transforms operators like RVC, IN, and BETWEEN to their AND/OR
equivalents,
+ * (2) eliminate double negations and apply DeMorgan rule, i.e.,
+ * NOT (A AND B) = NOT A OR NOT B and NOT (A OR B) = NOT A AND NOT
B, and
+ * (3) distributes AND over OR, i.e.,
+ * (A OR B) AND (C OR D) = (A AND C) OR (A AND D) OR (B AND C) OR (B
AND D).
+ */
+ public static class DNFExpressionRewriter extends
TraverseAllExpressionVisitor<Expression> {
+ /**
+ * Flattens nested AND expressions.
+ * For example A > 10 AND (B = 10 AND C > 0) is an AndExpression with
two children that are
+ * A > 10 and (B = 10 AND C > 0). Note the second child is another
AndExpression. This is
+ * flattened as an AndExpression ( A > 10 AND B = 10 AND C > 0) with
three
+ * children that are A > 10, B = 10, and C > 0.
+ *
+ */
+
+ private static AndExpression flattenAnd(List<Expression> l) {
+ for (Expression e : l) {
+ if (e instanceof AndExpression) {
+ List <Expression> flattenedList = new ArrayList<>(l.size()
+ + e.getChildren().size());
+ for (Expression child : l) {
+ if (child instanceof AndExpression) {
+ flattenedList.addAll(child.getChildren());
+ } else {
+ flattenedList.add(child);
+ }
+ }
+ return new AndExpression(flattenedList);
+ }
+ }
+ return new AndExpression(l);
+ }
+
+ /**
+ * Flattens nested OR expressions.
+ * For example A > 10 OR (B = 10 OR C > 0) is an OrExpression with two
children that are
+ * A > 10 and (B = 10 OR C > 0). Note the second child is another
OrExpression. This is
+ * flattened as an OrExpression ( A > 10 OR B = 10 OR C > 0) with
three
+ * children that are A > 10, B = 10, and C > 0.
+ *
+ */
+ private static OrExpression flattenOr(List<Expression> l) {
+ for (Expression e : l) {
+ if (e instanceof OrExpression) {
+ List <Expression> flattenedList = new ArrayList<>(l.size()
+ + e.getChildren().size());
+ for (Expression child : l) {
+ if (child instanceof OrExpression) {
+ flattenedList.addAll(child.getChildren());
+ } else {
+ flattenedList.add(child);
+ }
+ }
+ return new OrExpression(flattenedList);
+ }
+ }
+ return new OrExpression(l);
+ }
+
+ /**
+ * Flattens nested AND expressions and then distributes AND over OR.
+ *
+ */
+ @Override public Expression visitLeave(AndExpression node,
List<Expression> l) {
+ AndExpression andExpression = flattenAnd(l);
+
+ boolean foundOrChild = false;
+ int i;
+ Expression child = null;
+ List <Expression> andChildren = andExpression.getChildren();
+ for (i = 0; i < andChildren.size(); i++) {
+ child = andChildren.get(i);
+ if (child instanceof OrExpression) {
+ foundOrChild = true;
+ break;
+ }
+ }
+
+ if (foundOrChild) {
+ List <Expression> flattenedList = new
ArrayList<>(andChildren.size() - 1);
+ for (int j = 0; j < andChildren.size(); j++) {
+ if (i != j) {
+ flattenedList.add(andChildren.get(j));
+ }
+ }
+ List <Expression> orList = new
ArrayList<>(child.getChildren().size());
+ for (Expression grandChild : child.getChildren()) {
+ List <Expression> andList = new ArrayList<>(l.size());
+ andList.addAll(flattenedList);
+ andList.add(grandChild);
+ orList.add(visitLeave(new AndExpression(andList),
andList));
+ }
+ return visitLeave(new OrExpression(orList), orList);
+ }
+ return andExpression;
+ }
+ @Override public Expression visitLeave(OrExpression node,
List<Expression> l) {
+ return flattenOr(l);
+ }
+
+ @Override public Expression visitLeave(ScalarFunction node,
List<Expression> l) {
+ return node;
+ }
+
+ private static ComparisonExpression
createComparisonExpression(CompareOperator op, Expression lhs, Expression rhs) {
+ List <Expression> children = new ArrayList<>(2);
+ children.add(lhs);
+ children.add(rhs);
+ return new ComparisonExpression(children, op);
+ }
+ @Override public Expression visitLeave(ComparisonExpression node,
List<Expression> l) {
+ if (l == null || l.isEmpty()) {
+ return node;
+ }
+ Expression lhs = l.get(0);
+ Expression rhs = l.get(1);
+ if (!(lhs instanceof RowValueConstructorExpression) ||
+ !(rhs instanceof RowValueConstructorExpression)) {
+ return new ComparisonExpression(l, node.getFilterOp());
+ }
+
+ // Rewrite RVC in DNF (Disjunctive Normal Form)
+ // For example
+ // (A, B, C ) op (a, b, c) where op is == or != equals to
+ // (A != a and B != b and C != c)
+ // (A, B, C ) op (a, b, c) where op is <, <=, >, or >= is equals to
+ // (A == a and B == b and C op c) or (A == a and B op b) or A op c
+
+ int childCount = lhs.getChildren().size();
+ if (node.getFilterOp() == EQUAL ||
+ node.getFilterOp() == NOT_EQUAL) {
+ List <Expression> andList = new ArrayList<>(childCount);
+ for (int i = 0; i < childCount; i++) {
+ andList.add(createComparisonExpression(node.getFilterOp(),
lhs.getChildren().get(i),
+ rhs.getChildren().get(i)));
+ }
+ return new AndExpression(andList);
+ }
+ List <Expression> orList = new ArrayList<>(childCount);
+ for (int i = 0; i < childCount; i++) {
+ List <Expression> andList = new ArrayList<>(childCount);
+ int j;
+ for (j = 0; j < childCount - i - 1; j++) {
+ andList.add(createComparisonExpression(EQUAL,
lhs.getChildren().get(j),
+ rhs.getChildren().get(j)));
+ }
+ andList.add(createComparisonExpression(node.getFilterOp(),
lhs.getChildren().get(j),
+ rhs.getChildren().get(j)));
+ orList.add(new AndExpression(andList));
+ }
+ return new OrExpression(orList);
+ }
+
+ @Override public Expression visitLeave(LikeExpression node,
List<Expression> l) {
+ return node;
+ }
+
+ @Override public Expression visitLeave(SingleAggregateFunction node,
List<Expression> l) {
+ return node;
+ }
+
+ @Override public Expression visitLeave(CaseExpression node,
List<Expression> l) {
+ return node;
+ }
+
+ private static Expression negate(ComparisonExpression node) {
+ CompareOperator op = node.getFilterOp();
+ Expression lhs = node.getChildren().get(0);
+ Expression rhs = node.getChildren().get(1);
+ switch (op){
+ case LESS:
+ return createComparisonExpression(GREATER_OR_EQUAL, lhs, rhs);
+ case LESS_OR_EQUAL:
+ return createComparisonExpression(GREATER, lhs, rhs);
+ case EQUAL:
+ return createComparisonExpression(NOT_EQUAL, lhs, rhs);
+ case NOT_EQUAL:
+ return createComparisonExpression(EQUAL, lhs, rhs);
+ case GREATER_OR_EQUAL:
+ return createComparisonExpression(LESS, lhs, rhs);
+ case GREATER:
+ return createComparisonExpression(LESS_OR_EQUAL, lhs, rhs);
+ default:
+ throw new IllegalArgumentException("Unexpected CompareOp of "
+ op);
+ }
+ }
+ private static List<Expression> negateChildren(List<Expression>
children) {
+ List <Expression> list = new ArrayList<>(children.size());
+ for (Expression child : children) {
+ if (child instanceof ComparisonExpression) {
+ list.add(negate((ComparisonExpression) child));
+ } else if (child instanceof OrExpression) {
+ list.add(negate((OrExpression) child));
+ } else if (child instanceof AndExpression) {
+ list.add(negate((AndExpression) child));
+ } else if (child instanceof ColumnExpression) {
+ list.add(new NotExpression(child));
+ } else if (child instanceof NotExpression) {
+ list.add(child.getChildren().get(0));
+ } else {
+ throw new IllegalArgumentException("Unexpected Instance of
" + child);
+ }
+ }
+ return list;
+ }
+ private static Expression negate(OrExpression node) {
+ return new AndExpression(negateChildren(node.getChildren()));
+ }
+
+ private static Expression negate(AndExpression node) {
+ return new OrExpression(negateChildren(node.getChildren()));
+ }
+ @Override public Expression visitLeave(NotExpression node,
List<Expression> l) {
+ Expression child = l.get(0);
+ if (child instanceof OrExpression) {
+ return negate((OrExpression) child);
+ } else if (child instanceof AndExpression) {
+ return negate((AndExpression) child);
+ } else if (child instanceof ComparisonExpression) {
+ return negate((ComparisonExpression) child);
+ } else if (child instanceof NotExpression) {
+ return child.getChildren().get(0);
+ } else if (child instanceof IsNullExpression) {
+ return new
IsNullExpression(ImmutableList.of(l.get(0).getChildren().get(0)),
+ !((IsNullExpression) child).isNegate());
+ } else {
+ return new NotExpression(child);
+ }
+ }
+
+ private Expression transformInList(InListExpression node, boolean
negate, List<Expression> l) {
+ List<Expression> list = new
ArrayList<>(node.getKeyExpressions().size());
+ for (Expression element : node.getKeyExpressions()) {
+ if (negate) {
+ list.add(createComparisonExpression(NOT_EQUAL, l.get(0),
element));
Review Comment:
@kadirozde This doesn't seem to handle the case where the LHS of inlist can
be a RVC expression like `(col1, col2) IN ((2,3), (7,8))`
> Partial Global Secondary Indexes
> --------------------------------
>
> Key: PHOENIX-7032
> URL: https://issues.apache.org/jira/browse/PHOENIX-7032
> Project: Phoenix
> Issue Type: New Feature
> Reporter: Kadir Ozdemir
> Assignee: Kadir Ozdemir
> Priority: Major
>
> The secondary indexes supported in Phoenix have been full indexes such that
> for every data table row there is an index row. Generating an index row for
> every data table row is not always required. For example, some use cases do
> not require index rows for the data table rows in which indexed column values
> are null. Such indexes are called sparse indexes. Partial indexes generalize
> the concept of sparse indexing and allow users to specify the subset of the
> data table rows for which index rows will be maintained. This subset is
> specified using a WHERE clause added to the CREATE INDEX DDL statement.
> Partial secondary indexes were first proposed by Michael Stonebraker
> [here|https://dsf.berkeley.edu/papers/ERL-M89-17.pdf]. Since then several SQL
> databases (e.g.,
> [Postgres|https://www.postgresql.org/docs/current/indexes-partial.html] and
> [SQLite|https://www.sqlite.org/partialindex.html]) and NoSQL databases
> (e.g., [MongoDB|https://www.mongodb.com/docs/manual/core/index-partial/])
> have supported some form of partial indexes. It is challenging to allow
> arbitrary WHERE clauses in DDL statements. For example, Postgres does not
> allow subqueries in these where clauses and SQLite supports much more
> restrictive where clauses.
> Supporting arbitrary where clauses creates challenges for query optimizers in
> deciding the usability of a partial index for a given query. If the set of
> data table rows that satisfy the query is a subset of the data table rows
> that the partial index points back, then the query can use the index. Thus,
> the query optimizer has to decide if the WHERE clause of the query implies
> the WHERE clause of the index.
> Michael Stonebraker [here|https://dsf.berkeley.edu/papers/ERL-M89-17.pdf]
> suggests that an index WHERE clause is a conjunct of simple terms, i.e:
> i-clause-1 and i-clause-2 and ... and i-clause-m where each clause is of the
> form <column> <operator> <constant>. Hence, the qualification can be
> evaluated for each tuple in the indicated relation without consulting
> additional tuples.
> Phoenix partial indexes will initially support a more general set of index
> WHERE clauses that can be evaluated on a single row with the following
> exceptions
> * Subqueries are not allowed.
> * Like expressions are allowed with very limited support such that an index
> WHERE clause with like expressions can imply/contain a query if the query has
> the same like expressions that the index WHERE clause has.
> * Comparison between columns are allowed without supporting transitivity,
> for example, a > b and b > c does not imply a > c.
> Partial indexes will be supported initially for global secondary indexes,
> i.e., covered global indexes and uncovered global indexes. The local
> secondary indexes will be supported in future.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)