Jackie-Jiang commented on code in PR #8979:
URL: https://github.com/apache/pinot/pull/8979#discussion_r959860447
##########
pinot-core/src/main/java/org/apache/pinot/core/plan/SelectionPlanNode.java:
##########
@@ -50,67 +52,113 @@ public Operator<IntermediateResultsBlock> run() {
List<ExpressionContext> expressions =
SelectionOperatorUtils.extractExpressions(_queryContext, _indexSegment);
int limit = _queryContext.getLimit();
- if (limit > 0) {
- List<OrderByExpressionContext> orderByExpressions =
_queryContext.getOrderByExpressions();
- if (orderByExpressions == null) {
- // Selection only
- TransformOperator transformOperator = new
TransformPlanNode(_indexSegment, _queryContext, expressions,
- Math.min(limit, DocIdSetPlanNode.MAX_DOC_PER_CALL)).run();
- return new SelectionOnlyOperator(_indexSegment, _queryContext,
expressions, transformOperator);
- } else {
- // Selection order-by
- if (isAllOrderByColumnsSorted(orderByExpressions)) {
- // All order-by columns are sorted, no need to sort the records
- TransformOperator transformOperator = new
TransformPlanNode(_indexSegment, _queryContext, expressions,
- Math.min(limit + _queryContext.getOffset(),
DocIdSetPlanNode.MAX_DOC_PER_CALL)).run();
- return new SelectionOrderByOperator(_indexSegment, _queryContext,
expressions, transformOperator, true);
- } else if (orderByExpressions.size() == expressions.size()) {
- // All output expressions are ordered
- TransformOperator transformOperator =
- new TransformPlanNode(_indexSegment, _queryContext, expressions,
DocIdSetPlanNode.MAX_DOC_PER_CALL).run();
- return new SelectionOrderByOperator(_indexSegment, _queryContext,
expressions, transformOperator, false);
- } else {
- // Not all output expressions are ordered, only fetch the order-by
expressions and docId to avoid the
- // unnecessary data fetch
- List<ExpressionContext> expressionsToTransform = new
ArrayList<>(orderByExpressions.size());
- for (OrderByExpressionContext orderByExpression :
orderByExpressions) {
- expressionsToTransform.add(orderByExpression.getExpression());
- }
- TransformOperator transformOperator =
- new TransformPlanNode(_indexSegment, _queryContext,
expressionsToTransform,
- DocIdSetPlanNode.MAX_DOC_PER_CALL).run();
- return new SelectionOrderByOperator(_indexSegment, _queryContext,
expressions, transformOperator, false);
- }
- }
- } else {
+ if (limit == 0) {
// Empty selection (LIMIT 0)
TransformOperator transformOperator = new
TransformPlanNode(_indexSegment, _queryContext, expressions, 0).run();
return new EmptySelectionOperator(_indexSegment, expressions,
transformOperator);
}
+ List<OrderByExpressionContext> orderByExpressions =
_queryContext.getOrderByExpressions();
+
+ if (orderByExpressions == null) {
+ // Selection only
+ // ie: SELECT ... FROM Table WHERE ... LIMIT 10
+ int actualLimit = Math.min(limit, DocIdSetPlanNode.MAX_DOC_PER_CALL);
+ TransformPlanNode planNode = new TransformPlanNode(_indexSegment,
_queryContext, expressions, actualLimit);
+ TransformOperator transformOperator = planNode.run();
+
+ return new SelectionOnlyOperator(_indexSegment, _queryContext,
expressions, transformOperator);
+ }
+ int numOrderByExpressions = orderByExpressions.size();
+ // Although it is a break of abstraction, some code, specially merging,
assumes that if there is an order by
+ // expression the operator will return a block whose selection result is a
priority queue.
+ int sortedColumnsPrefixSize = getSortedColumnsPrefix(orderByExpressions);
+ if (sortedColumnsPrefixSize > 0 && isOrderByOptimizationEnabled()) {
+ // The first order by expressions are sorted (either asc or desc).
+ // ie: SELECT ... FROM Table WHERE predicates ORDER BY sorted_column
DESC LIMIT 10 OFFSET 5
+ // or: SELECT ... FROM Table WHERE predicates ORDER BY sorted_column,
not_sorted LIMIT 10 OFFSET 5
+ // but not SELECT ... FROM Table WHERE predicates ORDER BY not_sorted,
sorted_column LIMIT 10 OFFSET 5
+ if (orderByExpressions.get(0).isAsc()) {
+ int actualLimit = Math.min(limit + _queryContext.getOffset(),
DocIdSetPlanNode.MAX_DOC_PER_CALL);
+ TransformPlanNode planNode = new TransformPlanNode(_indexSegment,
_queryContext, expressions, actualLimit);
+ TransformOperator transformOperator = planNode.run();
+ return new SelectionPartiallyOrderedByAscOperator(_indexSegment,
_queryContext, expressions,
+ transformOperator, sortedColumnsPrefixSize);
+ } else {
+ int actualLimit = DocIdSetPlanNode.MAX_DOC_PER_CALL;
+ TransformPlanNode planNode = new TransformPlanNode(_indexSegment,
_queryContext, expressions, actualLimit);
+ TransformOperator transformOperator = planNode.run();
+ return new SelectionPartiallyOrderedByDescOperation(_indexSegment,
_queryContext, expressions,
+ transformOperator, sortedColumnsPrefixSize);
+ }
+ }
+ if (numOrderByExpressions == expressions.size()) {
+ // All output expressions are ordered
+ // ie: SELECT not_sorted1, not_sorted2 FROM Table WHERE ... ORDER BY
not_sorted1, not_sorted2 LIMIT 10 OFFSET 5
+ TransformOperator transformOperator =
+ new TransformPlanNode(_indexSegment, _queryContext, expressions,
DocIdSetPlanNode.MAX_DOC_PER_CALL).run();
+ return new SelectionOrderByOperator(_indexSegment, _queryContext,
expressions, transformOperator);
+ }
+ // Not all output expressions are ordered, only fetch the order-by
expressions and docId to avoid the
+ // unnecessary data fetch
+ // ie: SELECT ... FROM Table WHERE ... ORDER BY not_sorted1, not_sorted2
LIMIT 10
+ List<ExpressionContext> expressionsToTransform = new
ArrayList<>(numOrderByExpressions);
+ for (OrderByExpressionContext orderByExpression : orderByExpressions) {
+ expressionsToTransform.add(orderByExpression.getExpression());
+ }
+ TransformOperator transformOperator =
+ new TransformPlanNode(_indexSegment, _queryContext,
expressionsToTransform,
+ DocIdSetPlanNode.MAX_DOC_PER_CALL).run();
+ return new SelectionOrderByOperator(_indexSegment, _queryContext,
expressions, transformOperator);
}
/**
- * This function checks whether all columns in order by clause are
pre-sorted.
- * This is used to optimize order by limit clauses.
- * For eg:
- * A query like "select * from table order by col1, col2 limit 10"
- * will take all the n matching rows and add it to a priority queue of size
10.
- * This is nlogk operation which can be quite expensive for a large n.
- * In the above example, if the docs in the segment are already sorted by
col1 and col2 then there is no need for
- * sorting at all (only limit is needed).
- * @return true is all columns in order by clause are sorted . False
otherwise
+ * This functions returns the number of expressions that are sorted by the
implicit order in the index.
+ *
+ * This means that query that uses these expressions in its order by doesn't
actually need to sort from 0 to the
+ * given number (excluded) of expressions, as they are returned in the
correct order.
+ *
+ * This method supports ASC and DESC order and ensures that all prefix
expressions follow the same order. For example,
+ * ORDER BY sorted_col1 ASC, sorted_col2 ASC and ORDER BY sorted_col1 DESC,
sorted_col2 DESC will return 2 but
+ * ORDER BY sorted_col1 DESC, sorted_col2 ASC and ORDER BY sorted_col1 ASC,
sorted_col2 DESC will return 1 while
+ * ORDER BY not_sorted, sorted_col1 will return 0 because the first column
is not sorted.
+ *
+ * It doesn't make sense to add literal expressions in an order by
expression, but if they are included, they are
+ * considered sorted and its ASC/DESC is ignored.
+ *
+ * @return the max number that guarantees that from the first expression to
the returned number, the index is already
+ * sorted.
*/
- private boolean isAllOrderByColumnsSorted(List<OrderByExpressionContext>
orderByExpressions) {
- for (OrderByExpressionContext orderByExpression : orderByExpressions) {
- if (!(orderByExpression.getExpression().getType() ==
ExpressionContext.Type.IDENTIFIER)
- || !orderByExpression.isAsc()) {
- return false;
+ private int getSortedColumnsPrefix(List<OrderByExpressionContext>
orderByExpressions) {
+ boolean asc = orderByExpressions.get(0).isAsc();
+ for (int i = 0; i < orderByExpressions.size(); i++) {
+ if (!isSorted(orderByExpressions.get(i), asc)) {
+ return i;
}
- String column = orderByExpression.getExpression().getIdentifier();
- if
(!_indexSegment.getDataSource(column).getDataSourceMetadata().isSorted()) {
+ }
+ // If we reach here, all are sorted
+ return orderByExpressions.size();
+ }
+
+ private boolean isSorted(OrderByExpressionContext orderByExpression, boolean
asc) {
+ switch (orderByExpression.getExpression().getType()) {
+ case LITERAL: {
+ return true;
+ }
+ case IDENTIFIER: {
+ if (!orderByExpression.isAsc() == asc) {
+ return false;
+ }
+ String column = orderByExpression.getExpression().getIdentifier();
+ return
_indexSegment.getDataSource(column).getDataSourceMetadata().isSorted();
+ }
+ case FUNCTION: // we could optimize monotonically increasing functions
+ default: {
return false;
}
}
- return true;
+ }
+
+ private boolean isOrderByOptimizationEnabled() {
Review Comment:
What I meant is that since this is on by default, we can choose to not have
it (always on).
If you feel it is useful to have the ability to turn it off, we can put it
in the `QueryOptionsUtils` (which you already did), and store the key in the
constant for easier tracking. For the existing naming convention, suggest
changing it to `skipOrderByOptimization` and make it default to `false`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]