pranavbhole commented on code in PR #14587:
URL: https://github.com/apache/druid/pull/14587#discussion_r1281013733
##########
processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java:
##########
@@ -408,7 +356,138 @@ void addPreFilter(@Nullable final Filter filter)
);
}
+ class FilterSplitter
+ {
+ private String inputColumn;
+ private ColumnCapabilities inputColumnCapabilites;
+ private VirtualColumns queryVirtualColumns;
+
+ public FilterSplitter(
+ String inputColumn,
+ ColumnCapabilities inputColumnCapabilites, VirtualColumns
queryVirtualColumns
+ )
+ {
+ this.inputColumn = inputColumn;
+ this.inputColumnCapabilites = inputColumnCapabilites;
+ this.queryVirtualColumns = queryVirtualColumns;
+ }
+
+ final List<Filter> filtersPushedDownToBaseCursor = new ArrayList<>();
+ final List<Filter> filtersForPostUnnestCursor = new ArrayList<>();
+
+ void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter
filter, boolean skipPreFilters)
+ {
+ if (filter == null) {
+ return;
+ }
+ if (!skipPreFilters) {
+ final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter,
inputColumn, inputColumnCapabilites);
+ if (newFilter != null) {
+ // Add the rewritten filter pre-unnest, so we get the benefit of any
indexes, and so we avoid unnesting
+ // any rows that do not match this filter at all.
+ filtersPushedDownToBaseCursor.add(newFilter);
+ }
+ }
+ // Add original filter post-unnest no matter what: we need to filter out
any extraneous unnested values.
+ filtersForPostUnnestCursor.add(filter);
+ }
+
+ void addPreFilter(@Nullable final Filter filter)
+ {
+ if (filter == null) {
+ return;
+ }
+
+ final Set<String> requiredColumns = filter.getRequiredColumns();
+
+ // Run filter post-unnest if it refers to any virtual columns. This is a
conservative judgement call
+ // that perhaps forces the code to use a ValueMatcher where an index
would've been available,
+ // which can have real performance implications. This is an interim
choice made to value correctness
+ // over performance. When we need to optimize this performance, we
should be able to
+ // create a VirtualColumnDatasource that contains all the virtual
columns, in which case the query
+ // itself would stop carrying them and everything should be able to be
pushed down.
+ if (queryVirtualColumns.getVirtualColumns().length > 0) {
+ for (String column : requiredColumns) {
+ if (queryVirtualColumns.exists(column)) {
+ filtersForPostUnnestCursor.add(filter);
+ return;
+ }
+ }
+ }
+ filtersPushedDownToBaseCursor.add(filter);
+
+ }
+ }
+ /**
+ * Handles the nested rewrite for unnest columns in a recursive way:
+ * it loops through all and/or filters, rewrites only the required filters in
the child, and adds them to preFilters if qualified,
+ * or else skips adding them to preFilters.
+ * RULES:
+ * 1. Add to preFilters only when top level filter is AND.
+ * For example: a=1 and (b=2 or c=2). In this case a=1 can be added to
preFilters, but b=2 cannot be added to preFilters.
+ * 2. If the top level is an OR filter, then we can either add the entire top
level OR filter to preFilters or skip it altogether.
+ * for example: a=1 or (b=2 and c=2)
+ * 3. Filters on an unnest column which is derived from an Array or any other
Expression cannot be pushed down to base.
+ * For example: a=1 and vc=3; let's say vc is an ExpressionVirtualColumn, then
vc=3 cannot be pushed down to base even if the top level is an AND filter.
+ * 4.
+ *
Review Comment:
updated the comments
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]