clintropolis commented on code in PR #14587:
URL: https://github.com/apache/druid/pull/14587#discussion_r1296566951


##########
processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java:
##########
@@ -316,22 +307,190 @@ public void 
test_pushdown_or_filters_unnested_and_original_dimension_with_unnest
       return null;
     });
   }
+  @Test
+  public void 
test_nested_filters_unnested_and_original_dimension_with_unnest_adapters()
+  {
+    final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter(
+        new TestStorageAdapter(INCREMENTAL_INDEX),
+        new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + 
"\"", null, ExprMacroTable.nil()),
+        null
+    );
 
+    final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn();
+
+    final String inputColumn = 
unnestStorageAdapter.getUnnestInputIfDirectAccess(vc);
+
+    final OrFilter baseFilter = new OrFilter(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "1"),
+        new AndFilter(ImmutableList.of(
+            selector(inputColumn, "2"),
+            selector(OUTPUT_COLUMN_NAME, "10")
+        ))
+    ));
+
+    final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of(
+        selector(inputColumn, "1"),
+        new AndFilter(ImmutableList.of(
+            selector(inputColumn, "2"),
+            selector(inputColumn, "10")
+        ))
+    ));
+
+    final Sequence<Cursor> cursorSequence = unnestStorageAdapter.makeCursors(
+        baseFilter,
+        unnestStorageAdapter.getInterval(),
+        VirtualColumns.EMPTY,
+        Granularities.ALL,
+        false,
+        null
+    );
+
+    final TestStorageAdapter base = (TestStorageAdapter) 
unnestStorageAdapter.getBaseAdapter();
+    final Filter pushDownFilter = base.getPushDownFilter();
+
+    Assert.assertEquals(expectedPushDownFilter, pushDownFilter);
+    cursorSequence.accumulate(null, (accumulated, cursor) -> {
+      Assert.assertEquals(cursor.getClass(), PostJoinCursor.class);
+      final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter();
+      // OR-case so base filter should match the postJoinFilter
+      Assert.assertEquals(baseFilter, postFilter);
+      return null;
+    });
+  }
+  @Test
+  public void test_nested_filters_unnested_and_topLevel1And3filtersInOR()
+  {
+    final Filter testQueryFilter = and(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "3"),
+        or(ImmutableList.of(
+            selector("newcol", "2"),
+            selector(COLUMNNAME, "2"),
+            selector(OUTPUT_COLUMN_NAME, "1")
+        ))
+    ));
+    testComputeBaseAndPostUnnestFilters(
+        testQueryFilter,
+        "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || 
multi-string1 = 1))",
+        "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || 
unnested-multi-string1 = 1))"
+    );
+  }
+  @Test
+  public void test_nested_multiLevel_filters_unnested()
+  {
+    final Filter testQueryFilter = and(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "3"),
+        or(ImmutableList.of(
+            or(ImmutableList.of(
+                selector("newcol", "2"),
+                selector(COLUMNNAME, "2"),
+                and(ImmutableList.of(
+                    selector("newcol", "3"),
+                    selector(COLUMNNAME, "7")
+                ))
+            )),
+            selector(OUTPUT_COLUMN_NAME, "1")
+        ))
+    ));
+    testComputeBaseAndPostUnnestFilters(
+        testQueryFilter,
+        "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || (newcol = 3 
&& multi-string1 = 7) || multi-string1 = 1))",
+        "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2 || 
(newcol = 3 && multi-string1 = 7) || unnested-multi-string1 = 1))"
+    );
+  }
+  @Test
+  public void test_nested_multiLevel_filters_unnested5Level()
+  {
+    final Filter testQueryFilter = or(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "3"),
+        or(ImmutableList.of(
+            or(ImmutableList.of(
+                selector("newcol", "2"),
+                selector(COLUMNNAME, "2"),
+                and(ImmutableList.of(
+                    selector("newcol", "3"),
+                    and(ImmutableList.of(
+                        selector(COLUMNNAME, "7"),
+                        selector("newcol_1", "10")
+                    ))
+                ))
+            )),
+            selector(OUTPUT_COLUMN_NAME, "1")
+        ))
+    ));
+    testComputeBaseAndPostUnnestFilters(
+        testQueryFilter,
+        "(multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || (newcol = 3 
&& multi-string1 = 7 && newcol_1 = 10) || multi-string1 = 1)",
+        "(unnested-multi-string1 = 3 || newcol = 2 || multi-string1 = 2 || 
(newcol = 3 && multi-string1 = 7 && newcol_1 = 10) || unnested-multi-string1 = 
1)"
+    );
+  }
+  @Test
+  public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR()
+  {
+    final Filter testQueryFilter = or(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "3"),
+        and(ImmutableList.of(
+            selector("newcol", "2"),
+            selector(COLUMNNAME, "2"),
+            selector(OUTPUT_COLUMN_NAME, "1")
+        ))
+    ));
+    testComputeBaseAndPostUnnestFilters(
+        testQueryFilter,
+        "(multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && 
multi-string1 = 1))",
+        "(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && 
unnested-multi-string1 = 1))"
+    );
+  }
+
+  @Test
+  public void 
test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs()
+  {
+    final Filter testQueryFilter = and(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "3"),
+        or(ImmutableList.of(
+            selector("newcol", "2"),
+            selector(COLUMNNAME, "2")
+        )),
+        or(ImmutableList.of(
+            selector("newcol", "4"),
+            selector(COLUMNNAME, "8"),
+            selector(OUTPUT_COLUMN_NAME, "6")
+        ))
+    ));
+    testComputeBaseAndPostUnnestFilters(
+        testQueryFilter,
+        "(multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && (newcol = 
4 || multi-string1 = 8 || multi-string1 = 6))",
+        "(unnested-multi-string1 = 3 && (newcol = 2 || multi-string1 = 2) && 
(newcol = 4 || multi-string1 = 8 || unnested-multi-string1 = 6))"
+    );
+  }
+
+  @Test
+  public void test_nested_filters_unnested_and_topLevelAND2sdf()
+  {
+    final Filter testQueryFilter = and(ImmutableList.of(
+        selector(OUTPUT_COLUMN_NAME, "3"),
+        selector(COLUMNNAME, "2")
+    ));
+    testComputeBaseAndPostUnnestFilters(
+        testQueryFilter,
+        "(multi-string1 = 3 && multi-string1 = 2)",
+        "(unnested-multi-string1 = 3 && multi-string1 = 2)"
+    );

Review Comment:
   I think there is still some work to do here, but is ok to do as a follow-up.
   
   Side note, this would be a very strange query since there is like 
coincidentally a filter on the underlying unnest column that is referring to it 
directly alongside a filter on the unnested column itself. I think it would 
make sense to migrate a lot of these tests away from having filters on both the 
unnest column and its underlying column, since I cant imagine its going to be 
common in practice.
   
   But thinking about the more general case, like if it was `WHERE 
unnested-multi-string1 = 3 && other-column = 2` i the best rewrite would be
   ```    testComputeBaseAndPostUnnestFilters(
           testQueryFilter,
           "(multi-string1 = 3 && other-column= 2)",
           "unnested-multi-string1 = 3"
       );```
   but the current code leaves the `other-column = 2` in the post-filters, 
which isn't necessary. It would be a pretty big optimization to leave these off 
and only have to do post-filters when absolutely necessary. OR filters cannot 
have the same optimization.
   
   But, i think it should be ok to do this in a follow-up PR, since there is 
some other correctness stuff to be done as well, such as skipping pushdown of 
NOT filters, which could incorrectly filter out too much before the post-filter 
has a chance to match stuff.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to