This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 911941b4a63 fix issue with nested virtual column index supplier for 
partial paths when processing from raw (#15643)
911941b4a63 is described below

commit 911941b4a63eb6393fde77048fb2f7bad1cca1bc
Author: Clint Wylie <[email protected]>
AuthorDate: Mon Jan 8 18:25:08 2024 -0800

    fix issue with nested virtual column index supplier for partial paths when 
processing from raw (#15643)
---
 .../segment/virtual/NestedFieldVirtualColumn.java  |   7 +-
 .../druid/query/scan/NestedDataScanQueryTest.java  | 118 +++++++++++++++++++++
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index 63b8598ef63..160415924a3 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -1170,10 +1170,15 @@ public class NestedFieldVirtualColumn implements 
VirtualColumn
     if (theColumn instanceof CompressedNestedDataComplexColumn) {
       final CompressedNestedDataComplexColumn<?> nestedColumn = 
(CompressedNestedDataComplexColumn<?>) theColumn;
       final ColumnIndexSupplier nestedColumnPathIndexSupplier = 
nestedColumn.getColumnIndexSupplier(parts);
+      if (nestedColumnPathIndexSupplier == null && processFromRaw) {
+        // if processing from raw, a non-existent path from parts doesn't mean 
the path doesn't really exist
+        // so fall back to no indexes
+        return NoIndexesColumnIndexSupplier.getInstance();
+      }
       if (expectedType != null) {
         final Set<ColumnType> types = nestedColumn.getColumnTypes(parts);
         // if the expected output type is numeric but not all of the input 
types are numeric, we might have more
-        // null values than what the null value bitmap is tracking, wrap it
+        // null values than what the null value bitmap is tracking, fall back 
to not using indexes
         if (expectedType.isNumeric() && (types == null || 
types.stream().anyMatch(t -> !t.isNumeric()))) {
           return NoIndexesColumnIndexSupplier.getInstance();
         }
diff --git 
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
 
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
index 8435ea42f5f..cedef264a53 100644
--- 
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
@@ -37,6 +37,8 @@ import 
org.apache.druid.query.aggregation.AggregationTestHelper;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.aggregation.CountAggregatorFactory;
 import org.apache.druid.query.filter.BoundDimFilter;
+import org.apache.druid.query.filter.NotDimFilter;
+import org.apache.druid.query.filter.NullFilter;
 import org.apache.druid.query.filter.SelectorDimFilter;
 import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
@@ -799,6 +801,122 @@ public class NestedDataScanQueryTest extends 
InitializedNullHandlingTest
     Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), 
resultsRealtime.get(0).getEvents().toString());
   }
 
+  @Test
+  public void testIngestAndScanSegmentsAndFilterPartialPathArrayIndex() throws 
Exception
+  {
+    Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
+                                             .dataSource("test_datasource")
+                                             .intervals(
+                                                 new 
MultipleIntervalSegmentSpec(
+                                                     
Collections.singletonList(Intervals.ETERNITY)
+                                                 )
+                                             )
+                                             .filters(
+                                                 
NotDimFilter.of(NullFilter.forColumn("v0"))
+                                             )
+                                             .virtualColumns(
+                                                 new NestedFieldVirtualColumn(
+                                                     "complexObj",
+                                                     "v0",
+                                                     ColumnType.NESTED_DATA,
+                                                     null,
+                                                     true,
+                                                     "$.y[0]",
+                                                     false
+                                                 )
+                                             )
+                                             
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                                             .limit(100)
+                                             .context(ImmutableMap.of())
+                                             .build();
+    List<Segment> segs = NestedDataTestUtils.createSegmentsForJsonInput(
+        tempFolder,
+        closer,
+        NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+        Granularities.HOUR,
+        true,
+        IndexSpec.DEFAULT
+    );
+
+    List<Segment> realtimeSegs = ImmutableList.of(
+        NestedDataTestUtils.createIncrementalIndexForJsonInput(
+            tempFolder,
+            NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+            Granularities.NONE,
+            true
+        )
+    );
+
+    final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(segs, 
scanQuery);
+    final Sequence<ScanResultValue> seqRealtime = 
helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+    List<ScanResultValue> results = seq.toList();
+    List<ScanResultValue> resultsRealtime = seqRealtime.toList();
+    logResults(results);
+    logResults(resultsRealtime);
+    Assert.assertEquals(1, results.size());
+    Assert.assertEquals(4, ((List) results.get(0).getEvents()).size());
+    Assert.assertEquals(results.size(), resultsRealtime.size());
+    Assert.assertEquals(results.get(0).getEvents().toString(), 
resultsRealtime.get(0).getEvents().toString());
+  }
+
+  @Test
+  public void testIngestAndScanSegmentsAndFilterPartialPath() throws Exception
+  {
+    Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
+                                             .dataSource("test_datasource")
+                                             .intervals(
+                                                 new 
MultipleIntervalSegmentSpec(
+                                                     
Collections.singletonList(Intervals.ETERNITY)
+                                                 )
+                                             )
+                                             .filters(
+                                                 
NotDimFilter.of(NullFilter.forColumn("v0"))
+                                             )
+                                             .virtualColumns(
+                                                 new NestedFieldVirtualColumn(
+                                                     "obj",
+                                                     "v0",
+                                                     ColumnType.NESTED_DATA,
+                                                     null,
+                                                     true,
+                                                     "$.b",
+                                                     false
+                                                 )
+                                             )
+                                             
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                                             .limit(100)
+                                             .context(ImmutableMap.of())
+                                             .build();
+    List<Segment> segs = NestedDataTestUtils.createSegmentsForJsonInput(
+        tempFolder,
+        closer,
+        NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+        Granularities.HOUR,
+        true,
+        IndexSpec.DEFAULT
+    );
+
+    List<Segment> realtimeSegs = ImmutableList.of(
+        NestedDataTestUtils.createIncrementalIndexForJsonInput(
+            tempFolder,
+            NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+            Granularities.NONE,
+            true
+        )
+    );
+
+    final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(segs, 
scanQuery);
+    final Sequence<ScanResultValue> seqRealtime = 
helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+    List<ScanResultValue> results = seq.toList();
+    List<ScanResultValue> resultsRealtime = seqRealtime.toList();
+    logResults(results);
+    logResults(resultsRealtime);
+    Assert.assertEquals(1, results.size());
+    Assert.assertEquals(6, ((List) results.get(0).getEvents()).size());
+    Assert.assertEquals(results.size(), resultsRealtime.size());
+    Assert.assertEquals(results.get(0).getEvents().toString(), 
resultsRealtime.get(0).getEvents().toString());
+  }
+
   private static void logResults(List<ScanResultValue> results)
   {
     StringBuilder bob = new StringBuilder();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to