This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 911941b4a63 fix issue with nested virtual column index supplier for
partial paths when processing from raw (#15643)
911941b4a63 is described below
commit 911941b4a63eb6393fde77048fb2f7bad1cca1bc
Author: Clint Wylie <[email protected]>
AuthorDate: Mon Jan 8 18:25:08 2024 -0800
fix issue with nested virtual column index supplier for partial paths when
processing from raw (#15643)
---
.../segment/virtual/NestedFieldVirtualColumn.java | 7 +-
.../druid/query/scan/NestedDataScanQueryTest.java | 118 +++++++++++++++++++++
2 files changed, 124 insertions(+), 1 deletion(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
index 63b8598ef63..160415924a3 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java
@@ -1170,10 +1170,15 @@ public class NestedFieldVirtualColumn implements
VirtualColumn
if (theColumn instanceof CompressedNestedDataComplexColumn) {
final CompressedNestedDataComplexColumn<?> nestedColumn =
(CompressedNestedDataComplexColumn<?>) theColumn;
final ColumnIndexSupplier nestedColumnPathIndexSupplier =
nestedColumn.getColumnIndexSupplier(parts);
+ if (nestedColumnPathIndexSupplier == null && processFromRaw) {
+ // if processing from raw, a non-existent path from parts doesn't mean
the path doesn't really exist
+ // so fall back to no indexes
+ return NoIndexesColumnIndexSupplier.getInstance();
+ }
if (expectedType != null) {
final Set<ColumnType> types = nestedColumn.getColumnTypes(parts);
// if the expected output type is numeric but not all of the input
types are numeric, we might have additional
- // null values than what the null value bitmap is tracking, wrap it
+ // null values than what the null value bitmap is tracking, fall back
to not using indexes
if (expectedType.isNumeric() && (types == null ||
types.stream().anyMatch(t -> !t.isNumeric()))) {
return NoIndexesColumnIndexSupplier.getInstance();
}
diff --git
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
index 8435ea42f5f..cedef264a53 100644
---
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
+++
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
@@ -37,6 +37,8 @@ import
org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.filter.BoundDimFilter;
+import org.apache.druid.query.filter.NotDimFilter;
+import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
@@ -799,6 +801,122 @@ public class NestedDataScanQueryTest extends
InitializedNullHandlingTest
Assert.assertEquals(resultsSegments.get(0).getEvents().toString(),
resultsRealtime.get(0).getEvents().toString());
}
+ @Test
+ public void testIngestAndScanSegmentsAndFilterPartialPathArrayIndex() throws
Exception
+ {
+ Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
+ .dataSource("test_datasource")
+ .intervals(
+ new
MultipleIntervalSegmentSpec(
+
Collections.singletonList(Intervals.ETERNITY)
+ )
+ )
+ .filters(
+
NotDimFilter.of(NullFilter.forColumn("v0"))
+ )
+ .virtualColumns(
+ new NestedFieldVirtualColumn(
+ "complexObj",
+ "v0",
+ ColumnType.NESTED_DATA,
+ null,
+ true,
+ "$.y[0]",
+ false
+ )
+ )
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(100)
+ .context(ImmutableMap.of())
+ .build();
+ List<Segment> segs = NestedDataTestUtils.createSegmentsForJsonInput(
+ tempFolder,
+ closer,
+ NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+ Granularities.HOUR,
+ true,
+ IndexSpec.DEFAULT
+ );
+
+ List<Segment> realtimeSegs = ImmutableList.of(
+ NestedDataTestUtils.createIncrementalIndexForJsonInput(
+ tempFolder,
+ NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+ Granularities.NONE,
+ true
+ )
+ );
+
+ final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(segs,
scanQuery);
+ final Sequence<ScanResultValue> seqRealtime =
helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+ List<ScanResultValue> results = seq.toList();
+ List<ScanResultValue> resultsRealtime = seqRealtime.toList();
+ logResults(results);
+ logResults(resultsRealtime);
+ Assert.assertEquals(1, results.size());
+ Assert.assertEquals(4, ((List) results.get(0).getEvents()).size());
+ Assert.assertEquals(results.size(), resultsRealtime.size());
+ Assert.assertEquals(results.get(0).getEvents().toString(),
resultsRealtime.get(0).getEvents().toString());
+ }
+
+ @Test
+ public void testIngestAndScanSegmentsAndFilterPartialPath() throws Exception
+ {
+ Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
+ .dataSource("test_datasource")
+ .intervals(
+ new
MultipleIntervalSegmentSpec(
+
Collections.singletonList(Intervals.ETERNITY)
+ )
+ )
+ .filters(
+
NotDimFilter.of(NullFilter.forColumn("v0"))
+ )
+ .virtualColumns(
+ new NestedFieldVirtualColumn(
+ "obj",
+ "v0",
+ ColumnType.NESTED_DATA,
+ null,
+ true,
+ "$.b",
+ false
+ )
+ )
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(100)
+ .context(ImmutableMap.of())
+ .build();
+ List<Segment> segs = NestedDataTestUtils.createSegmentsForJsonInput(
+ tempFolder,
+ closer,
+ NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+ Granularities.HOUR,
+ true,
+ IndexSpec.DEFAULT
+ );
+
+ List<Segment> realtimeSegs = ImmutableList.of(
+ NestedDataTestUtils.createIncrementalIndexForJsonInput(
+ tempFolder,
+ NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE,
+ Granularities.NONE,
+ true
+ )
+ );
+
+ final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(segs,
scanQuery);
+ final Sequence<ScanResultValue> seqRealtime =
helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+ List<ScanResultValue> results = seq.toList();
+ List<ScanResultValue> resultsRealtime = seqRealtime.toList();
+ logResults(results);
+ logResults(resultsRealtime);
+ Assert.assertEquals(1, results.size());
+ Assert.assertEquals(6, ((List) results.get(0).getEvents()).size());
+ Assert.assertEquals(results.size(), resultsRealtime.size());
+ Assert.assertEquals(results.get(0).getEvents().toString(),
resultsRealtime.get(0).getEvents().toString());
+ }
+
private static void logResults(List<ScanResultValue> results)
{
StringBuilder bob = new StringBuilder();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]