This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch 29.0.1
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/29.0.1 by this push:
new e59bba1d545 fix NestedCommonFormatColumnHandler to use nullable
comparator when castToType is set (#15921) (#16053)
e59bba1d545 is described below
commit e59bba1d54590c845979f9fa1d9515504a8f97fb
Author: Karan Kumar <[email protected]>
AuthorDate: Mon Mar 11 16:14:40 2024 +0530
fix NestedCommonFormatColumnHandler to use nullable comparator when
castToType is set (#15921) (#16053)
Fixes a bug when the undocumented castToType parameter is set on 'auto'
column schema, which should have been using the 'nullable' comparator to allow
null values to be present when merging columns, but wasn't which would lead to
null pointer exceptions. Also fixes an issue I noticed while adding tests that
if 'FLOAT' type was specified for the castToType parameter it would be an
exception because that type is not expected to be present, since 'auto' uses
the native expressions to det [...]
In the future I should probably split this functionality out of the 'auto'
schema (maybe even have a simpler version of the auto indexer dedicated to
handling non-nested data) but still have the same results of writing out the
newer 'nested common format' columns used by 'auto', but I haven't taken that
on in this PR.
Co-authored-by: Clint Wylie <[email protected]>
---
.../apache/druid/segment/AutoTypeColumnSchema.java | 9 ++-
.../segment/NestedCommonFormatColumnHandler.java | 3 +-
.../druid/query/scan/NestedDataScanQueryTest.java | 65 ++++++++++++++++++++++
3 files changed, 75 insertions(+), 2 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java
b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java
index a72ae37d874..9959c9ef249 100644
---
a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java
+++
b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java
@@ -76,7 +76,14 @@ public class AutoTypeColumnSchema extends DimensionSchema
)
{
super(name, null, true);
- this.castToType = castToType;
+ // auto doesn't currently do FLOAT since expressions only support DOUBLE
+ if (ColumnType.FLOAT.equals(castToType)) {
+ this.castToType = ColumnType.DOUBLE;
+ } else if (ColumnType.FLOAT_ARRAY.equals(castToType)) {
+ this.castToType = ColumnType.DOUBLE_ARRAY;
+ } else {
+ this.castToType = castToType;
+ }
}
@Override
diff --git
a/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java
b/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java
index 5e811e22995..e679b2d2d9c 100644
---
a/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java
+++
b/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java
@@ -99,8 +99,9 @@ public class NestedCommonFormatColumnHandler implements
DimensionHandler<Structu
public Comparator<ColumnValueSelector> getEncodedValueSelectorComparator()
{
if (castTo != null) {
+ final Comparator<Object> typeComparator = castTo.getNullableStrategy();
return (s1, s2) ->
- castTo.getStrategy().compare(
+ typeComparator.compare(
StructuredData.unwrap(s1.getObject()),
StructuredData.unwrap(s2.getObject())
);
diff --git
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
index c6e3cd17438..64c68c943f5 100644
---
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
+++
b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.Module;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.guice.NestedDataModule;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
@@ -42,6 +43,7 @@ import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
+import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.column.ColumnType;
@@ -546,6 +548,69 @@ public class NestedDataScanQueryTest extends
InitializedNullHandlingTest
Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(),
resultsSegments.get(0).getEvents().toString());
}
+ @Test
+ public void testIngestAndScanSegmentsRealtimeAutoExplicit() throws Exception
+ {
+ DimensionsSpec spec = DimensionsSpec.builder()
+ .setDimensions(
+ ImmutableList.of(
+ new
AutoTypeColumnSchema("str", ColumnType.STRING),
+ new
AutoTypeColumnSchema("long", ColumnType.LONG),
+ new
AutoTypeColumnSchema("double", ColumnType.FLOAT)
+ )
+ )
+ .build();
+ Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
+ .dataSource("test_datasource")
+ .intervals(
+ new
MultipleIntervalSegmentSpec(
+
Collections.singletonList(Intervals.ETERNITY)
+ )
+ )
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(100)
+ .context(ImmutableMap.of())
+ .build();
+ List<Segment> realtimeSegs = ImmutableList.of(
+ NestedDataTestUtils.createIncrementalIndex(
+ tempFolder,
+ NestedDataTestUtils.TYPES_DATA_FILE,
+ NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+ NestedDataTestUtils.TIMESTAMP_SPEC,
+ spec,
+ TransformSpec.NONE,
+ NestedDataTestUtils.COUNT,
+ Granularities.DAY,
+ true
+ )
+ );
+ List<Segment> segs = NestedDataTestUtils.createSegments(
+ tempFolder,
+ closer,
+ NestedDataTestUtils.TYPES_DATA_FILE,
+ NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
+ NestedDataTestUtils.TIMESTAMP_SPEC,
+ spec,
+ TransformSpec.NONE,
+ NestedDataTestUtils.COUNT,
+ Granularities.DAY,
+ true,
+ IndexSpec.DEFAULT
+ );
+
+
+ final Sequence<ScanResultValue> seq =
helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
+ final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs,
scanQuery);
+
+ List<ScanResultValue> resultsRealtime = seq.toList();
+ List<ScanResultValue> resultsSegments = seq2.toList();
+ logResults(resultsSegments);
+ logResults(resultsRealtime);
+ Assert.assertEquals(1, resultsRealtime.size());
+ Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
+ Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(),
resultsSegments.get(0).getEvents().toString());
+ }
+
@Test
public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryArrayTypes()
throws Exception
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]