This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new c241c6980c9 store auto columns with only empty or null containing
arrays as ARRAY<LONG> instead of COMPLEX<json> (#15505)
c241c6980c9 is described below
commit c241c6980c9e5e656da252649afb9cb5d01d31a7
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Dec 7 03:31:43 2023 -0800
store auto columns with only empty or null containing arrays as ARRAY<LONG>
instead of COMPLEX<json> (#15505)
---
.../druid/segment/AutoTypeColumnIndexer.java | 6 ++++-
.../apache/druid/segment/nested/FieldTypeInfo.java | 4 ++++
.../query/groupby/NestedGroupByArrayQueryTest.java | 25 +++++++++++++++++++
.../druid/segment/AutoTypeColumnIndexerTest.java | 2 +-
.../segment/nested/NestedFieldTypeInfoTest.java | 15 ++++++++++++
.../segment/nested/VariantColumnSupplierTest.java | 14 ++++++++++-
.../src/test/resources/nested-array-test-data.json | 28 +++++++++++-----------
.../sql/calcite/CalciteNestedDataQueryTest.java | 4 ++--
8 files changed, 79 insertions(+), 19 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
index 3ccde4221ae..3e47ef2da9e 100644
---
a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
+++
b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java
@@ -259,7 +259,7 @@ public class AutoTypeColumnIndexer implements
DimensionIndexer<StructuredData, S
final TreeMap<String, FieldTypeInfo.MutableTypeSet> fields = new
TreeMap<>();
for (Map.Entry<String, FieldIndexer> entry : fieldIndexers.entrySet()) {
// skip adding the field if no types are in the set, meaning only null
values have been processed
- if (!entry.getValue().getTypes().isEmpty()) {
+ if (!entry.getValue().getTypes().isEmpty() ||
entry.getValue().getTypes().hasUntypedArray()) {
fields.put(entry.getKey(), entry.getValue().getTypes());
}
}
@@ -421,6 +421,10 @@ public class AutoTypeColumnIndexer implements
DimensionIndexer<StructuredData, S
}
return ColumnTypeFactory.getInstance().ofArray(logicalType);
}
+ // if we only have empty and null arrays, ARRAY<LONG> is the most
restrictive type we can pick
+ if (rootField.getTypes().hasUntypedArray()) {
+ return ColumnType.LONG_ARRAY;
+ }
}
return ColumnType.NESTED_DATA;
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/FieldTypeInfo.java
b/processing/src/main/java/org/apache/druid/segment/nested/FieldTypeInfo.java
index 15691cfc9c4..b832e3d7350 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/FieldTypeInfo.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/FieldTypeInfo.java
@@ -183,6 +183,10 @@ public class FieldTypeInfo
if (hasEmptyArray && columnType != null && !columnType.isArray()) {
return null;
}
+ // if column only has empty arrays, call it long array
+ if (types == 0x00 && hasEmptyArray) {
+ return ColumnType.LONG_ARRAY;
+ }
return columnType;
}
diff --git
a/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
b/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
index a43509de921..afefe73adf7 100644
---
a/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
+++
b/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java
@@ -53,6 +53,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
@@ -438,6 +439,30 @@ public class NestedGroupByArrayQueryTest
);
}
+ @Test
+ public void testGroupByEmptyIshArrays()
+ {
+ GroupByQuery groupQuery = GroupByQuery.builder()
+ .setDataSource("test_datasource")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+
.setDimensions(DefaultDimensionSpec.of("arrayNoType", ColumnType.LONG_ARRAY))
+ .setAggregatorSpecs(new
CountAggregatorFactory("count"))
+ .setContext(getContext())
+ .build();
+
+
+ runResults(
+ groupQuery,
+ ImmutableList.of(
+ new Object[]{null, 4L},
+ new Object[]{new ComparableList<>(Collections.emptyList()), 18L},
+ new Object[]{new
ComparableList<>(Collections.singletonList(null)), 4L},
+ new Object[]{new ComparableList<>(Arrays.asList(null, null)), 2L}
+ )
+ );
+ }
+
private void runResults(
GroupByQuery groupQuery,
List<Object[]> expectedResults
diff --git
a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
index 6d2d29806df..8058980e17f 100644
---
a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java
@@ -760,7 +760,7 @@ public class AutoTypeColumnIndexerTest extends
InitializedNullHandlingTest
Assert.assertFalse(indexer.hasNulls);
Assert.assertFalse(indexer.hasNestedData);
Assert.assertTrue(indexer.isConstant());
- Assert.assertEquals(ColumnType.NESTED_DATA, indexer.getLogicalType());
+ Assert.assertEquals(ColumnType.LONG_ARRAY, indexer.getLogicalType());
}
@Test
diff --git
a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldTypeInfoTest.java
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldTypeInfoTest.java
index 33df1887ea5..4c7020c7523 100644
---
a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldTypeInfoTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldTypeInfoTest.java
@@ -89,6 +89,21 @@ public class NestedFieldTypeInfoTest
}
}
+ @Test
+ public void testOnlyEmptyType()
+ {
+ FieldTypeInfo.MutableTypeSet typeSet = new FieldTypeInfo.MutableTypeSet();
+ Assert.assertNull(typeSet.getSingleType());
+ Assert.assertTrue(typeSet.isEmpty());
+
+ typeSet.addUntypedArray();
+
+ Assert.assertEquals(ColumnType.LONG_ARRAY, typeSet.getSingleType());
+ // the type set itself holds no actual types; only getSingleType reports ARRAY<LONG>
+ Assert.assertEquals(ImmutableSet.of(),
FieldTypeInfo.convertToSet(typeSet.getByteValue()));
+ Assert.assertTrue(typeSet.hasUntypedArray());
+ }
+
@Test
public void testEqualsAndHashCode()
{
diff --git
a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java
b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java
index 7910e49652f..4604aff1c20 100644
---
a/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java
@@ -156,6 +156,13 @@ public class VariantColumnSupplierTest extends
InitializedNullHandlingTest
Arrays.asList(null, 3.3)
);
+ static List<List<Object>> NO_TYPE_ARRAY = Arrays.asList(
+ Collections.emptyList(),
+ null,
+ Collections.emptyList(),
+ Arrays.asList(null, null)
+ );
+
@BeforeClass
public static void staticSetup()
@@ -186,7 +193,9 @@ public class VariantColumnSupplierTest extends
InitializedNullHandlingTest
new Object[]{"ARRAY<LONG>,ARRAY<STRING>,DOUBLE,LONG,STRING",
VARIANT_SCALAR_AND_ARRAY, IndexSpec.DEFAULT},
new Object[]{"ARRAY<LONG>,ARRAY<STRING>,DOUBLE,LONG,STRING",
VARIANT_SCALAR_AND_ARRAY, fancy},
new Object[]{"ARRAY<DOUBLE>,ARRAY<LONG>,ARRAY<STRING>", VARIANT_ARRAY,
IndexSpec.DEFAULT},
- new Object[]{"ARRAY<DOUBLE>,ARRAY<LONG>,ARRAY<STRING>", VARIANT_ARRAY,
fancy}
+ new Object[]{"ARRAY<DOUBLE>,ARRAY<LONG>,ARRAY<STRING>", VARIANT_ARRAY,
fancy},
+ new Object[]{"ARRAY<LONG>", NO_TYPE_ARRAY, IndexSpec.DEFAULT},
+ new Object[]{"ARRAY<LONG>", NO_TYPE_ARRAY, fancy}
);
return constructors;
@@ -254,6 +263,9 @@ public class VariantColumnSupplierTest extends
InitializedNullHandlingTest
for (ColumnType type :
FieldTypeInfo.convertToSet(expectedTypes.getByteValue())) {
expectedLogicalType =
ColumnType.leastRestrictiveType(expectedLogicalType, type);
}
+ if (expectedLogicalType == null &&
sortedFields.get(NestedPathFinder.JSON_PATH_ROOT).hasUntypedArray()) {
+ expectedLogicalType = ColumnType.LONG_ARRAY;
+ }
VariantColumnSerializer serializer = new VariantColumnSerializer(
fileNameBase,
expectedTypes.getSingleType() == null ? null : expectedLogicalType,
diff --git a/processing/src/test/resources/nested-array-test-data.json
b/processing/src/test/resources/nested-array-test-data.json
index b8ae3ace384..430fe165eac 100644
--- a/processing/src/test/resources/nested-array-test-data.json
+++ b/processing/src/test/resources/nested-array-test-data.json
@@ -1,14 +1,14 @@
-{"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": ["a", "b"], "arrayLong":[1, 2, 3],
"arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3],
"arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2],
"arrayNestedLong":[[1, 2, null], [3, 4]], "arrayObject":[{"x": 1},{"x":2}]}
-{"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b", "c"],
"arrayStringNulls": [null, "b"], "arrayLong":[2, 3],
"arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999,
null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNestedLong":[null,
[null], []], "arrayObject":[{"x": 3},{"x":4}]}
-{"timestamp": "2023-01-01T00:00:00", "arrayString": ["b", "c"],
"arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4],
"arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3],
"arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1],
"arrayNestedLong":[[1], null, [1, 2, 3]], "arrayObject":[null,{"x":2}]}
-{"timestamp": "2023-01-01T00:00:00", "arrayString": ["d", "e"],
"arrayStringNulls": ["b", "b"], "arrayLong":[1, 4],
"arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0],
"arrayVariant":["a", "b", "c"], "arrayNestedLong":[[1,
2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}]}
-{"timestamp": "2023-01-01T00:00:00", "arrayString": null,
"arrayLong":[1, 2, 3], "arrayLongNulls":[],
"arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":null,
"arrayObject":[{"x": 1000},{"y":2000}]}
-{"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": null,
"arrayLongNulls":[null, 2, 9], "arrayDouble":null,
"arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2],
"arrayNestedLong":[[1], [1, 2, null]], "arrayObject":[{"a": 1},{"b":2}]}
-{"timestamp": "2023-01-01T00:00:00",
"arrayStringNulls": ["a", "b"], "arrayLong":null,
"arrayLongNulls":[2, 3],
"arrayDoubleNulls":[null], "arrayVariant":null,
"arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}]}
-{"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": [], "arrayLong":[1, 2, 3],
"arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3],
"arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2],
"arrayNestedLong":[[2, 3], [1, 5]], "arrayObject":[{"x": 1},{"x":2}]}
-{"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b", "c"],
"arrayStringNulls": [null, "b"], "arrayLong":[2, 3],
"arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999,
null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNestedLong":[null],
"arrayObject":[{"x": 3},{"x":4}]}
-{"timestamp": "2023-01-02T00:00:00", "arrayString": ["b", "c"],
"arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4],
"arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3],
"arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1],
"arrayNestedLong":[[1], null, [1]], "arrayObject":[null,{"x":2}]}
-{"timestamp": "2023-01-02T00:00:00", "arrayString": ["d", "e"],
"arrayStringNulls": ["b", "b"], "arrayLong":[1, 4],
"arrayLongNulls":[null], "arrayDouble":[2.2, 3.3, 4.0],
"arrayVariant":["a", "b", "c"], "arrayNestedLong":[[1,
2], [3, 4], [5, 6, 7]], "arrayObject":[{"x": null},{"x":2}]}
-{"timestamp": "2023-01-02T00:00:00", "arrayString": null,
"arrayLong":[1, 2, 3], "arrayLongNulls":null,
"arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[],
"arrayObject":[{"x": 1000},{"y":2000}]}
-{"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": [null],
"arrayLongNulls":[null, 2, 9], "arrayDouble":null,
"arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2],
"arrayNestedLong":[], "arrayObject":[{"a": 1},{"b":2}]}
-{"timestamp": "2023-01-02T00:00:00",
"arrayStringNulls": ["a", "b"], "arrayLong":null,
"arrayLongNulls":[2, 3],
"arrayDoubleNulls":[null, 1.1], "arrayVariant":null,
"arrayNestedLong":null, "arrayObject":[{"x": 1},{"x":2}]}
\ No newline at end of file
+{"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": ["a", "b"], "arrayLong":[1, 2, 3],
"arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3],
"arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2],
"arrayNoType":[], "arrayNestedLong":[[1, 2, null], [3, 4]],
"arrayObject":[{"x": 1},{"x":2}]}
+{"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b", "c"],
"arrayStringNulls": [null, "b"], "arrayLong":[2, 3],
"arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999,
null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNoType":[null],
"arrayNestedLong":[null, [null], []], "arrayObject":[{"x": 3},{"x":4}]}
+{"timestamp": "2023-01-01T00:00:00", "arrayString": ["b", "c"],
"arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4],
"arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3],
"arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1],
"arrayNestedLong":[[1], null, [1, 2, 3]],
"arrayObject":[null,{"x":2}]}
+{"timestamp": "2023-01-01T00:00:00", "arrayString": ["d", "e"],
"arrayStringNulls": ["b", "b"], "arrayLong":[1, 4],
"arrayLongNulls":[1], "arrayDouble":[2.2, 3.3, 4.0],
"arrayVariant":["a", "b", "c"], "arrayNoType":[],
"arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x":
null},{"x":2}]}
+{"timestamp": "2023-01-01T00:00:00", "arrayString": null,
"arrayLong":[1, 2, 3], "arrayLongNulls":[],
"arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":null,
"arrayNoType":[],
"arrayObject":[{"x": 1000},{"y":2000}]}
+{"timestamp": "2023-01-01T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": null,
"arrayLongNulls":[null, 2, 9], "arrayDouble":null,
"arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2],
"arrayNoType":[null, null], "arrayNestedLong":[[1], [1, 2, null]],
"arrayObject":[{"a": 1},{"b":2}]}
+{"timestamp": "2023-01-01T00:00:00",
"arrayStringNulls": ["a", "b"], "arrayLong":null,
"arrayLongNulls":[2, 3],
"arrayDoubleNulls":[null], "arrayVariant":null,
"arrayNoType":[], "arrayNestedLong":null,
"arrayObject":[{"x": 1},{"x":2}]}
+{"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": [], "arrayLong":[1, 2, 3],
"arrayLongNulls":[1, null,3], "arrayDouble":[1.1, 2.2, 3.3],
"arrayDoubleNulls":[1.1, 2.2, null], "arrayVariant":["a", 1, 2.2],
"arrayNoType":[], "arrayNestedLong":[[2, 3], [1, 5]],
"arrayObject":[{"x": 1},{"x":2}]}
+{"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b", "c"],
"arrayStringNulls": [null, "b"], "arrayLong":[2, 3],
"arrayDouble":[3.3, 4.4, 5.5], "arrayDoubleNulls":[999,
null, 5.5], "arrayVariant":[null, null, 2.2], "arrayNoType":[],
"arrayNestedLong":[null], "arrayObject":[{"x": 3},{"x":4}]}
+{"timestamp": "2023-01-02T00:00:00", "arrayString": ["b", "c"],
"arrayStringNulls": ["d", null, "b"], "arrayLong":[1, 2, 3, 4],
"arrayLongNulls":[1, 2, 3], "arrayDouble":[1.1, 3.3],
"arrayDoubleNulls":[null, 2.2, null], "arrayVariant":[1, null, 1],
"arrayNoType":[null], "arrayNestedLong":[[1], null, [1]],
"arrayObject":[null,{"x":2}]}
+{"timestamp": "2023-01-02T00:00:00", "arrayString": ["d", "e"],
"arrayStringNulls": ["b", "b"], "arrayLong":[1, 4],
"arrayLongNulls":[null], "arrayDouble":[2.2, 3.3, 4.0],
"arrayVariant":["a", "b", "c"], "arrayNoType":[],
"arrayNestedLong":[[1, 2], [3, 4], [5, 6, 7]], "arrayObject":[{"x":
null},{"x":2}]}
+{"timestamp": "2023-01-02T00:00:00", "arrayString": null,
"arrayLong":[1, 2, 3], "arrayLongNulls":null,
"arrayDouble":[1.1, 2.2, 3.3], "arrayDoubleNulls":[],
"arrayNoType":[],
"arrayObject":[{"x": 1000},{"y":2000}]}
+{"timestamp": "2023-01-02T00:00:00", "arrayString": ["a", "b"],
"arrayStringNulls": [null],
"arrayLongNulls":[null, 2, 9], "arrayDouble":null,
"arrayDoubleNulls":[999, 5.5, null], "arrayVariant":["a", 1, 2.2],
"arrayNoType":null, "arrayNestedLong":[],
"arrayObject":[{"a": 1},{"b":2}]}
+{"timestamp": "2023-01-02T00:00:00",
"arrayStringNulls": ["a", "b"], "arrayLong":null,
"arrayLongNulls":[2, 3],
"arrayDoubleNulls":[null, 1.1], "arrayVariant":null,
"arrayNoType":[], "arrayNestedLong":null,
"arrayObject":[{"x": 1},{"x":2}]}
\ No newline at end of file
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 22e16f6e7e1..cea1bdbef7e 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -6258,9 +6258,9 @@ public class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
.add("cstringArray", ColumnType.STRING_ARRAY)
.add("cLongArray", ColumnType.LONG_ARRAY)
.add("cDoubleArray", ColumnType.DOUBLE_ARRAY)
- .add("cEmptyArray", ColumnType.NESTED_DATA)
+ .add("cEmptyArray", ColumnType.LONG_ARRAY)
.add("cEmptyObj", ColumnType.NESTED_DATA)
- .add("cNullArray", ColumnType.NESTED_DATA)
+ .add("cNullArray", ColumnType.LONG_ARRAY)
.add("cEmptyObjectArray", ColumnType.NESTED_DATA)
.add("cObjectArray", ColumnType.NESTED_DATA)
.add("cnt", ColumnType.LONG)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]