This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 1eafe983eca fix array presenting columns to not match single element
arrays to scalars for equality (#15503)
1eafe983eca is described below
commit 1eafe983eca409bdc5100a97797f542e08545a8b
Author: Clint Wylie <[email protected]>
AuthorDate: Fri Dec 8 01:22:07 2023 -0800
fix array presenting columns to not match single element arrays to scalars
for equality (#15503)
* fix array presenting columns to not match single element arrays to
scalars for equality
* update docs to clarify usage model of mixed type columns
---
docs/ingestion/schema-design.md | 7 +++-
.../apache/druid/query/filter/EqualityFilter.java | 3 ++
.../nested/VariantColumnAndIndexSupplier.java | 3 ++
.../timeseries/NestedDataTimeseriesQueryTest.java | 2 -
.../druid/segment/filter/EqualityFilterTests.java | 47 +++++++++++++++++-----
5 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/docs/ingestion/schema-design.md b/docs/ingestion/schema-design.md
index 0749d20b695..ce57b207fea 100644
--- a/docs/ingestion/schema-design.md
+++ b/docs/ingestion/schema-design.md
@@ -263,13 +263,18 @@ native boolean types, Druid ingests these values as longs
if `druid.expressions.
the [array functions](../querying/sql-array-functions.md) or
[UNNEST](../querying/sql-functions.md#unnest). Nested
columns can be queried with the [JSON
functions](../querying/sql-json-functions.md).
-Mixed type columns are stored in the _least_ restrictive type that can
represent all values in the column. For example:
+Mixed type columns follow the same rules for schema differences between
segments, and present as the _least_ restrictive
+type that can represent all values in the column. For example:
- Mixed numeric columns are `DOUBLE`
- If there are any strings present, then the column is a `STRING`
- If there are arrays, then the column becomes an array with the least
restrictive element type
- Any nested data or arrays of nested data become `COMPLEX<json>` nested
columns.
+Grouping, filtering, and aggregating mixed type values will handle these
columns as if all values are represented as the
+least restrictive type. The exception to this is the scan query, which will
return the values in their original mixed
+types, but any downstream operations on these values will still coerce them to
the common type.
+
If you're already using string-based schema discovery and want to migrate, see
[Migrating to type-aware schema
discovery](#migrating-to-type-aware-schema-discovery).
#### String-based schema discovery
diff --git
a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
index ae7a5eaa134..f98a300b3dc 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
@@ -392,6 +392,9 @@ public class EqualityFilter extends
AbstractOptimizableDimFilter implements Filt
@Override
public Predicate<Object[]> makeArrayPredicate(@Nullable
TypeSignature<ValueType> arrayType)
{
+ if (!matchValue.isArray()) {
+ return Predicates.alwaysFalse();
+ }
if (arrayType == null) {
// fall back to per row detection if input array type is unknown
return typeDetectingArrayPredicateSupplier.get();
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
index fcd2e4dca08..f81bffcd251 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
@@ -320,6 +320,9 @@ public class VariantColumnAndIndexSupplier implements
Supplier<NestedCommonForma
@Override
public BitmapColumnIndex forValue(@Nonnull Object value,
TypeSignature<ValueType> valueType)
{
+ if (!valueType.isArray()) {
+ return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap);
+ }
final ExprEval<?> eval =
ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value);
final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison(
eval,
diff --git
a/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
b/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
index 130654d092c..815a6ed9951 100644
---
a/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
+++
b/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
@@ -488,7 +488,6 @@ public class NestedDataTimeseriesQueryTest extends
InitializedNullHandlingTest
.intervals(Collections.singletonList(Intervals.ETERNITY))
.filters(
new AndDimFilter(
- new
EqualityFilter("variantWithArrays", ColumnType.STRING, "1", null),
new EqualityFilter("v0",
ColumnType.STRING, "1", null)
)
)
@@ -524,7 +523,6 @@ public class NestedDataTimeseriesQueryTest extends
InitializedNullHandlingTest
.intervals(Collections.singletonList(Intervals.ETERNITY))
.filters(
new AndDimFilter(
- new
EqualityFilter("variantWithArrays", ColumnType.DOUBLE, 3.0, null),
new EqualityFilter("v0",
ColumnType.DOUBLE, 3.0, null)
)
)
diff --git
a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
index 5a4431e0056..801d02a97ee 100644
---
a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
+++
b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
@@ -412,22 +412,18 @@ public class EqualityFilterTests
{
if (isAutoSchema()) {
// auto ingests arrays instead of strings
- // single values are implicitly upcast to single element arrays, so we
get some matches here...
if (NullHandling.sqlCompatible()) {
- assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING,
"", null), ImmutableList.of("2"));
+ assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING,
"", null), ImmutableList.of());
+ assertFilterMatches(new EqualityFilter("dim2",
ColumnType.STRING_ARRAY, ImmutableList.of(""), null), ImmutableList.of("2"));
}
- assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a",
null), ImmutableList.of("3"));
+ assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a",
null), ImmutableList.of());
+ assertFilterMatches(new EqualityFilter("dim2",
ColumnType.STRING_ARRAY, ImmutableList.of("a"), null), ImmutableList.of("3"));
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b",
null), ImmutableList.of());
- assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c",
null), ImmutableList.of("4"));
+ assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c",
null), ImmutableList.of());
+ assertFilterMatches(new EqualityFilter("dim2",
ColumnType.STRING_ARRAY, ImmutableList.of("c"), null), ImmutableList.of("4"));
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d",
null), ImmutableList.of());
// array matchers can match the whole array
- if (NullHandling.sqlCompatible()) {
- assertFilterMatches(
- new EqualityFilter("dim2", ColumnType.STRING,
ImmutableList.of(""), null),
- ImmutableList.of("2")
- );
- }
assertFilterMatches(
new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new
Object[]{"a", "b"}, null),
ImmutableList.of("0")
@@ -994,7 +990,7 @@ public class EqualityFilterTests
"3", .. [1.1, 2.2, 3.3]
"4", .. 12.34
"5", .. [100, 200, 300]
-
+
*/
Assume.assumeTrue(isAutoSchema());
assertFilterMatches(
@@ -1018,6 +1014,7 @@ public class EqualityFilterTests
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
+ // variant columns must be matched as arrays if they contain any arrays
assertFilterMatches(
new EqualityFilter(
"variant",
@@ -1025,6 +1022,15 @@ public class EqualityFilterTests
"abc",
null
),
+ ImmutableList.of()
+ );
+ assertFilterMatches(
+ new EqualityFilter(
+ "variant",
+ ColumnType.STRING_ARRAY,
+ ImmutableList.of("abc"),
+ null
+ ),
ImmutableList.of("0")
);
@@ -1035,6 +1041,15 @@ public class EqualityFilterTests
100L,
null
),
+ ImmutableList.of()
+ );
+ assertFilterMatches(
+ new EqualityFilter(
+ "variant",
+ ColumnType.LONG_ARRAY,
+ ImmutableList.of(100L),
+ null
+ ),
ImmutableList.of("1", "2")
);
@@ -1045,6 +1060,15 @@ public class EqualityFilterTests
"100",
null
),
+ ImmutableList.of()
+ );
+ assertFilterMatches(
+ new EqualityFilter(
+ "variant",
+ ColumnType.STRING_ARRAY,
+ new Object[]{"100"},
+ null
+ ),
ImmutableList.of("1", "2")
);
@@ -1255,6 +1279,7 @@ public class EqualityFilterTests
"cachedOptimizedFilter"
)
.withPrefabValues(ColumnType.class, ColumnType.STRING,
ColumnType.DOUBLE)
+ .withPrefabValues(ExprEval.class, ExprEval.of("hello"),
ExprEval.of(1.0))
.withIgnoredFields("predicateFactory",
"cachedOptimizedFilter", "matchValue")
.verify();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]