This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 1eafe983eca fix array presenting columns to not match single element 
arrays to scalars for equality (#15503)
1eafe983eca is described below

commit 1eafe983eca409bdc5100a97797f542e08545a8b
Author: Clint Wylie <[email protected]>
AuthorDate: Fri Dec 8 01:22:07 2023 -0800

    fix array presenting columns to not match single element arrays to scalars 
for equality (#15503)
    
    * fix array presenting columns to not match single element arrays to 
scalars for equality
    * update docs to clarify usage model of mixed type columns
---
 docs/ingestion/schema-design.md                    |  7 +++-
 .../apache/druid/query/filter/EqualityFilter.java  |  3 ++
 .../nested/VariantColumnAndIndexSupplier.java      |  3 ++
 .../timeseries/NestedDataTimeseriesQueryTest.java  |  2 -
 .../druid/segment/filter/EqualityFilterTests.java  | 47 +++++++++++++++++-----
 5 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/docs/ingestion/schema-design.md b/docs/ingestion/schema-design.md
index 0749d20b695..ce57b207fea 100644
--- a/docs/ingestion/schema-design.md
+++ b/docs/ingestion/schema-design.md
@@ -263,13 +263,18 @@ native boolean types, Druid ingests these values as longs 
if `druid.expressions.
 the [array functions](../querying/sql-array-functions.md) or 
[UNNEST](../querying/sql-functions.md#unnest). Nested
 columns can be queried with the [JSON 
functions](../querying/sql-json-functions.md).
 
-Mixed type columns are stored in the _least_ restrictive type that can 
represent all values in the column. For example:
+Mixed type columns follow the same rules for schema differences between 
segments, and present as the _least_ restrictive
+type that can represent all values in the column. For example:
 
 - Mixed numeric columns are `DOUBLE`
 - If there are any strings present, then the column is a `STRING`
 - If there are arrays, then the column becomes an array with the least 
restrictive element type
 - Any nested data or arrays of nested data become `COMPLEX<json>` nested 
columns.
 
+Grouping, filtering, and aggregating mixed type values will handle these 
columns as if all values are represented as the
+least restrictive type. The exception to this is the scan query, which will 
return the values in their original mixed
+types, but any downstream operations on these values will still coerce them to 
the common type.
+
 If you're already using string-based schema discovery and want to migrate, see 
[Migrating to type-aware schema 
discovery](#migrating-to-type-aware-schema-discovery).
 
 #### String-based schema discovery
diff --git 
a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java 
b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
index ae7a5eaa134..f98a300b3dc 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/EqualityFilter.java
@@ -392,6 +392,9 @@ public class EqualityFilter extends 
AbstractOptimizableDimFilter implements Filt
     @Override
     public Predicate<Object[]> makeArrayPredicate(@Nullable 
TypeSignature<ValueType> arrayType)
     {
+      if (!matchValue.isArray()) {
+        return Predicates.alwaysFalse();
+      }
       if (arrayType == null) {
         // fall back to per row detection if input array type is unknown
         return typeDetectingArrayPredicateSupplier.get();
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
 
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
index fcd2e4dca08..f81bffcd251 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java
@@ -320,6 +320,9 @@ public class VariantColumnAndIndexSupplier implements 
Supplier<NestedCommonForma
     @Override
     public BitmapColumnIndex forValue(@Nonnull Object value, 
TypeSignature<ValueType> valueType)
     {
+      if (!valueType.isArray()) {
+        return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap);
+      }
       final ExprEval<?> eval = 
ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value);
       final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison(
           eval,
diff --git 
a/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
 
b/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
index 130654d092c..815a6ed9951 100644
--- 
a/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/timeseries/NestedDataTimeseriesQueryTest.java
@@ -488,7 +488,6 @@ public class NestedDataTimeseriesQueryTest extends 
InitializedNullHandlingTest
                                   
.intervals(Collections.singletonList(Intervals.ETERNITY))
                                   .filters(
                                       new AndDimFilter(
-                                          new 
EqualityFilter("variantWithArrays", ColumnType.STRING, "1", null),
                                           new EqualityFilter("v0", 
ColumnType.STRING, "1", null)
                                       )
                                   )
@@ -524,7 +523,6 @@ public class NestedDataTimeseriesQueryTest extends 
InitializedNullHandlingTest
                                   
.intervals(Collections.singletonList(Intervals.ETERNITY))
                                   .filters(
                                       new AndDimFilter(
-                                          new 
EqualityFilter("variantWithArrays", ColumnType.DOUBLE, 3.0, null),
                                           new EqualityFilter("v0", 
ColumnType.DOUBLE, 3.0, null)
                                       )
                                   )
diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
 
b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
index 5a4431e0056..801d02a97ee 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java
@@ -412,22 +412,18 @@ public class EqualityFilterTests
     {
       if (isAutoSchema()) {
         // auto ingests arrays instead of strings
-        // single values are implicitly upcast to single element arrays, so we 
get some matches here...
         if (NullHandling.sqlCompatible()) {
-          assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, 
"", null), ImmutableList.of("2"));
+          assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, 
"", null), ImmutableList.of());
+          assertFilterMatches(new EqualityFilter("dim2", 
ColumnType.STRING_ARRAY, ImmutableList.of(""), null), ImmutableList.of("2"));
         }
-        assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", 
null), ImmutableList.of("3"));
+        assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", 
null), ImmutableList.of());
+        assertFilterMatches(new EqualityFilter("dim2", 
ColumnType.STRING_ARRAY, ImmutableList.of("a"), null), ImmutableList.of("3"));
         assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", 
null), ImmutableList.of());
-        assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", 
null), ImmutableList.of("4"));
+        assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", 
null), ImmutableList.of());
+        assertFilterMatches(new EqualityFilter("dim2", 
ColumnType.STRING_ARRAY, ImmutableList.of("c"), null), ImmutableList.of("4"));
         assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", 
null), ImmutableList.of());
 
         // array matchers can match the whole array
-        if (NullHandling.sqlCompatible()) {
-          assertFilterMatches(
-              new EqualityFilter("dim2", ColumnType.STRING, 
ImmutableList.of(""), null),
-              ImmutableList.of("2")
-          );
-        }
         assertFilterMatches(
             new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new 
Object[]{"a", "b"}, null),
             ImmutableList.of("0")
@@ -994,7 +990,7 @@ public class EqualityFilterTests
       "3", .. [1.1, 2.2, 3.3]
       "4", .. 12.34
       "5", .. [100, 200, 300]
-      
+
        */
       Assume.assumeTrue(isAutoSchema());
       assertFilterMatches(
@@ -1018,6 +1014,7 @@ public class EqualityFilterTests
           ImmutableList.of("0", "1", "2", "3", "4", "5")
       );
 
+      // variant columns must be matched as arrays if they contain any arrays
       assertFilterMatches(
           new EqualityFilter(
               "variant",
@@ -1025,6 +1022,15 @@ public class EqualityFilterTests
               "abc",
               null
           ),
+          ImmutableList.of()
+      );
+      assertFilterMatches(
+          new EqualityFilter(
+              "variant",
+              ColumnType.STRING_ARRAY,
+              ImmutableList.of("abc"),
+              null
+          ),
           ImmutableList.of("0")
       );
 
@@ -1035,6 +1041,15 @@ public class EqualityFilterTests
               100L,
               null
           ),
+          ImmutableList.of()
+      );
+      assertFilterMatches(
+          new EqualityFilter(
+              "variant",
+              ColumnType.LONG_ARRAY,
+              ImmutableList.of(100L),
+              null
+          ),
           ImmutableList.of("1", "2")
       );
 
@@ -1045,6 +1060,15 @@ public class EqualityFilterTests
               "100",
               null
           ),
+          ImmutableList.of()
+      );
+      assertFilterMatches(
+          new EqualityFilter(
+              "variant",
+              ColumnType.STRING_ARRAY,
+              new Object[]{"100"},
+              null
+          ),
           ImmutableList.of("1", "2")
       );
 
@@ -1255,6 +1279,7 @@ public class EqualityFilterTests
                         "cachedOptimizedFilter"
                     )
                     .withPrefabValues(ColumnType.class, ColumnType.STRING, 
ColumnType.DOUBLE)
+                    .withPrefabValues(ExprEval.class, ExprEval.of("hello"), 
ExprEval.of(1.0))
                     .withIgnoredFields("predicateFactory", 
"cachedOptimizedFilter", "matchValue")
                     .verify();
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to