This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 3fb6ba22e85 fix expression column capabilities to not report 
dictionary encoded unless input is string (#16577)
3fb6ba22e85 is described below

commit 3fb6ba22e85b82a654cbfb1b52b5ae7fed7b95e0
Author: Clint Wylie <[email protected]>
AuthorDate: Sat Jun 8 13:05:19 2024 -0700

    fix expression column capabilities to not report dictionary encoded unless 
input is string (#16577)
---
 .../serde/NestedCommonFormatColumnPartSerde.java   |   4 -
 .../druid/segment/virtual/ExpressionPlan.java      |  16 ++-
 .../segment/virtual/ExpressionPlannerTest.java     | 126 ++++++++++++++++++---
 .../virtual/ExpressionVirtualColumnTest.java       |   2 +-
 .../sql/calcite/CalciteNestedDataQueryTest.java    |  32 ++++++
 5 files changed, 157 insertions(+), 23 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
 
b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
index 7a0ea743784..6bf897cd6b2 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
@@ -303,10 +303,6 @@ public class NestedCommonFormatColumnPartSerde implements 
ColumnPartSerde
           bitmapSerdeFactory,
           byteOrder
       );
-      ColumnCapabilitiesImpl capabilitiesBuilder = 
builder.getCapabilitiesBuilder();
-      capabilitiesBuilder.setDictionaryEncoded(true);
-      capabilitiesBuilder.setDictionaryValuesSorted(true);
-      capabilitiesBuilder.setDictionaryValuesUnique(true);
       ColumnType simpleType = supplier.getLogicalType();
       ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : 
simpleType;
       builder.setType(logicalType);
diff --git 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
index fecfe142069..71af403fa76 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
@@ -274,12 +274,18 @@ public class ExpressionPlan
             // since we don't know if the expression is 1:1 or if it retains 
ordering we can only piggy back only
             // report as dictionary encoded, but it still allows us to use 
algorithms which work with dictionaryIds
             // to create a dictionary encoded selector instead of an object 
selector to defer expression evaluation
-            // until query time
-            return ColumnCapabilitiesImpl.copyOf(underlyingCapabilities)
+            // until query time. However, currently dictionary encodedness is 
tied to string selectors and sad stuff
+            // happens if the input type isn't string, so we also limit this 
to string input types
+            final boolean useDictionary = 
underlyingCapabilities.isDictionaryEncoded().isTrue() &&
+                                          
underlyingCapabilities.is(ValueType.STRING);
+            return ColumnCapabilitiesImpl.createDefault()
                                          .setType(ColumnType.STRING)
-                                         .setDictionaryValuesSorted(false)
-                                         .setDictionaryValuesUnique(false)
-                                         .setHasBitmapIndexes(false)
+                                         .setDictionaryEncoded(useDictionary)
+                                         
.setHasBitmapIndexes(underlyingCapabilities.hasBitmapIndexes())
+                                         
.setHasMultipleValues(underlyingCapabilities.hasMultipleValues())
+                                         
.setHasSpatialIndexes(underlyingCapabilities.hasSpatialIndexes())
+                                         // we aren't sure if the expression 
might produce null values or not, so always
+                                         // set hasNulls to true
                                          .setHasNulls(true);
           }
         }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
 
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
index c40c4a9bacd..9b4d1b84af2 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
@@ -19,8 +19,13 @@
 
 package org.apache.druid.segment.virtual;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
 import org.apache.druid.math.expr.ExpressionType;
 import org.apache.druid.math.expr.Parser;
 import org.apache.druid.query.expression.TestExprMacroTable;
@@ -36,11 +41,13 @@ import org.junit.Test;
 import org.junit.rules.ExpectedException;
 
 import javax.annotation.Nullable;
+import java.util.List;
 import java.util.Map;
 
 public class ExpressionPlannerTest extends InitializedNullHandlingTest
 {
-  public static final ColumnInspector SYNTHETIC_INSPECTOR = new 
ColumnInspector()
+  private static ColumnType DICTIONARY_COMPLEX = 
ColumnType.ofComplex("dictionaryComplex");
+  private static final ColumnInspector SYNTHETIC_INSPECTOR = new 
ColumnInspector()
   {
     private final Map<String, ColumnCapabilities> capabilitiesMap =
         ImmutableMap.<String, ColumnCapabilities>builder()
@@ -141,6 +148,12 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
                         "double_array_2",
                         
ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ColumnType.DOUBLE_ARRAY)
                     )
+                    .put(
+                        "dictionary_complex",
+                        ColumnCapabilitiesImpl.createDefault()
+                                              .setDictionaryEncoded(true)
+                                              .setType(DICTIONARY_COMPLEX)
+                    )
                     .build();
 
     @Nullable
@@ -151,6 +164,8 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
     }
   };
 
+  private static final TestMacroTable MACRO_TABLE = new TestMacroTable();
+
   @Rule
   public ExpectedException expectedException = ExpectedException.none();
 
@@ -369,7 +384,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
     Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
     Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
     Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
-    Assert.assertFalse(inferred.hasBitmapIndexes());
+    Assert.assertTrue(inferred.hasBitmapIndexes());
     Assert.assertFalse(inferred.hasSpatialIndexes());
 
     // multiple input columns
@@ -463,7 +478,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
     Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
     Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
     Assert.assertTrue(inferred.hasMultipleValues().isTrue());
-    Assert.assertFalse(inferred.hasBitmapIndexes());
+    Assert.assertTrue(inferred.hasBitmapIndexes());
     Assert.assertFalse(inferred.hasSpatialIndexes());
 
     thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
@@ -599,7 +614,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
         )
     );
     Assert.assertFalse(
-        thePlan.is(
+        thePlan.any(
             ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
             ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
             ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -671,7 +686,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
         )
     );
     Assert.assertFalse(
-        thePlan.is(
+        thePlan.any(
             ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
             ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
             ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -719,7 +734,22 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
 
     // what about a multi-valued input
     thePlan = plan("array_to_string(array_append(scalar_string, 
multi_dictionary_string), ',')");
-    assertArrayInput(thePlan);
+    Assert.assertTrue(
+        thePlan.is(
+            ExpressionPlan.Trait.NON_SCALAR_INPUTS,
+            ExpressionPlan.Trait.NEEDS_APPLIED
+        )
+    );
+    Assert.assertFalse(
+        thePlan.any(
+            ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
+            ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
+            ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
+            ExpressionPlan.Trait.INCOMPLETE_INPUTS,
+            ExpressionPlan.Trait.UNKNOWN_INPUTS,
+            ExpressionPlan.Trait.VECTORIZABLE
+        )
+    );
 
     Assert.assertEquals(
         "array_to_string(map((\"multi_dictionary_string\") -> 
array_append(\"scalar_string\", \"multi_dictionary_string\"), 
\"multi_dictionary_string\"), ',')",
@@ -789,7 +819,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
         )
     );
     Assert.assertFalse(
-        thePlan.is(
+        thePlan.any(
             ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
             ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
             ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -812,15 +842,14 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
   {
     ExpressionPlan thePlan = plan("json_object('long1', long1, 'long2', 
long2)");
     Assert.assertFalse(
-        thePlan.is(
+        thePlan.any(
             ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
             ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
             ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
             ExpressionPlan.Trait.UNKNOWN_INPUTS,
             ExpressionPlan.Trait.INCOMPLETE_INPUTS,
             ExpressionPlan.Trait.NEEDS_APPLIED,
-            ExpressionPlan.Trait.NON_SCALAR_INPUTS,
-            ExpressionPlan.Trait.VECTORIZABLE
+            ExpressionPlan.Trait.NON_SCALAR_INPUTS
         )
     );
     Assert.assertEquals(ExpressionType.NESTED_DATA, thePlan.getOutputType());
@@ -837,9 +866,40 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
     );
   }
 
+  @Test
+  public void testDictionaryComplexStringOutput()
+  {
+    ExpressionPlan thePlan = 
plan("dict_complex_to_string(dictionary_complex)");
+    Assert.assertFalse(
+        thePlan.any(
+            ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
+            ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
+            ExpressionPlan.Trait.UNKNOWN_INPUTS,
+            ExpressionPlan.Trait.INCOMPLETE_INPUTS,
+            ExpressionPlan.Trait.NEEDS_APPLIED,
+            ExpressionPlan.Trait.NON_SCALAR_INPUTS
+        )
+    );
+    Assert.assertTrue(
+        thePlan.is(
+            ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
+            ExpressionPlan.Trait.VECTORIZABLE
+        )
+    );
+    Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
+    ColumnCapabilities inferred = thePlan.inferColumnCapabilities(
+        ExpressionType.toColumnType(thePlan.getOutputType())
+    );
+    Assert.assertEquals(
+        ColumnType.STRING.getType(),
+        inferred.getType()
+    );
+    Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
+  }
+
   private static ExpressionPlan plan(String expression)
   {
-    return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, 
Parser.parse(expression, TestExprMacroTable.INSTANCE));
+    return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, 
Parser.parse(expression, MACRO_TABLE));
   }
 
   private static void assertArrayInput(ExpressionPlan thePlan)
@@ -850,7 +910,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
         )
     );
     Assert.assertFalse(
-        thePlan.is(
+        thePlan.any(
             ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
             ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
             ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
@@ -871,7 +931,7 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
         )
     );
     Assert.assertFalse(
-        thePlan.is(
+        thePlan.any(
             ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
             ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
             ExpressionPlan.Trait.INCOMPLETE_INPUTS,
@@ -881,4 +941,44 @@ public class ExpressionPlannerTest extends 
InitializedNullHandlingTest
         )
     );
   }
+
+  private static class TestMacroTable extends ExprMacroTable
+  {
+    public TestMacroTable()
+    {
+      super(
+          ImmutableList.<ExprMacroTable.ExprMacro>builder()
+                       .addAll(TestExprMacroTable.INSTANCE.getMacros())
+                       .add(new ExprMacroTable.ExprMacro()
+                       {
+                         @Override
+                         public Expr apply(List<Expr> args)
+                         {
+                           return new 
ExprMacroTable.BaseScalarMacroFunctionExpr(this, args)
+                           {
+                             @Override
+                             public ExprEval eval(ObjectBinding bindings)
+                             {
+                               throw DruidException.defensive("just for 
planner test");
+                             }
+
+                             @Nullable
+                             @Override
+                             public ExpressionType 
getOutputType(InputBindingInspector inspector)
+                             {
+                               return ExpressionType.STRING;
+                             }
+                           };
+                         }
+
+                         @Override
+                         public String name()
+                         {
+                           return "dict_complex_to_string";
+                         }
+                       })
+                       .build()
+      );
+    }
+  }
 }
diff --git 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
 
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
index 451acb09661..16e22c01080 100644
--- 
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
+++ 
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
@@ -115,7 +115,7 @@ public class ExpressionVirtualColumnTest extends 
InitializedNullHandlingTest
       ImmutableMap.of(
           "x", 3L,
           "y", 4L,
-          "b", Arrays.asList(new String[]{"3", null, "5"})
+          "b", Arrays.asList("3", null, "5")
       )
   );
 
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index a68e4d67cf7..08c25a43d4f 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -7558,4 +7558,36 @@ public class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
                     .build()
     );
   }
+
+  @Test
+  public void testToJsonString()
+  {
+    testQuery(
+        "SELECT TO_JSON_STRING(nester) FROM druid.nested GROUP BY 1",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(DATA_SOURCE)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setDimensions(
+                            dimensions(
+                                new DefaultDimensionSpec("v0", "d0", 
ColumnType.STRING)
+                            )
+                        )
+                        .setVirtualColumns(
+                            expressionVirtualColumn("v0", 
"to_json_string(\"nester\")", ColumnType.STRING)
+                        )
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{NullHandling.defaultStringValue()},
+            new Object[]{"\"hello\""},
+            new Object[]{"2"},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}"},
+            new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}"}
+        ),
+        RowSignature.builder().add("EXPR$0", ColumnType.STRING).build()
+    );
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to