This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 3fb6ba22e85 fix expression column capabilities to not report
dictionary encoded unless input is string (#16577)
3fb6ba22e85 is described below
commit 3fb6ba22e85b82a654cbfb1b52b5ae7fed7b95e0
Author: Clint Wylie <[email protected]>
AuthorDate: Sat Jun 8 13:05:19 2024 -0700
fix expression column capabilities to not report dictionary encoded unless
input is string (#16577)
---
.../serde/NestedCommonFormatColumnPartSerde.java | 4 -
.../druid/segment/virtual/ExpressionPlan.java | 16 ++-
.../segment/virtual/ExpressionPlannerTest.java | 126 ++++++++++++++++++---
.../virtual/ExpressionVirtualColumnTest.java | 2 +-
.../sql/calcite/CalciteNestedDataQueryTest.java | 32 ++++++
5 files changed, 157 insertions(+), 23 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
index 7a0ea743784..6bf897cd6b2 100644
---
a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
+++
b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java
@@ -303,10 +303,6 @@ public class NestedCommonFormatColumnPartSerde implements
ColumnPartSerde
bitmapSerdeFactory,
byteOrder
);
- ColumnCapabilitiesImpl capabilitiesBuilder =
builder.getCapabilitiesBuilder();
- capabilitiesBuilder.setDictionaryEncoded(true);
- capabilitiesBuilder.setDictionaryValuesSorted(true);
- capabilitiesBuilder.setDictionaryValuesUnique(true);
ColumnType simpleType = supplier.getLogicalType();
ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA :
simpleType;
builder.setType(logicalType);
diff --git
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
index fecfe142069..71af403fa76 100644
---
a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
+++
b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionPlan.java
@@ -274,12 +274,18 @@ public class ExpressionPlan
// since we don't know if the expression is 1:1 or if it retains
ordering we can only piggy back only
// report as dictionary encoded, but it still allows us to use
algorithms which work with dictionaryIds
// to create a dictionary encoded selector instead of an object
selector to defer expression evaluation
- // until query time
- return ColumnCapabilitiesImpl.copyOf(underlyingCapabilities)
+ // until query time. However, currently dictionary encodedness is
tied to string selectors and sad stuff
+ // happens if the input type isn't string, so we also limit this
to string input types
+ final boolean useDictionary =
underlyingCapabilities.isDictionaryEncoded().isTrue() &&
+
underlyingCapabilities.is(ValueType.STRING);
+ return ColumnCapabilitiesImpl.createDefault()
.setType(ColumnType.STRING)
- .setDictionaryValuesSorted(false)
- .setDictionaryValuesUnique(false)
- .setHasBitmapIndexes(false)
+ .setDictionaryEncoded(useDictionary)
+
.setHasBitmapIndexes(underlyingCapabilities.hasBitmapIndexes())
+
.setHasMultipleValues(underlyingCapabilities.hasMultipleValues())
+
.setHasSpatialIndexes(underlyingCapabilities.hasSpatialIndexes())
+ // we aren't sure if the expression
might produce null values or not, so always
+ // set hasNulls to true
.setHasNulls(true);
}
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
index c40c4a9bacd..9b4d1b84af2 100644
---
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java
@@ -19,8 +19,13 @@
package org.apache.druid.segment.virtual;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.expression.TestExprMacroTable;
@@ -36,11 +41,13 @@ import org.junit.Test;
import org.junit.rules.ExpectedException;
import javax.annotation.Nullable;
+import java.util.List;
import java.util.Map;
public class ExpressionPlannerTest extends InitializedNullHandlingTest
{
- public static final ColumnInspector SYNTHETIC_INSPECTOR = new
ColumnInspector()
+ private static ColumnType DICTIONARY_COMPLEX =
ColumnType.ofComplex("dictionaryComplex");
+ private static final ColumnInspector SYNTHETIC_INSPECTOR = new
ColumnInspector()
{
private final Map<String, ColumnCapabilities> capabilitiesMap =
ImmutableMap.<String, ColumnCapabilities>builder()
@@ -141,6 +148,12 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
"double_array_2",
ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ColumnType.DOUBLE_ARRAY)
)
+ .put(
+ "dictionary_complex",
+ ColumnCapabilitiesImpl.createDefault()
+ .setDictionaryEncoded(true)
+ .setType(DICTIONARY_COMPLEX)
+ )
.build();
@Nullable
@@ -151,6 +164,8 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
}
};
+ private static final TestMacroTable MACRO_TABLE = new TestMacroTable();
+
@Rule
public ExpectedException expectedException = ExpectedException.none();
@@ -369,7 +384,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
- Assert.assertFalse(inferred.hasBitmapIndexes());
+ Assert.assertTrue(inferred.hasBitmapIndexes());
Assert.assertFalse(inferred.hasSpatialIndexes());
// multiple input columns
@@ -463,7 +478,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
Assert.assertTrue(inferred.hasMultipleValues().isTrue());
- Assert.assertFalse(inferred.hasBitmapIndexes());
+ Assert.assertTrue(inferred.hasBitmapIndexes());
Assert.assertFalse(inferred.hasSpatialIndexes());
thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
@@ -599,7 +614,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -671,7 +686,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -719,7 +734,22 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
// what about a multi-valued input
thePlan = plan("array_to_string(array_append(scalar_string,
multi_dictionary_string), ',')");
- assertArrayInput(thePlan);
+ Assert.assertTrue(
+ thePlan.is(
+ ExpressionPlan.Trait.NON_SCALAR_INPUTS,
+ ExpressionPlan.Trait.NEEDS_APPLIED
+ )
+ );
+ Assert.assertFalse(
+ thePlan.any(
+ ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
+ ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
+ ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
+ ExpressionPlan.Trait.INCOMPLETE_INPUTS,
+ ExpressionPlan.Trait.UNKNOWN_INPUTS,
+ ExpressionPlan.Trait.VECTORIZABLE
+ )
+ );
Assert.assertEquals(
"array_to_string(map((\"multi_dictionary_string\") ->
array_append(\"scalar_string\", \"multi_dictionary_string\"),
\"multi_dictionary_string\"), ',')",
@@ -789,7 +819,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -812,15 +842,14 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
{
ExpressionPlan thePlan = plan("json_object('long1', long1, 'long2',
long2)");
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED,
- ExpressionPlan.Trait.NON_SCALAR_INPUTS,
- ExpressionPlan.Trait.VECTORIZABLE
+ ExpressionPlan.Trait.NON_SCALAR_INPUTS
)
);
Assert.assertEquals(ExpressionType.NESTED_DATA, thePlan.getOutputType());
@@ -837,9 +866,40 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
);
}
+ @Test
+ public void testDictionaryComplexStringOutput()
+ {
+ ExpressionPlan thePlan =
plan("dict_complex_to_string(dictionary_complex)");
+ Assert.assertFalse(
+ thePlan.any(
+ ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
+ ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
+ ExpressionPlan.Trait.UNKNOWN_INPUTS,
+ ExpressionPlan.Trait.INCOMPLETE_INPUTS,
+ ExpressionPlan.Trait.NEEDS_APPLIED,
+ ExpressionPlan.Trait.NON_SCALAR_INPUTS
+ )
+ );
+ Assert.assertTrue(
+ thePlan.is(
+ ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
+ ExpressionPlan.Trait.VECTORIZABLE
+ )
+ );
+ Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
+ ColumnCapabilities inferred = thePlan.inferColumnCapabilities(
+ ExpressionType.toColumnType(thePlan.getOutputType())
+ );
+ Assert.assertEquals(
+ ColumnType.STRING.getType(),
+ inferred.getType()
+ );
+ Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
+ }
+
private static ExpressionPlan plan(String expression)
{
- return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR,
Parser.parse(expression, TestExprMacroTable.INSTANCE));
+ return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR,
Parser.parse(expression, MACRO_TABLE));
}
private static void assertArrayInput(ExpressionPlan thePlan)
@@ -850,7 +910,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
@@ -871,7 +931,7 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
@@ -881,4 +941,44 @@ public class ExpressionPlannerTest extends
InitializedNullHandlingTest
)
);
}
+
+ private static class TestMacroTable extends ExprMacroTable
+ {
+ public TestMacroTable()
+ {
+ super(
+ ImmutableList.<ExprMacroTable.ExprMacro>builder()
+ .addAll(TestExprMacroTable.INSTANCE.getMacros())
+ .add(new ExprMacroTable.ExprMacro()
+ {
+ @Override
+ public Expr apply(List<Expr> args)
+ {
+ return new
ExprMacroTable.BaseScalarMacroFunctionExpr(this, args)
+ {
+ @Override
+ public ExprEval eval(ObjectBinding bindings)
+ {
+ throw DruidException.defensive("just for
planner test");
+ }
+
+ @Nullable
+ @Override
+ public ExpressionType
getOutputType(InputBindingInspector inspector)
+ {
+ return ExpressionType.STRING;
+ }
+ };
+ }
+
+ @Override
+ public String name()
+ {
+ return "dict_complex_to_string";
+ }
+ })
+ .build()
+ );
+ }
+ }
}
diff --git
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
index 451acb09661..16e22c01080 100644
---
a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java
@@ -115,7 +115,7 @@ public class ExpressionVirtualColumnTest extends
InitializedNullHandlingTest
ImmutableMap.of(
"x", 3L,
"y", 4L,
- "b", Arrays.asList(new String[]{"3", null, "5"})
+ "b", Arrays.asList("3", null, "5")
)
);
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index a68e4d67cf7..08c25a43d4f 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -7558,4 +7558,36 @@ public class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
.build()
);
}
+
+ @Test
+ public void testToJsonString()
+ {
+ testQuery(
+ "SELECT TO_JSON_STRING(nester) FROM druid.nested GROUP BY 1",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(DATA_SOURCE)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("v0", "d0",
ColumnType.STRING)
+ )
+ )
+ .setVirtualColumns(
+ expressionVirtualColumn("v0",
"to_json_string(\"nester\")", ColumnType.STRING)
+ )
+ .setContext(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{NullHandling.defaultStringValue()},
+ new Object[]{"\"hello\""},
+ new Object[]{"2"},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}"},
+ new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}"}
+ ),
+ RowSignature.builder().add("EXPR$0", ColumnType.STRING).build()
+ );
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]