clintropolis commented on code in PR #13803:
URL: https://github.com/apache/druid/pull/13803#discussion_r1146902269
##########
processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexer.java:
##########
@@ -437,41 +471,51 @@ public Class<?> classOfObject()
this.typeSet = new NestedLiteralTypeInfo.MutableTypeSet();
}
- private StructuredDataProcessor.ProcessedLiteral<?> processValue(@Nullable
Object value)
+ private StructuredDataProcessor.ProcessedLiteral<?>
processValue(ExprEval<?> eval)
{
- // null value is always added to the global dictionary as id 0, so we
can ignore them here
- if (value != null) {
- // why not
- ExprEval<?> eval = ExprEval.bestEffortOf(value);
- final ColumnType columnType = ExpressionType.toColumnType(eval.type());
-
- switch (columnType.getType()) {
- case LONG:
- globalDimensionDictionary.addLongValue(eval.asLong());
- typeSet.add(ColumnType.LONG);
- return new StructuredDataProcessor.ProcessedLiteral<>(
- eval.asLong(),
- StructuredDataProcessor.getLongObjectEstimateSize()
- );
- case DOUBLE:
- globalDimensionDictionary.addDoubleValue(eval.asDouble());
- typeSet.add(ColumnType.DOUBLE);
- return new StructuredDataProcessor.ProcessedLiteral<>(
- eval.asDouble(),
- StructuredDataProcessor.getDoubleObjectEstimateSize()
- );
- case STRING:
- default:
- final String asString = eval.asString();
- globalDimensionDictionary.addStringValue(asString);
- typeSet.add(ColumnType.STRING);
- return new StructuredDataProcessor.ProcessedLiteral<>(
- eval.asString(),
- StructuredDataProcessor.estimateStringSize(asString)
- );
- }
+ final ColumnType columnType = ExpressionType.toColumnType(eval.type());
+ int sizeEstimate;
+ switch (columnType.getType()) {
+ case LONG:
+ typeSet.add(ColumnType.LONG);
+ sizeEstimate = globalDimensionDictionary.addLongValue(eval.asLong());
+ return new StructuredDataProcessor.ProcessedLiteral<>(eval.asLong(),
sizeEstimate);
+ case DOUBLE:
+ typeSet.add(ColumnType.DOUBLE);
+ sizeEstimate =
globalDimensionDictionary.addDoubleValue(eval.asDouble());
+ return new
StructuredDataProcessor.ProcessedLiteral<>(eval.asDouble(), sizeEstimate);
+ case ARRAY:
+ // skip empty arrays for now, they will always be called 'string'
arrays, which isn't very helpful here since
+ // it will pollute the type set
Review Comment:
It was missing some code to do that; updated, and added tests for nulls,
empties, and arrays of nulls.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]