abhishekagarwal87 commented on code in PR #15417:
URL: https://github.com/apache/druid/pull/15417#discussion_r1407443280
##########
processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java:
##########
@@ -133,6 +150,51 @@ public EncodedKeyComponent<StructuredData>
processRowValsToUnsortedEncodedKeyCom
} else if (isConstant) {
isConstant = Objects.equals(dimValues, constantValue);
}
+
+ if (castToExpressionType != null) {
+ return processCast(dimValues);
+ } else {
+ return processAuto(dimValues);
+ }
+ }
+
+ /**
+ * Process values which will all be cast to {@link #castToExpressionType}.
This method should not be used for
+ * and does not handle actual nested data structures, use {@link
#processAuto(Object)} instead.
+ */
+ private EncodedKeyComponent<StructuredData> processCast(@Nullable Object
dimValues)
+ {
+ final long oldDictSizeInBytes = globalDictionary.sizeInBytes();
+ final int oldFieldKeySize = estimatedFieldKeySize;
+ ExprEval<?> eval = ExprEval.bestEffortOf(dimValues);
+ try {
+ eval = eval.castTo(castToExpressionType);
+ FieldIndexer fieldIndexer =
fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT);
+ if (fieldIndexer == null) {
+ estimatedFieldKeySize +=
StructuredDataProcessor.estimateStringSize(NestedPathFinder.JSON_PATH_ROOT);
+ fieldIndexer = new FieldIndexer(globalDictionary);
+ fieldIndexers.put(NestedPathFinder.JSON_PATH_ROOT, fieldIndexer);
+ }
+ StructuredDataProcessor.ProcessedValue<?> rootValue =
fieldIndexer.processValue(eval);
+ long effectiveSizeBytes = rootValue.getSize();
+ // then, we add the delta of size change to the global dictionaries to
account for any new space added by the
+ // 'raw' data
+ effectiveSizeBytes += (globalDictionary.sizeInBytes() -
oldDictSizeInBytes);
+ effectiveSizeBytes += (estimatedFieldKeySize - oldFieldKeySize);
+ return new EncodedKeyComponent<>(StructuredData.wrap(eval.value()),
effectiveSizeBytes);
+ }
+ catch (IAE invalidCast) {
+ throw new ParseException(eval.asString(), invalidCast, "Cannot coerce to
requested type [%s]", castToType);
Review Comment:
can we include the field / column name here?
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/DimensionSchemaUtils.java:
##########
@@ -73,7 +73,7 @@ public static DimensionSchema createDimensionSchema(
.getDimensionSchema(capabilities);
}
- return new AutoTypeColumnSchema(column);
+ return new AutoTypeColumnSchema(column, type != null &&
type.is(ValueType.COMPLEX) ? type : null);
Review Comment:
since you can't cast to complex types.
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/DimensionSchemaUtils.java:
##########
@@ -73,7 +73,7 @@ public static DimensionSchema createDimensionSchema(
.getDimensionSchema(capabilities);
}
- return new AutoTypeColumnSchema(column);
+ return new AutoTypeColumnSchema(column, type != null &&
type.is(ValueType.COMPLEX) ? type : null);
Review Comment:
shouldn't this be
type != null && !type.is(ValueType.COMPLEX)
##########
processing/src/main/java/org/apache/druid/segment/AutoTypeColumnMerger.java:
##########
@@ -148,20 +152,26 @@ public void
writeMergedValueDictionary(List<IndexableAdapter> adapters) throws I
final FieldTypeInfo.MutableTypeSet rootTypes =
mergedFields.get(NestedPathFinder.JSON_PATH_ROOT);
final boolean rootOnly = mergedFields.size() == 1 && rootTypes != null;
+ final ColumnType explicitType;
+ if (castToType != null && (castToType.isPrimitive() ||
castToType.isPrimitiveArray())) {
Review Comment:
do you actually need the `(castToType.isPrimitive() ||
castToType.isPrimitiveArray())` check here or is it just defensive coding?
##########
extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ExternalColumnSelectorFactory.java:
##########
@@ -107,7 +107,7 @@ public Object getObject()
if (expressionType == null) {
return delegateDimensionSelector.getObject();
}
- return ExprEval.ofType(expressionType,
delegateDimensionSelector.getObject()).value();
+ return
ExprEval.bestEffortOf(delegateDimensionSelector.getObject()).castTo(expressionType).value();
Review Comment:
whats the reason behind this change?
##########
processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java:
##########
@@ -133,6 +150,51 @@ public EncodedKeyComponent<StructuredData>
processRowValsToUnsortedEncodedKeyCom
} else if (isConstant) {
isConstant = Objects.equals(dimValues, constantValue);
}
+
+ if (castToExpressionType != null) {
+ return processCast(dimValues);
+ } else {
+ return processAuto(dimValues);
+ }
+ }
+
+ /**
+ * Process values which will all be cast to {@link #castToExpressionType}.
This method should not be used for
+ * and does not handle actual nested data structures, use {@link
#processAuto(Object)} instead.
+ */
+ private EncodedKeyComponent<StructuredData> processCast(@Nullable Object
dimValues)
+ {
+ final long oldDictSizeInBytes = globalDictionary.sizeInBytes();
+ final int oldFieldKeySize = estimatedFieldKeySize;
+ ExprEval<?> eval = ExprEval.bestEffortOf(dimValues);
+ try {
+ eval = eval.castTo(castToExpressionType);
+ FieldIndexer fieldIndexer =
fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT);
+ if (fieldIndexer == null) {
+ estimatedFieldKeySize +=
StructuredDataProcessor.estimateStringSize(NestedPathFinder.JSON_PATH_ROOT);
+ fieldIndexer = new FieldIndexer(globalDictionary);
+ fieldIndexers.put(NestedPathFinder.JSON_PATH_ROOT, fieldIndexer);
+ }
+ StructuredDataProcessor.ProcessedValue<?> rootValue =
fieldIndexer.processValue(eval);
+ long effectiveSizeBytes = rootValue.getSize();
+ // then, we add the delta of size change to the global dictionaries to
account for any new space added by the
+ // 'raw' data
+ effectiveSizeBytes += (globalDictionary.sizeInBytes() -
oldDictSizeInBytes);
+ effectiveSizeBytes += (estimatedFieldKeySize - oldFieldKeySize);
+ return new EncodedKeyComponent<>(StructuredData.wrap(eval.value()),
effectiveSizeBytes);
+ }
+ catch (IAE invalidCast) {
Review Comment:
we should leave some trace of this exception in the logs for
troubleshooting. You also have a wide net over which you are catching this
exception as opposed to just catching it over the `castTo` call. It's hard to
tell if some other lines of code throw an unrelated IAE too.
##########
processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java:
##########
@@ -584,7 +584,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow
row)
wasNewDim = true;
final DimensionHandler<?, ?, ?> handler;
if (useSchemaDiscovery) {
- handler = new NestedCommonFormatColumnHandler(dimension);
+ handler = new NestedCommonFormatColumnHandler(dimension, null);
Review Comment:
do we not need to pass the desired type here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]