voonhous commented on code in PR #17599:
URL: https://github.com/apache/hudi/pull/17599#discussion_r2652123521
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java:
##########
@@ -150,50 +153,41 @@ static boolean
validateDataTypeForSecondaryIndex(List<String> sourceFields, Sche
* @param tableSchema table schema
* @return true if each field's data types are supported, false otherwise
*/
- public static boolean
validateDataTypeForSecondaryOrExpressionIndex(List<String> sourceFields, Schema
tableSchema) {
+ public static boolean
validateDataTypeForSecondaryOrExpressionIndex(List<String> sourceFields,
HoodieSchema tableSchema) {
return sourceFields.stream().anyMatch(fieldToIndex -> {
- Schema schema = getNestedFieldSchemaFromWriteSchema(tableSchema,
fieldToIndex);
- return schema.getType() != Schema.Type.RECORD && schema.getType() !=
Schema.Type.ARRAY && schema.getType() != Schema.Type.MAP;
+ Option<Pair<String, HoodieSchemaField>> nestedFieldOpt =
HoodieSchemaUtils.getNestedField(tableSchema, fieldToIndex);
+ if (nestedFieldOpt.isEmpty()) {
+ throw new HoodieException("Failed to get schema. Not a valid field
name: " + fieldToIndex);
+ }
+ HoodieSchema fieldSchema = nestedFieldOpt.get().getRight().schema();
+ return fieldSchema.getType() != HoodieSchemaType.RECORD &&
fieldSchema.getType() != HoodieSchemaType.ARRAY && fieldSchema.getType() !=
HoodieSchemaType.MAP;
});
}
/**
* Check if the given schema type is supported for secondary index.
* Supported types are: String (including CHAR), Integer types (Int, BigInt,
Long, Short), and timestamp
*/
- private static boolean isSecondaryIndexSupportedType(Schema schema) {
+ private static boolean isSecondaryIndexSupportedType(HoodieSchema schema) {
// Handle union types (nullable fields)
- if (schema.getType() == Schema.Type.UNION) {
+ if (schema.getType() == HoodieSchemaType.UNION) {
// For union types, check if any of the types is supported
return schema.getTypes().stream()
- .anyMatch(s -> s.getType() != Schema.Type.NULL &&
isSecondaryIndexSupportedType(s));
+ .anyMatch(s -> s.getType() != HoodieSchemaType.NULL &&
isSecondaryIndexSupportedType(s));
}
// Check basic types
switch (schema.getType()) {
case STRING:
- // STRING type can have UUID logical type which we don't support
- return schema.getLogicalType() == null; // UUID and other string-based
logical types are not supported
- // Regular STRING (includes CHAR)
case INT:
- // INT type can represent regular integers or dates/times with logical
types
- if (schema.getLogicalType() != null) {
- // Support date and time-millis logical types
- return schema.getLogicalType() == LogicalTypes.date()
- || schema.getLogicalType() == LogicalTypes.timeMillis();
- }
- return true; // Regular INT
case LONG:
- // LONG type can represent regular longs or timestamps with logical
types
- if (schema.getLogicalType() != null) {
- // Support timestamp logical types
- return schema.getLogicalType() == LogicalTypes.timestampMillis()
- || schema.getLogicalType() == LogicalTypes.timestampMicros()
- || schema.getLogicalType() == LogicalTypes.timeMicros();
- }
- return true; // Regular LONG
case DOUBLE:
- return true; // Support DOUBLE type
+ case DATE:
Review Comment:
Let me verify this, the comment is a little confusing. It says: `Float and
Double are now supported`, but the test itself for **Float** is a test for
`assertThrows` but for **Double**, it's a `assertDoesNotThrow`. Might need to
check separately with @linliu-code what's going on here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]