voonhous commented on code in PR #14311:
URL: https://github.com/apache/hudi/pull/14311#discussion_r2579950400
##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1869,62 +1889,70 @@ public static BigDecimal tryUpcastDecimal(BigDecimal
value, final LogicalTypes.D
return value;
}
- public static Option<Schema> tryResolveSchemaForTable(HoodieTableMetaClient
dataTableMetaClient) {
+ public static Option<HoodieSchema>
tryResolveSchemaForTable(HoodieTableMetaClient dataTableMetaClient) {
if
(dataTableMetaClient.getCommitsTimeline().filterCompletedInstants().countInstants()
== 0) {
return Option.empty();
}
try {
TableSchemaResolver schemaResolver = new
TableSchemaResolver(dataTableMetaClient);
- return Option.of(schemaResolver.getTableAvroSchema());
+ return
Option.of(HoodieSchema.fromAvroSchema(schemaResolver.getTableAvroSchema()));
} catch (Exception e) {
throw new HoodieException("Failed to get latest columns for " +
dataTableMetaClient.getBasePath(), e);
}
}
/**
- * Given a schema, coerces provided value to instance of {@link
Comparable<?>} such that
- * it could subsequently be used in column stats
+ * Given a HoodieSchema, coerces provided value to instance of {@link
Comparable<?>} such that it could subsequently be used in column stats. This
method uses HoodieSchema for in-memory processing
+ * while maintaining compatibility with existing Avro-based serialization.
+ * <p>
+ * NOTE: This method has to stay compatible with the semantic of {@link
FileFormatUtils#readColumnStatsFromMetadata} as they are used in tandem
*
- * NOTE: This method has to stay compatible with the semantic of
- * {@link FileFormatUtils#readColumnStatsFromMetadata} as they are used
in tandem
+ * @param schema the HoodieSchema to use for type coercion
+ * @param val the value to coerce
+ * @return the coerced value as a Comparable
+ * @since 1.2.0
*/
- public static Comparable<?> coerceToComparable(Schema schema, Object val) {
+ public static Comparable<?> coerceToComparable(HoodieSchema schema, Object
val) {
if (val == null) {
return null;
}
- switch (schema.getType()) {
+ HoodieSchemaType schemaType = schema.getType();
+
+ switch (schemaType) {
case UNION:
// TODO we need to handle unions in general case as well
- return coerceToComparable(getNonNullTypeFromUnion(schema), val);
+ return coerceToComparable(schema.getNonNullType(), val);
case FIXED:
case BYTES:
- if (schema.getLogicalType() instanceof LogicalTypes.Decimal) {
- return (Comparable<?>) val;
- }
return (ByteBuffer) val;
-
-
+ case DECIMAL:
+ return (Comparable<?>) val;
case INT:
- if (schema.getLogicalType() == LogicalTypes.date()
- || schema.getLogicalType() == LogicalTypes.timeMillis()) {
- // NOTE: This type will be either {@code java.sql.Date} or
{org.joda.LocalDate}
- // depending on the Avro version. Hence, we simply cast it to
{@code Comparable<?>}
+ return castToInteger(val);
+ case DATE:
+ // NOTE: This type will be either {@code java.sql.Date} or
{org.joda.LocalDate}
+ // depending on the Avro version. Hence, we simply cast it to
{@code Comparable<?>}
+ return (Comparable<?>) val;
+ case TIME:
+ HoodieSchema.Time timeSchema = (HoodieSchema.Time) schema;
+ TimePrecision precision = timeSchema.getPrecision();
+ if (precision.equals(TimePrecision.MILLIS) ||
precision.equals(TimePrecision.MICROS)) {
return (Comparable<?>) val;
}
return castToInteger(val);
Review Comment:
Makes sense, since time is either millis or micros. I was overthinking
this...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]