This is an automated email from the ASF dual-hosted git repository. vhs pushed a commit to branch release-1.0.2 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit b4dec4006f201e5b12f2990994791e339033f7da Author: Sagar Sumit <[email protected]> AuthorDate: Thu Apr 10 14:52:31 2025 +0530 [HUDI-9262] Skip building stats for decimal field with very high precision (#13097) * [HUDI-9262] Skip building stats for decimal field with very high precision * address comments, account for upscaling (cherry picked from commit 948f4d7d01febaa960a2f7610e6f67e61372a63c) --- .../hudi/metadata/HoodieTableMetadataUtil.java | 15 +++++++++++++++ .../hudi/metadata/TestHoodieTableMetadataUtil.java | 21 +++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index e574b8bc11c..27e7c727020 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -108,6 +108,7 @@ import org.apache.hudi.storage.StoragePathInfo; import org.apache.hudi.util.Lazy; import org.apache.avro.AvroTypeException; +import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; @@ -203,6 +204,11 @@ public class HoodieTableMetadataUtil { HoodieRecord.HoodieMetadataField.PARTITION_PATH_METADATA_FIELD.getFieldName(), HoodieRecord.HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.getFieldName())); + // The maximum allowed precision and scale as per the payload schema. See DecimalWrapper in HoodieMetadata.avsc: + // https://github.com/apache/hudi/blob/45dedd819e56e521148bde51a3dfa4e472ea70cd/hudi-common/src/main/avro/HoodieMetadata.avsc#L247 + private static final int DECIMAL_MAX_PRECISION = 30; + private static final int DECIMAL_MAX_SCALE = 15; + private HoodieTableMetadataUtil() { } @@ -1908,6 +1914,15 @@ public class HoodieTableMetadataUtil { public static boolean isColumnTypeSupported(Schema schema, Option<HoodieRecordType> recordType) { Schema schemaToCheck = resolveNullableSchema(schema); + // Check for precision and scale if the schema has a logical decimal type. + LogicalType logicalType = schemaToCheck.getLogicalType(); + if (logicalType != null && logicalType instanceof LogicalTypes.Decimal) { + LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType; + if (decimalType.getPrecision() + (DECIMAL_MAX_SCALE - decimalType.getScale()) > DECIMAL_MAX_PRECISION || decimalType.getScale() > DECIMAL_MAX_SCALE) { + return false; + } + } + // if record type is set and if its AVRO, MAP, ARRAY, RECORD and ENUM types are unsupported. if (recordType.isPresent() && recordType.get() == HoodieRecordType.AVRO) { return (schemaToCheck.getType() != Schema.Type.RECORD && schemaToCheck.getType() != Schema.Type.ARRAY && schemaToCheck.getType() != Schema.Type.MAP diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java index 7947c0571c1..bcf67bfb9e3 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java @@ -652,6 +652,27 @@ public class TestHoodieTableMetadataUtil extends HoodieCommonTestHarness { .name("dateField").type(dateFieldSchema).noDefault() .endRecord(); assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(schema.getField("dateField").schema(), Option.empty())); + + // Test for logical decimal type with allowed precision and scale + schema = Schema.create(Schema.Type.BYTES); + LogicalTypes.Decimal decimalType = LogicalTypes.decimal(30, 15); + decimalType.addToSchema(schema); + // Expect the column to be supported. + assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(schema, Option.of(HoodieRecord.HoodieRecordType.AVRO))); + + // Test for logical decimal type with precision and scale exceeding the limit + schema = Schema.create(Schema.Type.BYTES); + decimalType = LogicalTypes.decimal(35, 20); + decimalType.addToSchema(schema); + // Expect the column to be unsupported. + assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(schema, Option.of(HoodieRecord.HoodieRecordType.AVRO))); + + // Test for logical decimal type with precision exceeding limit after upscaling + schema = Schema.create(Schema.Type.BYTES); + decimalType = LogicalTypes.decimal(28, 10); + decimalType.addToSchema(schema); + // Expect the column to be unsupported. + assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(schema, Option.of(HoodieRecord.HoodieRecordType.AVRO))); } @Test
