This is an automated email from the ASF dual-hosted git repository.

vhs pushed a commit to branch release-1.0.2
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit b4dec4006f201e5b12f2990994791e339033f7da
Author: Sagar Sumit <[email protected]>
AuthorDate: Thu Apr 10 14:52:31 2025 +0530

    [HUDI-9262] Skip building stats for decimal field with very high precision (#13097)
    
    * [HUDI-9262] Skip building stats for decimal field with very high precision
    
    * address comments, account for upscaling
    
    (cherry picked from commit 948f4d7d01febaa960a2f7610e6f67e61372a63c)
---
 .../hudi/metadata/HoodieTableMetadataUtil.java      | 15 +++++++++++++++
 .../hudi/metadata/TestHoodieTableMetadataUtil.java  | 21 +++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index e574b8bc11c..27e7c727020 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -108,6 +108,7 @@ import org.apache.hudi.storage.StoragePathInfo;
 import org.apache.hudi.util.Lazy;
 
 import org.apache.avro.AvroTypeException;
+import org.apache.avro.LogicalType;
 import org.apache.avro.LogicalTypes;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.IndexedRecord;
@@ -203,6 +204,11 @@ public class HoodieTableMetadataUtil {
       HoodieRecord.HoodieMetadataField.PARTITION_PATH_METADATA_FIELD.getFieldName(),
       HoodieRecord.HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.getFieldName()));
 
+  // The maximum allowed precision and scale as per the payload schema. See DecimalWrapper in HoodieMetadata.avsc:
+  // https://github.com/apache/hudi/blob/45dedd819e56e521148bde51a3dfa4e472ea70cd/hudi-common/src/main/avro/HoodieMetadata.avsc#L247
+  private static final int DECIMAL_MAX_PRECISION = 30;
+  private static final int DECIMAL_MAX_SCALE = 15;
+
   private HoodieTableMetadataUtil() {
   }
 
@@ -1908,6 +1914,15 @@ public class HoodieTableMetadataUtil {
 
   public static boolean isColumnTypeSupported(Schema schema, Option<HoodieRecordType> recordType) {
     Schema schemaToCheck = resolveNullableSchema(schema);
+    // Check for precision and scale if the schema has a logical decimal type.
+    LogicalType logicalType = schemaToCheck.getLogicalType();
+    if (logicalType != null && logicalType instanceof LogicalTypes.Decimal) {
+      LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
+      if (decimalType.getPrecision() + (DECIMAL_MAX_SCALE - decimalType.getScale()) > DECIMAL_MAX_PRECISION || decimalType.getScale() > DECIMAL_MAX_SCALE) {
+        return false;
+      }
+    }
+
     // if record type is set and if its AVRO, MAP, ARRAY, RECORD and ENUM types are unsupported.
     if (recordType.isPresent() && recordType.get() == HoodieRecordType.AVRO) {
       return (schemaToCheck.getType() != Schema.Type.RECORD && schemaToCheck.getType() != Schema.Type.ARRAY && schemaToCheck.getType() != Schema.Type.MAP
diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
index 7947c0571c1..bcf67bfb9e3 100644
--- a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
+++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java
@@ -652,6 +652,27 @@ public class TestHoodieTableMetadataUtil extends HoodieCommonTestHarness {
         .name("dateField").type(dateFieldSchema).noDefault()
         .endRecord();
     assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(schema.getField("dateField").schema(), Option.empty()));
+
+    // Test for logical decimal type with allowed precision and scale
+    schema = Schema.create(Schema.Type.BYTES);
+    LogicalTypes.Decimal decimalType = LogicalTypes.decimal(30, 15);
+    decimalType.addToSchema(schema);
+    // Expect the column to be supported.
+    assertTrue(HoodieTableMetadataUtil.isColumnTypeSupported(schema, Option.of(HoodieRecord.HoodieRecordType.AVRO)));
+
+    // Test for logical decimal type with precision and scale exceeding the limit
+    schema = Schema.create(Schema.Type.BYTES);
+    decimalType = LogicalTypes.decimal(35, 20);
+    decimalType.addToSchema(schema);
+    // Expect the column to be unsupported.
+    assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(schema, Option.of(HoodieRecord.HoodieRecordType.AVRO)));
+
+    // Test for logical decimal type with precision exceeding limit after upscaling
+    schema = Schema.create(Schema.Type.BYTES);
+    decimalType = LogicalTypes.decimal(28, 10);
+    decimalType.addToSchema(schema);
+    // Expect the column to be unsupported.
+    assertFalse(HoodieTableMetadataUtil.isColumnTypeSupported(schema, Option.of(HoodieRecord.HoodieRecordType.AVRO)));
   }
 
   @Test

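Note: the standalone sketch below is not part of the commit; it only illustrates the arithmetic behind the new check. The metadata payload stores column stats as decimal(30, 15) (DecimalWrapper in HoodieMetadata.avsc), so a source decimal(precision, scale) is upscaled to scale 15 first, which inflates its effective precision by (15 - scale) digits; the column is skipped when that inflated precision exceeds 30 or when the scale itself exceeds 15. Class and method names in the sketch are hypothetical.

    // Hypothetical sketch; the constants mirror DecimalWrapper in HoodieMetadata.avsc (decimal(30, 15)).
    public class DecimalStatsCheckSketch {
      private static final int MAX_PRECISION = 30;
      private static final int MAX_SCALE = 15;

      // A decimal(precision, scale) fits the metadata wrapper only if, after upscaling
      // to scale 15, its effective precision still stays within 30 digits.
      static boolean fitsInWrapper(int precision, int scale) {
        return scale <= MAX_SCALE && precision + (MAX_SCALE - scale) <= MAX_PRECISION;
      }

      public static void main(String[] args) {
        System.out.println(fitsInWrapper(30, 15)); // true  -> stats are built
        System.out.println(fitsInWrapper(35, 20)); // false -> scale 20 exceeds 15
        System.out.println(fitsInWrapper(28, 10)); // false -> 28 + (15 - 10) = 33 > 30
      }
    }

Running the sketch prints true, false, false, matching the three new assertions in TestHoodieTableMetadataUtil above.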