This is an automated email from the ASF dual-hosted git repository.

timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git


The following commit(s) were added to refs/heads/main by this push:
     new 8c143a7e scale BigDecimal col stats correctly
8c143a7e is described below

commit 8c143a7e839bb0b14e4a21cff654be777e6353c7
Author: Timothy Brown <[email protected]>
AuthorDate: Tue Jan 7 09:22:17 2025 -0600

    scale BigDecimal col stats correctly
---
 .../apache/xtable/delta/DeltaValueConverter.java   |  30 +++---
 .../apache/xtable/hudi/HudiFileStatsExtractor.java |  15 ++-
 .../xtable/delta/TestDeltaValueConverter.java      | 110 +++++++++++++++++++++
 .../xtable/hudi/TestHudiFileStatsExtractor.java    |   8 +-
 .../apache/xtable/testutil/ColumnStatMapUtil.java  |  15 ++-
 5 files changed, 148 insertions(+), 30 deletions(-)

diff --git 
a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java 
b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
index 6f837fe9..5eea1faa 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
@@ -19,7 +19,8 @@
 package org.apache.xtable.delta;
 
 import java.math.BigDecimal;
-import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
 import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.text.DateFormat;
@@ -63,7 +64,7 @@ public class DeltaValueConverter {
       return null;
     }
     if (noConversionForSchema(fieldSchema)) {
-      return castObjectToInternalType(value, fieldSchema.getDataType());
+      return castObjectToInternalType(value, fieldSchema);
     }
     // Needs special handling for date and time.
     InternalType fieldType = fieldSchema.getDataType();
@@ -198,7 +199,8 @@ public class DeltaValueConverter {
     }
   }
 
-  private static Object castObjectToInternalType(Object value, InternalType 
valueType) {
+  private static Object castObjectToInternalType(Object value, InternalSchema 
schema) {
+    InternalType valueType = schema.getDataType();
     switch (valueType) {
       case DOUBLE:
         if (value instanceof String)
@@ -232,7 +234,7 @@ public class DeltaValueConverter {
         }
         break;
       case DECIMAL:
-        return numberTypeToBigDecimal(value);
+        return numberTypeToBigDecimal(value, schema);
       case LONG:
         if (value instanceof Integer) {
           return ((Integer) value).longValue();
@@ -242,18 +244,12 @@ public class DeltaValueConverter {
     return value;
   }
 
-  private static BigDecimal numberTypeToBigDecimal(Object value) {
-    // BigDecimal is parsed as Integer, Long, BigInteger and double if none of 
the above.
-    if (value instanceof Integer) {
-      return BigDecimal.valueOf((Integer) value);
-    } else if (value instanceof Long) {
-      return BigDecimal.valueOf((Long) value);
-    } else if (value instanceof BigInteger) {
-      return new BigDecimal((BigInteger) value);
-    } else if (value instanceof Double) {
-      return BigDecimal.valueOf((Double) value);
-    } else {
-      return (BigDecimal) value;
-    }
+  private static BigDecimal numberTypeToBigDecimal(Object value, 
InternalSchema schema) {
+    // BigDecimal is parsed by converting the value to a string and setting 
the proper scale and
+    // precision.
+    int precision = (int) 
schema.getMetadata().get(InternalSchema.MetadataKey.DECIMAL_PRECISION);
+    int scale = (int) 
schema.getMetadata().get(InternalSchema.MetadataKey.DECIMAL_SCALE);
+    return new BigDecimal(String.valueOf(value), new MathContext(precision))
+        .setScale(scale, RoundingMode.UNNECESSARY);
   }
 }
diff --git 
a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java 
b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
index 2ffbad04..e47ef72e 100644
--- 
a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
+++ 
b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
@@ -20,6 +20,7 @@ package org.apache.xtable.hudi;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
+import java.math.RoundingMode;
 import java.nio.ByteBuffer;
 import java.sql.Date;
 import java.util.ArrayList;
@@ -200,14 +201,16 @@ public class HudiFileStatsExtractor {
     Comparable<?> minValue = 
HoodieAvroUtils.unwrapAvroValueWrapper(columnStats.getMinValue());
     Comparable<?> maxValue = 
HoodieAvroUtils.unwrapAvroValueWrapper(columnStats.getMaxValue());
     if (field.getSchema().getDataType() == InternalType.DECIMAL) {
+      int scale =
+          (int) 
field.getSchema().getMetadata().get(InternalSchema.MetadataKey.DECIMAL_SCALE);
       minValue =
           minValue instanceof ByteBuffer
-              ? convertBytesToBigDecimal((ByteBuffer) minValue, 
DECIMAL_WRAPPER_SCALE)
-              : minValue;
+              ? convertBytesToBigDecimal((ByteBuffer) minValue, scale)
+              : ((BigDecimal) minValue).setScale(scale, 
RoundingMode.UNNECESSARY);
       maxValue =
           maxValue instanceof ByteBuffer
-              ? convertBytesToBigDecimal((ByteBuffer) maxValue, 
DECIMAL_WRAPPER_SCALE)
-              : maxValue;
+              ? convertBytesToBigDecimal((ByteBuffer) maxValue, scale)
+              : ((BigDecimal) maxValue).setScale(scale, 
RoundingMode.UNNECESSARY);
     }
     return getColumnStatFromValues(
         minValue,
@@ -221,7 +224,9 @@ public class HudiFileStatsExtractor {
   private static BigDecimal convertBytesToBigDecimal(ByteBuffer value, int 
scale) {
     byte[] bytes = new byte[value.remaining()];
     value.duplicate().get(bytes);
-    return new BigDecimal(new BigInteger(bytes), scale);
+    BigDecimal serializedValue = new BigDecimal(new BigInteger(bytes), 
DECIMAL_WRAPPER_SCALE);
+    // set the scale to match the schema
+    return serializedValue.setScale(scale, RoundingMode.UNNECESSARY);
   }
 
   private static ColumnStat getColumnStatFromColRange(
diff --git 
a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
 
b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
index 1cdd33a7..d69021f1 100644
--- 
a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
+++ 
b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
@@ -21,6 +21,9 @@ package org.apache.xtable.delta;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -33,6 +36,8 @@ import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 
+import com.google.common.collect.ImmutableMap;
+
 import org.apache.xtable.model.schema.InternalSchema;
 import org.apache.xtable.model.schema.InternalType;
 import org.apache.xtable.model.schema.PartitionTransformType;
@@ -214,4 +219,109 @@ public class TestDeltaValueConverter {
         Arguments.of(Double.NaN, doubleSchema, Double.NaN),
         Arguments.of(Double.POSITIVE_INFINITY, doubleSchema, 
Double.POSITIVE_INFINITY));
   }
+
+  @ParameterizedTest
+  @MethodSource("decimalValues")
+  void parseDecimalValues(
+      Object deltaValue, InternalSchema fieldSchema, BigDecimal 
expectedOutput) {
+    assertEquals(
+        expectedOutput,
+        DeltaValueConverter.convertFromDeltaColumnStatValue(deltaValue, 
fieldSchema));
+  }
+
+  private static Stream<Arguments> decimalValues() {
+    return Stream.of(
+        Arguments.of(
+            -8.00,
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5)
+                        .build())
+                .build(),
+            new BigDecimal("-8.00", new MathContext(5, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)),
+        Arguments.of(
+            -8.00f,
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5)
+                        .build())
+                .build(),
+            new BigDecimal("-8.00", new MathContext(5, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)),
+        Arguments.of(
+            1000,
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+                        .build())
+                .build(),
+            new BigDecimal("1000.00", new MathContext(6, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)),
+        Arguments.of(
+            1000L,
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+                        .build())
+                .build(),
+            new BigDecimal("1000.00", new MathContext(6, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)),
+        Arguments.of(
+            "1000",
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+                        .build())
+                .build(),
+            new BigDecimal("1000.00", new MathContext(6, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)),
+        Arguments.of(
+            1234.56,
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+                        .build())
+                .build(),
+            new BigDecimal("1234.56", new MathContext(6, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)),
+        Arguments.of(
+            new BigDecimal("1234.56", new MathContext(6, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY),
+            InternalSchema.builder()
+                .name("decimal")
+                .dataType(InternalType.DECIMAL)
+                .metadata(
+                    ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+                        .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                        .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+                        .build())
+                .build(),
+            new BigDecimal("1234.56", new MathContext(6, 
RoundingMode.UNNECESSARY))
+                .setScale(2, RoundingMode.UNNECESSARY)));
+  }
 }
diff --git 
a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
 
b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
index 82149c8b..a18bb743 100644
--- 
a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
+++ 
b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
@@ -286,12 +286,8 @@ public class TestHudiFileStatsExtractor {
     assertEquals(1, decimalColumnStat.getNumNulls());
     assertEquals(2, decimalColumnStat.getNumValues());
     assertTrue(decimalColumnStat.getTotalSize() > 0);
-    assertEquals(
-        new BigDecimal("1234.56"),
-        ((BigDecimal) decimalColumnStat.getRange().getMinValue()).setScale(2));
-    assertEquals(
-        new BigDecimal("1234.56"),
-        ((BigDecimal) decimalColumnStat.getRange().getMaxValue()).setScale(2));
+    assertEquals(new BigDecimal("1234.56"), 
decimalColumnStat.getRange().getMinValue());
+    assertEquals(new BigDecimal("1234.56"), 
decimalColumnStat.getRange().getMaxValue());
   }
 
   private HoodieRecord<HoodieAvroPayload> buildRecord(GenericRecord record) {
diff --git 
a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java 
b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
index 09b34308..1703b916 100644
--- 
a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
+++ 
b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
@@ -26,6 +26,8 @@ import java.util.List;
 import lombok.AccessLevel;
 import lombok.NoArgsConstructor;
 
+import com.google.common.collect.ImmutableMap;
+
 import org.apache.xtable.model.schema.InternalField;
 import org.apache.xtable.model.schema.InternalSchema;
 import org.apache.xtable.model.schema.InternalType;
@@ -174,7 +176,16 @@ public class ColumnStatMapUtil {
   private static final InternalField DECIMAL_FIELD =
       InternalField.builder()
           .name("decimal_field")
-          
.schema(InternalSchema.builder().name("decimal").dataType(InternalType.DECIMAL).build())
+          .schema(
+              InternalSchema.builder()
+                  .name("decimal")
+                  .dataType(InternalType.DECIMAL)
+                  .metadata(
+                      ImmutableMap.<InternalSchema.MetadataKey, 
Object>builder()
+                          .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+                          .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5)
+                          .build())
+                  .build())
           .build();
 
   private static final InternalField FLOAT_FIELD =
@@ -312,7 +323,7 @@ public class ColumnStatMapUtil {
         ColumnStat.builder()
             .field(DECIMAL_FIELD)
             .numNulls(1)
-            .range(Range.vector(new BigDecimal("1.0"), new BigDecimal("2.0")))
+            .range(Range.vector(new BigDecimal("1.00"), new 
BigDecimal("2.00")))
             .numValues(50)
             .totalSize(123)
             .build();

Reply via email to