This is an automated email from the ASF dual-hosted git repository.
timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
The following commit(s) were added to refs/heads/main by this push:
new 8c143a7e scale BigDecimal col stats correctly
8c143a7e is described below
commit 8c143a7e839bb0b14e4a21cff654be777e6353c7
Author: Timothy Brown <[email protected]>
AuthorDate: Tue Jan 7 09:22:17 2025 -0600
scale BigDecimal col stats correctly
---
.../apache/xtable/delta/DeltaValueConverter.java | 30 +++---
.../apache/xtable/hudi/HudiFileStatsExtractor.java | 15 ++-
.../xtable/delta/TestDeltaValueConverter.java | 110 +++++++++++++++++++++
.../xtable/hudi/TestHudiFileStatsExtractor.java | 8 +-
.../apache/xtable/testutil/ColumnStatMapUtil.java | 15 ++-
5 files changed, 148 insertions(+), 30 deletions(-)
diff --git
a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
index 6f837fe9..5eea1faa 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
@@ -19,7 +19,8 @@
package org.apache.xtable.delta;
import java.math.BigDecimal;
-import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.text.DateFormat;
@@ -63,7 +64,7 @@ public class DeltaValueConverter {
return null;
}
if (noConversionForSchema(fieldSchema)) {
- return castObjectToInternalType(value, fieldSchema.getDataType());
+ return castObjectToInternalType(value, fieldSchema);
}
// Needs special handling for date and time.
InternalType fieldType = fieldSchema.getDataType();
@@ -198,7 +199,8 @@ public class DeltaValueConverter {
}
}
- private static Object castObjectToInternalType(Object value, InternalType
valueType) {
+ private static Object castObjectToInternalType(Object value, InternalSchema
schema) {
+ InternalType valueType = schema.getDataType();
switch (valueType) {
case DOUBLE:
if (value instanceof String)
@@ -232,7 +234,7 @@ public class DeltaValueConverter {
}
break;
case DECIMAL:
- return numberTypeToBigDecimal(value);
+ return numberTypeToBigDecimal(value, schema);
case LONG:
if (value instanceof Integer) {
return ((Integer) value).longValue();
@@ -242,18 +244,12 @@ public class DeltaValueConverter {
return value;
}
- private static BigDecimal numberTypeToBigDecimal(Object value) {
- // BigDecimal is parsed as Integer, Long, BigInteger and double if none of
the above.
- if (value instanceof Integer) {
- return BigDecimal.valueOf((Integer) value);
- } else if (value instanceof Long) {
- return BigDecimal.valueOf((Long) value);
- } else if (value instanceof BigInteger) {
- return new BigDecimal((BigInteger) value);
- } else if (value instanceof Double) {
- return BigDecimal.valueOf((Double) value);
- } else {
- return (BigDecimal) value;
- }
+ private static BigDecimal numberTypeToBigDecimal(Object value,
InternalSchema schema) {
+ // BigDecimal is parsed by converting the value to a string and setting
the proper scale and
+ // precision.
+ int precision = (int)
schema.getMetadata().get(InternalSchema.MetadataKey.DECIMAL_PRECISION);
+ int scale = (int)
schema.getMetadata().get(InternalSchema.MetadataKey.DECIMAL_SCALE);
+ return new BigDecimal(String.valueOf(value), new MathContext(precision))
+ .setScale(scale, RoundingMode.UNNECESSARY);
}
}
diff --git
a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
index 2ffbad04..e47ef72e 100644
---
a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
+++
b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
@@ -20,6 +20,7 @@ package org.apache.xtable.hudi;
import java.math.BigDecimal;
import java.math.BigInteger;
+import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.sql.Date;
import java.util.ArrayList;
@@ -200,14 +201,16 @@ public class HudiFileStatsExtractor {
Comparable<?> minValue =
HoodieAvroUtils.unwrapAvroValueWrapper(columnStats.getMinValue());
Comparable<?> maxValue =
HoodieAvroUtils.unwrapAvroValueWrapper(columnStats.getMaxValue());
if (field.getSchema().getDataType() == InternalType.DECIMAL) {
+ int scale =
+ (int)
field.getSchema().getMetadata().get(InternalSchema.MetadataKey.DECIMAL_SCALE);
minValue =
minValue instanceof ByteBuffer
- ? convertBytesToBigDecimal((ByteBuffer) minValue,
DECIMAL_WRAPPER_SCALE)
- : minValue;
+ ? convertBytesToBigDecimal((ByteBuffer) minValue, scale)
+ : ((BigDecimal) minValue).setScale(scale,
RoundingMode.UNNECESSARY);
maxValue =
maxValue instanceof ByteBuffer
- ? convertBytesToBigDecimal((ByteBuffer) maxValue,
DECIMAL_WRAPPER_SCALE)
- : maxValue;
+ ? convertBytesToBigDecimal((ByteBuffer) maxValue, scale)
+ : ((BigDecimal) maxValue).setScale(scale,
RoundingMode.UNNECESSARY);
}
return getColumnStatFromValues(
minValue,
@@ -221,7 +224,9 @@ public class HudiFileStatsExtractor {
private static BigDecimal convertBytesToBigDecimal(ByteBuffer value, int
scale) {
byte[] bytes = new byte[value.remaining()];
value.duplicate().get(bytes);
- return new BigDecimal(new BigInteger(bytes), scale);
+ BigDecimal serializedValue = new BigDecimal(new BigInteger(bytes),
DECIMAL_WRAPPER_SCALE);
+ // set the scale to match the schema
+ return serializedValue.setScale(scale, RoundingMode.UNNECESSARY);
}
private static ColumnStat getColumnStatFromColRange(
diff --git
a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
index 1cdd33a7..d69021f1 100644
---
a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
+++
b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
@@ -21,6 +21,9 @@ package org.apache.xtable.delta;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@@ -33,6 +36,8 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
+import com.google.common.collect.ImmutableMap;
+
import org.apache.xtable.model.schema.InternalSchema;
import org.apache.xtable.model.schema.InternalType;
import org.apache.xtable.model.schema.PartitionTransformType;
@@ -214,4 +219,109 @@ public class TestDeltaValueConverter {
Arguments.of(Double.NaN, doubleSchema, Double.NaN),
Arguments.of(Double.POSITIVE_INFINITY, doubleSchema,
Double.POSITIVE_INFINITY));
}
+
+ @ParameterizedTest
+ @MethodSource("decimalValues")
+ void parseDecimalValues(
+ Object deltaValue, InternalSchema fieldSchema, BigDecimal
expectedOutput) {
+ assertEquals(
+ expectedOutput,
+ DeltaValueConverter.convertFromDeltaColumnStatValue(deltaValue,
fieldSchema));
+ }
+
+ private static Stream<Arguments> decimalValues() {
+ return Stream.of(
+ Arguments.of(
+ -8.00,
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5)
+ .build())
+ .build(),
+ new BigDecimal("-8.00", new MathContext(5,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)),
+ Arguments.of(
+ -8.00f,
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5)
+ .build())
+ .build(),
+ new BigDecimal("-8.00", new MathContext(5,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)),
+ Arguments.of(
+ 1000,
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+ .build())
+ .build(),
+ new BigDecimal("1000.00", new MathContext(6,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)),
+ Arguments.of(
+ 1000L,
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+ .build())
+ .build(),
+ new BigDecimal("1000.00", new MathContext(6,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)),
+ Arguments.of(
+ "1000",
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+ .build())
+ .build(),
+ new BigDecimal("1000.00", new MathContext(6,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)),
+ Arguments.of(
+ 1234.56,
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+ .build())
+ .build(),
+ new BigDecimal("1234.56", new MathContext(6,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)),
+ Arguments.of(
+ new BigDecimal("1234.56", new MathContext(6,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY),
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey, Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6)
+ .build())
+ .build(),
+ new BigDecimal("1234.56", new MathContext(6,
RoundingMode.UNNECESSARY))
+ .setScale(2, RoundingMode.UNNECESSARY)));
+ }
}
diff --git
a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
index 82149c8b..a18bb743 100644
---
a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
+++
b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
@@ -286,12 +286,8 @@ public class TestHudiFileStatsExtractor {
assertEquals(1, decimalColumnStat.getNumNulls());
assertEquals(2, decimalColumnStat.getNumValues());
assertTrue(decimalColumnStat.getTotalSize() > 0);
- assertEquals(
- new BigDecimal("1234.56"),
- ((BigDecimal) decimalColumnStat.getRange().getMinValue()).setScale(2));
- assertEquals(
- new BigDecimal("1234.56"),
- ((BigDecimal) decimalColumnStat.getRange().getMaxValue()).setScale(2));
+ assertEquals(new BigDecimal("1234.56"),
decimalColumnStat.getRange().getMinValue());
+ assertEquals(new BigDecimal("1234.56"),
decimalColumnStat.getRange().getMaxValue());
}
private HoodieRecord<HoodieAvroPayload> buildRecord(GenericRecord record) {
diff --git
a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
index 09b34308..1703b916 100644
---
a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
+++
b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java
@@ -26,6 +26,8 @@ import java.util.List;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
+import com.google.common.collect.ImmutableMap;
+
import org.apache.xtable.model.schema.InternalField;
import org.apache.xtable.model.schema.InternalSchema;
import org.apache.xtable.model.schema.InternalType;
@@ -174,7 +176,16 @@ public class ColumnStatMapUtil {
private static final InternalField DECIMAL_FIELD =
InternalField.builder()
.name("decimal_field")
-
.schema(InternalSchema.builder().name("decimal").dataType(InternalType.DECIMAL).build())
+ .schema(
+ InternalSchema.builder()
+ .name("decimal")
+ .dataType(InternalType.DECIMAL)
+ .metadata(
+ ImmutableMap.<InternalSchema.MetadataKey,
Object>builder()
+ .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2)
+ .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5)
+ .build())
+ .build())
.build();
private static final InternalField FLOAT_FIELD =
@@ -312,7 +323,7 @@ public class ColumnStatMapUtil {
ColumnStat.builder()
.field(DECIMAL_FIELD)
.numNulls(1)
- .range(Range.vector(new BigDecimal("1.0"), new BigDecimal("2.0")))
+ .range(Range.vector(new BigDecimal("1.00"), new
BigDecimal("2.00")))
.numValues(50)
.totalSize(123)
.build();