This is an automated email from the ASF dual-hosted git repository.
timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
The following commit(s) were added to refs/heads/main by this push:
new 486c407b Fix the edge case when handling non numeric values of double
type in delta stats
486c407b is described below
commit 486c407b7906193c60b4e44ce3a9f1d1c61f1af5
Author: Hanzhi Wang <[email protected]>
AuthorDate: Wed Sep 18 10:20:15 2024 -0700
Fix the edge case when handling non numeric values of double type in delta
stats
When the Delta snapshot is read and its information is loaded into the Delta
AddFile object, non-numeric values of float or double type (for example, "NaN"
or "-Infinity") in the column stats are represented as strings.
These special values need explicit handling; for reference, see how Delta handles them:
https://github.com/delta-io/delta/blob/master/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/DefaultJsonRow.java#L210
---
.../apache/xtable/delta/DeltaValueConverter.java | 32 ++++++++++++++++++++++
.../xtable/delta/TestDeltaValueConverter.java | 27 ++++++++++++++++++
2 files changed, 59 insertions(+)
diff --git
a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
index d9d15a04..6f837fe9 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
@@ -44,6 +44,12 @@ import org.apache.xtable.model.schema.PartitionTransformType;
public class DeltaValueConverter {
private static final String DATE_FORMAT_STR = "yyyy-MM-dd HH:mm:ss";
private static final TimeZone TIME_ZONE = TimeZone.getTimeZone("UTC");
+ protected static final String NAN_VALUE = "NaN";
+ protected static final String INFINITY_VALUE = "Infinity";
+ protected static final String POSITIVE_INFINITY_VALUE = "+Infinity";
+ protected static final String POSITIVE_INF_VALUE = "+INF";
+ protected static final String NEGATIVE_INFINITY_VALUE = "-Infinity";
+ protected static final String NEGATIVE_INF_VALUE = "-INF";
static DateFormat getDateFormat(String dataFormatString) {
DateFormat dateFormat = new SimpleDateFormat(dataFormatString);
@@ -194,9 +200,35 @@ public class DeltaValueConverter {
private static Object castObjectToInternalType(Object value, InternalType
valueType) {
switch (valueType) {
+ case DOUBLE:
+ if (value instanceof String)
+ switch (value.toString()) {
+ case NAN_VALUE:
+ return Double.NaN;
+ case POSITIVE_INF_VALUE:
+ case POSITIVE_INFINITY_VALUE:
+ case INFINITY_VALUE:
+ return Double.POSITIVE_INFINITY;
+ case NEGATIVE_INF_VALUE:
+ case NEGATIVE_INFINITY_VALUE:
+ return Double.NEGATIVE_INFINITY;
+ }
+ break;
case FLOAT:
if (value instanceof Double) {
return ((Double) value).floatValue();
+ } else if (value instanceof String) {
+ switch (value.toString()) {
+ case NAN_VALUE:
+ return Float.NaN;
+ case POSITIVE_INF_VALUE:
+ case POSITIVE_INFINITY_VALUE:
+ case INFINITY_VALUE:
+ return Float.POSITIVE_INFINITY;
+ case NEGATIVE_INF_VALUE:
+ case NEGATIVE_INFINITY_VALUE:
+ return Float.NEGATIVE_INFINITY;
+ }
}
break;
case DECIMAL:
diff --git
a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
index 0fb501c9..1cdd33a7 100644
---
a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
+++
b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
@@ -82,6 +82,15 @@ public class TestDeltaValueConverter {
assertThrows(ParseException.class, () ->
strictDateFormat.parse(wrongDateTime));
}
+ @ParameterizedTest
+ @MethodSource("nonNumericValuesForColStats")
+ void formattedDifferentNonNumericValuesFromDeltaColumnStat(
+ Object fieldValue, InternalSchema fieldSchema, Object
expectedDeltaValue) {
+ Object internalRepresentation =
+ DeltaValueConverter.convertFromDeltaColumnStatValue(fieldValue,
fieldSchema);
+ assertEquals(internalRepresentation, expectedDeltaValue);
+ }
+
private static Stream<Arguments> valuesWithSchemaProviderForColStats() {
return Stream.of(
Arguments.of(
@@ -187,4 +196,22 @@ public class TestDeltaValueConverter {
"yyyy",
"2022"));
}
+
+ private static Stream<Arguments> nonNumericValuesForColStats() {
+ InternalSchema doubleSchema =
+
InternalSchema.builder().name("double").dataType(InternalType.DOUBLE).build();
+ InternalSchema floatSchema =
+
InternalSchema.builder().name("float").dataType(InternalType.FLOAT).build();
+ return Stream.of(
+ Arguments.of("NaN", doubleSchema, Double.NaN),
+ Arguments.of("Infinity", doubleSchema, Double.POSITIVE_INFINITY),
+ Arguments.of("-Infinity", doubleSchema, Double.NEGATIVE_INFINITY),
+ Arguments.of("+Infinity", doubleSchema, Double.POSITIVE_INFINITY),
+ Arguments.of("NaN", floatSchema, Float.NaN),
+ Arguments.of("Infinity", floatSchema, Float.POSITIVE_INFINITY),
+ Arguments.of("-Infinity", floatSchema, Float.NEGATIVE_INFINITY),
+ Arguments.of("+Infinity", floatSchema, Float.POSITIVE_INFINITY),
+ Arguments.of(Double.NaN, doubleSchema, Double.NaN),
+ Arguments.of(Double.POSITIVE_INFINITY, doubleSchema,
Double.POSITIVE_INFINITY));
+ }
}