yihua commented on code in PR #13711:
URL: https://github.com/apache/hudi/pull/13711#discussion_r2353702608
##########
hudi-common/src/main/java/org/apache/hudi/internal/schema/Type.java:
##########
@@ -64,7 +64,14 @@ enum TypeID {
TIME(Long.class),
TIMESTAMP(Long.class),
DECIMAL(BigDecimal.class),
- UUID(UUID.class);
+ UUID(UUID.class),
+ DECIMAL_BYTES(BigDecimal.class),
+ DECIMAL_FIXED(BigDecimal.class),
Review Comment:
Is this for Avro compatibility? I'm trying to recall the reason for
adding two more decimal representation types.
##########
hudi-common/src/main/java/org/apache/hudi/internal/schema/Types.java:
##########
@@ -353,6 +347,51 @@ public int precision() {
return precision;
}
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ } else if (!(o instanceof DecimalBase)) {
+ return false;
+ }
+
+ DecimalBase that = (DecimalBase) o;
+ if (scale != that.scale) {
+ return false;
+ }
+ return precision == that.precision;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(this.getClass(), scale, precision);
+ }
+ }
+
+ /**
+ * Decimal primitive type.
+ */
+ public static class DecimalType extends DecimalTypeFixed {
+ public static DecimalType get(int precision, int scale) {
+ return new DecimalType(precision, scale);
+ }
+
+ /**
+ * Return the minimum number of bytes needed to store a decimal with a
give 'precision'.
+ * reference from Spark release 3.1 .
+ */
+ private static int computeMinBytesForDecimalPrecision(int precision) {
Review Comment:
Now I see the min bytes calculation is moved here.
##########
hudi-common/src/main/java/org/apache/hudi/stats/ValueType.java:
##########
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.stats;
+
+import org.apache.hudi.avro.AvroSchemaUtils;
+import org.apache.hudi.avro.HoodieAvroWrapperUtils;
+import org.apache.hudi.common.util.DateTimeUtils;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeTokenParser;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneOffset;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import static org.apache.hudi.avro.HoodieAvroUtils.convertBytesToBigDecimal;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToNanos;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static org.apache.hudi.common.util.DateTimeUtils.nanosToInstant;
+import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
+
+public enum ValueType {
+ V1(HoodieAvroWrapperUtils.PrimitiveWrapperType.V1.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.V1,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ NULL(HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ BOOLEAN(HoodieAvroWrapperUtils.PrimitiveWrapperType.BOOLEAN,
ValueType::castToBoolean),
+ INT(HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
ValueType::castToInteger),
+ LONG(HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
ValueType::castToLong),
+ FLOAT(HoodieAvroWrapperUtils.PrimitiveWrapperType.FLOAT,
ValueType::castToFloat),
+ DOUBLE(HoodieAvroWrapperUtils.PrimitiveWrapperType.DOUBLE,
ValueType::castToDouble),
+ STRING(HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
ValueType::castToString),
+ BYTES(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToBytes),
+ FIXED(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToFixed),
+ DECIMAL(BigDecimal.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
+ ValueType::castToDecimal, ValueType::toDecimal, ValueType::fromDecimal),
+ UUID(UUID.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
+ ValueType::castToUUID, ValueType::toUUID, ValueType::fromUUID),
+ DATE(LocalDate.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToDate, ValueType::toDate, ValueType::fromDate),
+ TIME_MILLIS(LocalTime.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToTimeMillis, ValueType::toTimeMillis,
ValueType::fromTimeMillis),
+ TIME_MICROS(LocalTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimeMicros, ValueType::toTimeMicros,
ValueType::fromTimeMicros),
+ TIMESTAMP_MILLIS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMillis, ValueType::toTimestampMillis,
ValueType::fromTimestampMillis),
+ TIMESTAMP_MICROS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMicros, ValueType::toTimestampMicros,
ValueType::fromTimestampMicros),
+ TIMESTAMP_NANOS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampNanos, ValueType::toTimestampNanos,
ValueType::fromTimestampNanos),
+ LOCAL_TIMESTAMP_MILLIS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMillis,
ValueType::toLocalTimestampMillis, ValueType::fromLocalTimestampMillis),
+ LOCAL_TIMESTAMP_MICROS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMicros,
ValueType::toLocalTimestampMicros, ValueType::fromLocalTimestampMicros),
+ LOCAL_TIMESTAMP_NANOS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampNanos, ValueType::toLocalTimestampNanos,
ValueType::fromLocalTimestampNanos);
+
+ private final Class<?> internalType;
+ private final HoodieAvroWrapperUtils.PrimitiveWrapperType
primitiveWrapperType;
+ private final BiFunction<Object, ValueMetadata, Comparable<?>> standardize;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toComplex;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive;
+
+ ValueType(HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
Function<Object, Object> single) {
+ this(primitiveWrapperType.getClazz(),
+ primitiveWrapperType,
+ (val, meta) -> (Comparable<?>) single.apply(val),
+ ValueType::passThrough,
+ ValueType::passThrough);
+ }
+
+ ValueType(Class<?> internalType,
+ HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
+ BiFunction<Object, ValueMetadata, Comparable<?>> standardize,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>> toComplex,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive) {
+ this.internalType = internalType;
+ this.primitiveWrapperType = primitiveWrapperType;
+ this.standardize = standardize;
+ this.toComplex = toComplex;
+ this.toPrimitive = toPrimitive;
+ }
+
+ Comparable<?> standardizeJavaTypeAndPromote(Object val, ValueMetadata meta) {
+ if (val == null) {
+ return null;
+ }
+ return standardize.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoPrimitive(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toPrimitive.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoComplex(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toComplex.apply(val, meta);
+ }
+
+ void validate(Object val) {
+ if (val == null) {
+ return;
+ }
+
+ if (!internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+
+ public Object wrapValue(Comparable<?> val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.wrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+ if (!this.internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ this.internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ return primitiveWrapperType.wrap(convertIntoPrimitive(val, meta));
+ }
+
+ public Comparable<?> unwrapValue(Object val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.unwrap(val);
Review Comment:
Are V1 logical value wrappers, e.g., `DateWrapper`, `DecimalWrapper`,
`TimeMicrosWrapper`, handled separately?
##########
hudi-common/src/main/java/org/apache/hudi/stats/ValueType.java:
##########
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.stats;
+
+import org.apache.hudi.avro.AvroSchemaUtils;
+import org.apache.hudi.avro.HoodieAvroWrapperUtils;
+import org.apache.hudi.common.util.DateTimeUtils;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeTokenParser;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneOffset;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import static org.apache.hudi.avro.HoodieAvroUtils.convertBytesToBigDecimal;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToNanos;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static org.apache.hudi.common.util.DateTimeUtils.nanosToInstant;
+import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
+
+public enum ValueType {
+ V1(HoodieAvroWrapperUtils.PrimitiveWrapperType.V1.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.V1,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ NULL(HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ BOOLEAN(HoodieAvroWrapperUtils.PrimitiveWrapperType.BOOLEAN,
ValueType::castToBoolean),
+ INT(HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
ValueType::castToInteger),
+ LONG(HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
ValueType::castToLong),
+ FLOAT(HoodieAvroWrapperUtils.PrimitiveWrapperType.FLOAT,
ValueType::castToFloat),
+ DOUBLE(HoodieAvroWrapperUtils.PrimitiveWrapperType.DOUBLE,
ValueType::castToDouble),
+ STRING(HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
ValueType::castToString),
+ BYTES(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToBytes),
+ FIXED(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToFixed),
+ DECIMAL(BigDecimal.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
+ ValueType::castToDecimal, ValueType::toDecimal, ValueType::fromDecimal),
+ UUID(UUID.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
+ ValueType::castToUUID, ValueType::toUUID, ValueType::fromUUID),
+ DATE(LocalDate.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToDate, ValueType::toDate, ValueType::fromDate),
+ TIME_MILLIS(LocalTime.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToTimeMillis, ValueType::toTimeMillis,
ValueType::fromTimeMillis),
+ TIME_MICROS(LocalTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimeMicros, ValueType::toTimeMicros,
ValueType::fromTimeMicros),
+ TIMESTAMP_MILLIS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMillis, ValueType::toTimestampMillis,
ValueType::fromTimestampMillis),
+ TIMESTAMP_MICROS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMicros, ValueType::toTimestampMicros,
ValueType::fromTimestampMicros),
+ TIMESTAMP_NANOS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampNanos, ValueType::toTimestampNanos,
ValueType::fromTimestampNanos),
+ LOCAL_TIMESTAMP_MILLIS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMillis,
ValueType::toLocalTimestampMillis, ValueType::fromLocalTimestampMillis),
+ LOCAL_TIMESTAMP_MICROS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMicros,
ValueType::toLocalTimestampMicros, ValueType::fromLocalTimestampMicros),
+ LOCAL_TIMESTAMP_NANOS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampNanos, ValueType::toLocalTimestampNanos,
ValueType::fromLocalTimestampNanos);
+
+ private final Class<?> internalType;
+ private final HoodieAvroWrapperUtils.PrimitiveWrapperType
primitiveWrapperType;
+ private final BiFunction<Object, ValueMetadata, Comparable<?>> standardize;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toComplex;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive;
+
+ ValueType(HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
Function<Object, Object> single) {
+ this(primitiveWrapperType.getClazz(),
+ primitiveWrapperType,
+ (val, meta) -> (Comparable<?>) single.apply(val),
+ ValueType::passThrough,
+ ValueType::passThrough);
+ }
+
+ ValueType(Class<?> internalType,
+ HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
+ BiFunction<Object, ValueMetadata, Comparable<?>> standardize,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>> toComplex,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive) {
+ this.internalType = internalType;
+ this.primitiveWrapperType = primitiveWrapperType;
+ this.standardize = standardize;
+ this.toComplex = toComplex;
+ this.toPrimitive = toPrimitive;
+ }
+
+ Comparable<?> standardizeJavaTypeAndPromote(Object val, ValueMetadata meta) {
+ if (val == null) {
+ return null;
+ }
+ return standardize.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoPrimitive(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toPrimitive.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoComplex(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toComplex.apply(val, meta);
+ }
+
+ void validate(Object val) {
+ if (val == null) {
+ return;
+ }
+
+ if (!internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+
+ public Object wrapValue(Comparable<?> val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.wrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+ if (!this.internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ this.internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ return primitiveWrapperType.wrap(convertIntoPrimitive(val, meta));
+ }
+
+ public Comparable<?> unwrapValue(Object val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.unwrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+
+ if (!primitiveWrapperType.getWrapperClass().isInstance(val)) {
+ if (!(val instanceof GenericRecord)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ primitiveWrapperType.getWrapperClass().getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ } else if (((GenericRecord) val).getSchema().getField("value") != null) {
+ return
standardizeJavaTypeAndPromote(HoodieAvroWrapperUtils.unwrapGenericRecord(val),
meta);
+ } else {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ primitiveWrapperType.getWrapperClass().getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+ return convertIntoComplex(primitiveWrapperType.unwrap(val), meta);
+ }
+
+ private static ValueType[] myEnumValues;
+
+ public static ValueType fromInt(int i) {
+ if (ValueType.myEnumValues == null) {
+ ValueType.myEnumValues = ValueType.values();
+ }
+ return ValueType.myEnumValues[i];
+ }
+
+ public static ValueType fromPrimitiveType(PrimitiveType primitiveType) {
Review Comment:
This is Parquet-specific, so should it be renamed to make that explicit?
```suggestion
public static ValueType fromParquetPrimitiveType(PrimitiveType
primitiveType) {
```
##########
hudi-common/src/main/java/org/apache/hudi/stats/ValueType.java:
##########
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.stats;
+
+import org.apache.hudi.avro.AvroSchemaUtils;
+import org.apache.hudi.avro.HoodieAvroWrapperUtils;
+import org.apache.hudi.common.util.DateTimeUtils;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeTokenParser;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneOffset;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import static org.apache.hudi.avro.HoodieAvroUtils.convertBytesToBigDecimal;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToNanos;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static org.apache.hudi.common.util.DateTimeUtils.nanosToInstant;
+import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
+
+public enum ValueType {
+ V1(HoodieAvroWrapperUtils.PrimitiveWrapperType.V1.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.V1,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ NULL(HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ BOOLEAN(HoodieAvroWrapperUtils.PrimitiveWrapperType.BOOLEAN,
ValueType::castToBoolean),
+ INT(HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
ValueType::castToInteger),
+ LONG(HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
ValueType::castToLong),
+ FLOAT(HoodieAvroWrapperUtils.PrimitiveWrapperType.FLOAT,
ValueType::castToFloat),
+ DOUBLE(HoodieAvroWrapperUtils.PrimitiveWrapperType.DOUBLE,
ValueType::castToDouble),
+ STRING(HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
ValueType::castToString),
+ BYTES(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToBytes),
+ FIXED(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToFixed),
+ DECIMAL(BigDecimal.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
+ ValueType::castToDecimal, ValueType::toDecimal, ValueType::fromDecimal),
+ UUID(UUID.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
+ ValueType::castToUUID, ValueType::toUUID, ValueType::fromUUID),
+ DATE(LocalDate.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToDate, ValueType::toDate, ValueType::fromDate),
+ TIME_MILLIS(LocalTime.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToTimeMillis, ValueType::toTimeMillis,
ValueType::fromTimeMillis),
+ TIME_MICROS(LocalTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimeMicros, ValueType::toTimeMicros,
ValueType::fromTimeMicros),
+ TIMESTAMP_MILLIS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMillis, ValueType::toTimestampMillis,
ValueType::fromTimestampMillis),
+ TIMESTAMP_MICROS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMicros, ValueType::toTimestampMicros,
ValueType::fromTimestampMicros),
+ TIMESTAMP_NANOS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampNanos, ValueType::toTimestampNanos,
ValueType::fromTimestampNanos),
+ LOCAL_TIMESTAMP_MILLIS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMillis,
ValueType::toLocalTimestampMillis, ValueType::fromLocalTimestampMillis),
+ LOCAL_TIMESTAMP_MICROS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMicros,
ValueType::toLocalTimestampMicros, ValueType::fromLocalTimestampMicros),
+ LOCAL_TIMESTAMP_NANOS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampNanos, ValueType::toLocalTimestampNanos,
ValueType::fromLocalTimestampNanos);
+
+ private final Class<?> internalType;
+ private final HoodieAvroWrapperUtils.PrimitiveWrapperType
primitiveWrapperType;
+ private final BiFunction<Object, ValueMetadata, Comparable<?>> standardize;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toComplex;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive;
+
+ ValueType(HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
Function<Object, Object> single) {
+ this(primitiveWrapperType.getClazz(),
+ primitiveWrapperType,
+ (val, meta) -> (Comparable<?>) single.apply(val),
+ ValueType::passThrough,
+ ValueType::passThrough);
+ }
+
+ ValueType(Class<?> internalType,
+ HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
+ BiFunction<Object, ValueMetadata, Comparable<?>> standardize,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>> toComplex,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive) {
+ this.internalType = internalType;
+ this.primitiveWrapperType = primitiveWrapperType;
+ this.standardize = standardize;
+ this.toComplex = toComplex;
+ this.toPrimitive = toPrimitive;
+ }
+
+ Comparable<?> standardizeJavaTypeAndPromote(Object val, ValueMetadata meta) {
+ if (val == null) {
+ return null;
+ }
+ return standardize.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoPrimitive(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toPrimitive.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoComplex(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toComplex.apply(val, meta);
+ }
+
+ void validate(Object val) {
+ if (val == null) {
+ return;
+ }
+
+ if (!internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+
+ public Object wrapValue(Comparable<?> val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.wrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+ if (!this.internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ this.internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ return primitiveWrapperType.wrap(convertIntoPrimitive(val, meta));
+ }
+
+ public Comparable<?> unwrapValue(Object val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.unwrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+
+ if (!primitiveWrapperType.getWrapperClass().isInstance(val)) {
+ if (!(val instanceof GenericRecord)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ primitiveWrapperType.getWrapperClass().getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ } else if (((GenericRecord) val).getSchema().getField("value") != null) {
+ return
standardizeJavaTypeAndPromote(HoodieAvroWrapperUtils.unwrapGenericRecord(val),
meta);
+ } else {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ primitiveWrapperType.getWrapperClass().getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+ return convertIntoComplex(primitiveWrapperType.unwrap(val), meta);
+ }
+
+ private static ValueType[] myEnumValues;
+
+ public static ValueType fromInt(int i) {
Review Comment:
```suggestion
public static ValueType fromOrdinal(int i) {
```
##########
hudi-common/src/main/java/org/apache/hudi/stats/ValueType.java:
##########
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.stats;
+
+import org.apache.hudi.avro.AvroSchemaUtils;
+import org.apache.hudi.avro.HoodieAvroWrapperUtils;
+import org.apache.hudi.common.util.DateTimeUtils;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeTokenParser;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneOffset;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import static org.apache.hudi.avro.HoodieAvroUtils.convertBytesToBigDecimal;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToNanos;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static org.apache.hudi.common.util.DateTimeUtils.nanosToInstant;
+import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
+
+public enum ValueType {
+ V1(HoodieAvroWrapperUtils.PrimitiveWrapperType.V1.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.V1,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ NULL(HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ BOOLEAN(HoodieAvroWrapperUtils.PrimitiveWrapperType.BOOLEAN,
ValueType::castToBoolean),
+ INT(HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
ValueType::castToInteger),
+ LONG(HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
ValueType::castToLong),
+ FLOAT(HoodieAvroWrapperUtils.PrimitiveWrapperType.FLOAT,
ValueType::castToFloat),
+ DOUBLE(HoodieAvroWrapperUtils.PrimitiveWrapperType.DOUBLE,
ValueType::castToDouble),
+ STRING(HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
ValueType::castToString),
+ BYTES(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToBytes),
+ FIXED(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToFixed),
+ DECIMAL(BigDecimal.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
+ ValueType::castToDecimal, ValueType::toDecimal, ValueType::fromDecimal),
+ UUID(UUID.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
+ ValueType::castToUUID, ValueType::toUUID, ValueType::fromUUID),
+ DATE(LocalDate.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToDate, ValueType::toDate, ValueType::fromDate),
+ TIME_MILLIS(LocalTime.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToTimeMillis, ValueType::toTimeMillis,
ValueType::fromTimeMillis),
+ TIME_MICROS(LocalTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimeMicros, ValueType::toTimeMicros,
ValueType::fromTimeMicros),
+ TIMESTAMP_MILLIS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMillis, ValueType::toTimestampMillis,
ValueType::fromTimestampMillis),
+ TIMESTAMP_MICROS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMicros, ValueType::toTimestampMicros,
ValueType::fromTimestampMicros),
+ TIMESTAMP_NANOS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampNanos, ValueType::toTimestampNanos,
ValueType::fromTimestampNanos),
+ LOCAL_TIMESTAMP_MILLIS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMillis,
ValueType::toLocalTimestampMillis, ValueType::fromLocalTimestampMillis),
+ LOCAL_TIMESTAMP_MICROS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMicros,
ValueType::toLocalTimestampMicros, ValueType::fromLocalTimestampMicros),
+ LOCAL_TIMESTAMP_NANOS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampNanos, ValueType::toLocalTimestampNanos,
ValueType::fromLocalTimestampNanos);
+
+ private final Class<?> internalType;
+ private final HoodieAvroWrapperUtils.PrimitiveWrapperType
primitiveWrapperType;
+ private final BiFunction<Object, ValueMetadata, Comparable<?>> standardize;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toComplex;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive;
+
+ ValueType(HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
Function<Object, Object> single) {
Review Comment:
nit: what does `single` mean here?
##########
hudi-common/src/main/java/org/apache/hudi/stats/ValueType.java:
##########
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.stats;
+
+import org.apache.hudi.avro.AvroSchemaUtils;
+import org.apache.hudi.avro.HoodieAvroWrapperUtils;
+import org.apache.hudi.common.util.DateTimeUtils;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeTokenParser;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneOffset;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import static org.apache.hudi.avro.HoodieAvroUtils.convertBytesToBigDecimal;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToNanos;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static org.apache.hudi.common.util.DateTimeUtils.nanosToInstant;
+import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
+
+public enum ValueType {
+ V1(HoodieAvroWrapperUtils.PrimitiveWrapperType.V1.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.V1,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ NULL(HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ BOOLEAN(HoodieAvroWrapperUtils.PrimitiveWrapperType.BOOLEAN,
ValueType::castToBoolean),
+ INT(HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
ValueType::castToInteger),
+ LONG(HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
ValueType::castToLong),
+ FLOAT(HoodieAvroWrapperUtils.PrimitiveWrapperType.FLOAT,
ValueType::castToFloat),
+ DOUBLE(HoodieAvroWrapperUtils.PrimitiveWrapperType.DOUBLE,
ValueType::castToDouble),
+ STRING(HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
ValueType::castToString),
+ BYTES(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToBytes),
+ FIXED(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToFixed),
+ DECIMAL(BigDecimal.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
+ ValueType::castToDecimal, ValueType::toDecimal, ValueType::fromDecimal),
+ UUID(UUID.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
+ ValueType::castToUUID, ValueType::toUUID, ValueType::fromUUID),
+ DATE(LocalDate.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToDate, ValueType::toDate, ValueType::fromDate),
+ TIME_MILLIS(LocalTime.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToTimeMillis, ValueType::toTimeMillis,
ValueType::fromTimeMillis),
+ TIME_MICROS(LocalTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimeMicros, ValueType::toTimeMicros,
ValueType::fromTimeMicros),
+ TIMESTAMP_MILLIS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMillis, ValueType::toTimestampMillis,
ValueType::fromTimestampMillis),
+ TIMESTAMP_MICROS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMicros, ValueType::toTimestampMicros,
ValueType::fromTimestampMicros),
+ TIMESTAMP_NANOS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampNanos, ValueType::toTimestampNanos,
ValueType::fromTimestampNanos),
+ LOCAL_TIMESTAMP_MILLIS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMillis,
ValueType::toLocalTimestampMillis, ValueType::fromLocalTimestampMillis),
+ LOCAL_TIMESTAMP_MICROS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMicros,
ValueType::toLocalTimestampMicros, ValueType::fromLocalTimestampMicros),
+ LOCAL_TIMESTAMP_NANOS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampNanos, ValueType::toLocalTimestampNanos,
ValueType::fromLocalTimestampNanos);
+
+ private final Class<?> internalType;
+ private final HoodieAvroWrapperUtils.PrimitiveWrapperType
primitiveWrapperType;
+ private final BiFunction<Object, ValueMetadata, Comparable<?>> standardize;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toComplex;
Review Comment:
"complex" usually refers to records, arrays, maps, etc.
```suggestion
private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toLogical;
```
##########
hudi-common/src/main/java/org/apache/hudi/stats/ValueType.java:
##########
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.stats;
+
+import org.apache.hudi.avro.AvroSchemaUtils;
+import org.apache.hudi.avro.HoodieAvroWrapperUtils;
+import org.apache.hudi.common.util.DateTimeUtils;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeTokenParser;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneOffset;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+
+import static org.apache.hudi.avro.HoodieAvroUtils.convertBytesToBigDecimal;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.instantToNanos;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static org.apache.hudi.common.util.DateTimeUtils.nanosToInstant;
+import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
+
+public enum ValueType {
+ V1(HoodieAvroWrapperUtils.PrimitiveWrapperType.V1.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.V1,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ NULL(HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL.getClazz(),
HoodieAvroWrapperUtils.PrimitiveWrapperType.NULL,
+ ValueType::passThrough, ValueType::passThrough, ValueType::passThrough),
+ BOOLEAN(HoodieAvroWrapperUtils.PrimitiveWrapperType.BOOLEAN,
ValueType::castToBoolean),
+ INT(HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
ValueType::castToInteger),
+ LONG(HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
ValueType::castToLong),
+ FLOAT(HoodieAvroWrapperUtils.PrimitiveWrapperType.FLOAT,
ValueType::castToFloat),
+ DOUBLE(HoodieAvroWrapperUtils.PrimitiveWrapperType.DOUBLE,
ValueType::castToDouble),
+ STRING(HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
ValueType::castToString),
+ BYTES(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToBytes),
+ FIXED(HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
ValueType::castToFixed),
+ DECIMAL(BigDecimal.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.BYTES,
+ ValueType::castToDecimal, ValueType::toDecimal, ValueType::fromDecimal),
+ UUID(UUID.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.STRING,
+ ValueType::castToUUID, ValueType::toUUID, ValueType::fromUUID),
+ DATE(LocalDate.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToDate, ValueType::toDate, ValueType::fromDate),
+ TIME_MILLIS(LocalTime.class, HoodieAvroWrapperUtils.PrimitiveWrapperType.INT,
+ ValueType::castToTimeMillis, ValueType::toTimeMillis,
ValueType::fromTimeMillis),
+ TIME_MICROS(LocalTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimeMicros, ValueType::toTimeMicros,
ValueType::fromTimeMicros),
+ TIMESTAMP_MILLIS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMillis, ValueType::toTimestampMillis,
ValueType::fromTimestampMillis),
+ TIMESTAMP_MICROS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampMicros, ValueType::toTimestampMicros,
ValueType::fromTimestampMicros),
+ TIMESTAMP_NANOS(Instant.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToTimestampNanos, ValueType::toTimestampNanos,
ValueType::fromTimestampNanos),
+ LOCAL_TIMESTAMP_MILLIS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMillis,
ValueType::toLocalTimestampMillis, ValueType::fromLocalTimestampMillis),
+ LOCAL_TIMESTAMP_MICROS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampMicros,
ValueType::toLocalTimestampMicros, ValueType::fromLocalTimestampMicros),
+ LOCAL_TIMESTAMP_NANOS(LocalDateTime.class,
HoodieAvroWrapperUtils.PrimitiveWrapperType.LONG,
+ ValueType::castToLocalTimestampNanos, ValueType::toLocalTimestampNanos,
ValueType::fromLocalTimestampNanos);
+
+ private final Class<?> internalType;
+ private final HoodieAvroWrapperUtils.PrimitiveWrapperType
primitiveWrapperType;
+ private final BiFunction<Object, ValueMetadata, Comparable<?>> standardize;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toComplex;
+ private final BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive;
+
+ ValueType(HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
Function<Object, Object> single) {
+ this(primitiveWrapperType.getClazz(),
+ primitiveWrapperType,
+ (val, meta) -> (Comparable<?>) single.apply(val),
+ ValueType::passThrough,
+ ValueType::passThrough);
+ }
+
+ ValueType(Class<?> internalType,
+ HoodieAvroWrapperUtils.PrimitiveWrapperType primitiveWrapperType,
+ BiFunction<Object, ValueMetadata, Comparable<?>> standardize,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>> toComplex,
+ BiFunction<Comparable<?>, ValueMetadata, Comparable<?>>
toPrimitive) {
+ this.internalType = internalType;
+ this.primitiveWrapperType = primitiveWrapperType;
+ this.standardize = standardize;
+ this.toComplex = toComplex;
+ this.toPrimitive = toPrimitive;
+ }
+
+ Comparable<?> standardizeJavaTypeAndPromote(Object val, ValueMetadata meta) {
+ if (val == null) {
+ return null;
+ }
+ return standardize.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoPrimitive(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toPrimitive.apply(val, meta);
+ }
+
+ private Comparable<?> convertIntoComplex(Comparable<?> val, ValueMetadata
meta) {
+ if (val == null) {
+ return null;
+ }
+ return toComplex.apply(val, meta);
+ }
+
+ void validate(Object val) {
+ if (val == null) {
+ return;
+ }
+
+ if (!internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+
+ public Object wrapValue(Comparable<?> val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.wrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+ if (!this.internalType.isInstance(val)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ this.internalType.getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ return primitiveWrapperType.wrap(convertIntoPrimitive(val, meta));
+ }
+
+ public Comparable<?> unwrapValue(Object val, ValueMetadata meta) {
+ if (meta.getValueType() == V1) {
+ return primitiveWrapperType.unwrap(val);
+ }
+
+ if (val == null) {
+ return null;
+ }
+
+ if (!primitiveWrapperType.getWrapperClass().isInstance(val)) {
+ if (!(val instanceof GenericRecord)) {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ primitiveWrapperType.getWrapperClass().getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ } else if (((GenericRecord) val).getSchema().getField("value") != null) {
+ return
standardizeJavaTypeAndPromote(HoodieAvroWrapperUtils.unwrapGenericRecord(val),
meta);
+ } else {
+ throw new IllegalArgumentException(String.format(
+ "should be %s, but got %s",
+ primitiveWrapperType.getWrapperClass().getSimpleName(),
+ val.getClass().getSimpleName()
+ ));
+ }
+ }
+ return convertIntoComplex(primitiveWrapperType.unwrap(val), meta);
+ }
+
+ private static ValueType[] myEnumValues;
+
+ public static ValueType fromInt(int i) {
+ if (ValueType.myEnumValues == null) {
+ ValueType.myEnumValues = ValueType.values();
+ }
+ return ValueType.myEnumValues[i];
+ }
+
+ public static ValueType fromPrimitiveType(PrimitiveType primitiveType) {
+ if (primitiveType.getLogicalTypeAnnotation() != null) {
+ return LogicalTypeTokenParser.fromLogicalTypeAnnotation(primitiveType);
+ }
+ switch (primitiveType.getPrimitiveTypeName()) {
+ case INT64:
+ return ValueType.LONG;
+ case INT32:
+ return ValueType.INT;
+ case BOOLEAN:
+ return ValueType.BOOLEAN;
+ case BINARY:
+ return ValueType.BYTES;
+ case FLOAT:
+ return ValueType.FLOAT;
+ case DOUBLE:
+ return ValueType.DOUBLE;
+ case INT96:
+ // TODO: probably wrong
+ return ValueType.DECIMAL;
Review Comment:
I'm not sure if INT96 is supported; I think Spark has deprecated writing
INT96 in Parquet.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]