This is an automated email from the ASF dual-hosted git repository.
jonvex pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new ad79968bca78 [MINOR] Move Avro Wrapper Utils from HoodieAvroUtils to HoodieAvroWrapperUtils (#13730)
ad79968bca78 is described below
commit ad79968bca78ba33a87d13937b297251d1bedd9b
Author: Jon Vexler <[email protected]>
AuthorDate: Sun Aug 17 17:48:01 2025 -0400
[MINOR] Move Avro Wrapper Utils from HoodieAvroUtils to HoodieAvroWrapperUtils (#13730)
---
.../apache/hudi/index/bloom/HoodieBloomIndex.java | 2 +-
.../java/org/apache/hudi/avro/HoodieAvroUtils.java | 201 -----------------
.../apache/hudi/avro/HoodieAvroWrapperUtils.java | 241 +++++++++++++++++++++
.../common/model/HoodieColumnRangeMetadata.java | 2 +-
.../common/table/log/block/HoodieDeleteBlock.java | 6 +-
.../hudi/metadata/HoodieMetadataPayload.java | 2 +-
.../hudi/metadata/HoodieTableMetadataUtil.java | 4 +-
.../hudi/metadata/MetadataPartitionType.java | 4 +-
.../org/apache/hudi/avro/TestHoodieAvroUtils.java | 4 +-
.../hudi/functional/TestHoodieBackedMetadata.java | 2 +-
.../TestMetadataTableWithSparkDataSource.scala | 10 +-
11 files changed, 259 insertions(+), 219 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
index e6f530a01d2d..4e9340664e20 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
@@ -52,7 +52,7 @@ import java.util.stream.Stream;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toList;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
+import static
org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
import static
org.apache.hudi.index.HoodieIndexUtils.getLatestBaseFilesForAllPartitions;
import static org.apache.hudi.metadata.MetadataPartitionType.COLUMN_STATS;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
index c74c6a698831..ff35d70158c7 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
@@ -18,34 +18,19 @@
package org.apache.hudi.avro;
-import org.apache.hudi.avro.model.ArrayWrapper;
-import org.apache.hudi.avro.model.BooleanWrapper;
-import org.apache.hudi.avro.model.BytesWrapper;
-import org.apache.hudi.avro.model.DateWrapper;
-import org.apache.hudi.avro.model.DecimalWrapper;
-import org.apache.hudi.avro.model.DoubleWrapper;
-import org.apache.hudi.avro.model.FloatWrapper;
-import org.apache.hudi.avro.model.IntWrapper;
-import org.apache.hudi.avro.model.LocalDateWrapper;
-import org.apache.hudi.avro.model.LongWrapper;
-import org.apache.hudi.avro.model.StringWrapper;
-import org.apache.hudi.avro.model.TimestampMicrosWrapper;
import org.apache.hudi.common.config.SerializableSchema;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.OrderingValues;
import org.apache.hudi.common.util.SpillableMapUtils;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.VisibleForTesting;
-import org.apache.hudi.common.util.collection.ArrayComparable;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieAvroSchemaException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.SchemaCompatibilityException;
-import org.apache.hudi.util.Lazy;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Conversions;
@@ -70,7 +55,6 @@ import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonDecoder;
import org.apache.avro.io.JsonEncoder;
import org.apache.avro.specific.SpecificRecordBase;
-import org.apache.avro.util.Utf8;
import javax.annotation.Nullable;
@@ -84,7 +68,6 @@ import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Timestamp;
-import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
@@ -115,11 +98,8 @@ import static
org.apache.hudi.avro.AvroSchemaUtils.createNewSchemaFromFieldsWith
import static org.apache.hudi.avro.AvroSchemaUtils.createNullableSchema;
import static org.apache.hudi.avro.AvroSchemaUtils.isNullable;
import static org.apache.hudi.avro.AvroSchemaUtils.resolveNullableSchema;
-import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
-import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
-import static
org.apache.hudi.metadata.HoodieTableMetadataUtil.tryUpcastDecimal;
/**
* Helper class to do common stuff across Avro.
@@ -131,35 +111,6 @@ public class HoodieAvroUtils {
private static final ThreadLocal<BinaryEncoder> BINARY_ENCODER =
ThreadLocal.withInitial(() -> null);
private static final ThreadLocal<BinaryDecoder> BINARY_DECODER =
ThreadLocal.withInitial(() -> null);
- private static final Conversions.DecimalConversion AVRO_DECIMAL_CONVERSION =
new Conversions.DecimalConversion();
- /**
- * NOTE: PLEASE READ CAREFULLY
- * <p>
- * In Avro 1.10 generated builders rely on {@code SpecificData.getForSchema}
invocation that in turn
- * does use reflection to load the code-gen'd class corresponding to the
Avro record model. This has
- * serious adverse effects in terms of performance when gets executed on the
hot-path (both, in terms
- * of runtime and efficiency).
- * <p>
- * To work this around instead of using default code-gen'd builder invoking
{@code SpecificData.getForSchema},
- * we instead rely on overloaded ctor accepting another instance of the
builder: {@code Builder(Builder)},
- * which bypasses such invocation. Following corresponding builder's stubs
are statically initialized
- * to be used exactly for that purpose.
- * <p>
- * You can find more details in HUDI-3834.
- */
- private static final Lazy<StringWrapper.Builder> STRING_WRAPPER_BUILDER_STUB
= Lazy.lazily(StringWrapper::newBuilder);
- private static final Lazy<BytesWrapper.Builder> BYTES_WRAPPER_BUILDER_STUB =
Lazy.lazily(BytesWrapper::newBuilder);
- private static final Lazy<DoubleWrapper.Builder> DOUBLE_WRAPPER_BUILDER_STUB
= Lazy.lazily(DoubleWrapper::newBuilder);
- private static final Lazy<FloatWrapper.Builder> FLOAT_WRAPPER_BUILDER_STUB =
Lazy.lazily(FloatWrapper::newBuilder);
- private static final Lazy<LongWrapper.Builder> LONG_WRAPPER_BUILDER_STUB =
Lazy.lazily(LongWrapper::newBuilder);
- private static final Lazy<IntWrapper.Builder> INT_WRAPPER_BUILDER_STUB =
Lazy.lazily(IntWrapper::newBuilder);
- private static final Lazy<BooleanWrapper.Builder>
BOOLEAN_WRAPPER_BUILDER_STUB = Lazy.lazily(BooleanWrapper::newBuilder);
- private static final Lazy<TimestampMicrosWrapper.Builder>
TIMESTAMP_MICROS_WRAPPER_BUILDER_STUB =
Lazy.lazily(TimestampMicrosWrapper::newBuilder);
- private static final Lazy<DecimalWrapper.Builder>
DECIMAL_WRAPPER_BUILDER_STUB = Lazy.lazily(DecimalWrapper::newBuilder);
- private static final Lazy<DateWrapper.Builder> DATE_WRAPPER_BUILDER_STUB =
Lazy.lazily(DateWrapper::newBuilder);
- private static final Lazy<LocalDateWrapper.Builder>
LOCAL_DATE_WRAPPER_BUILDER_STUB = Lazy.lazily(LocalDateWrapper::newBuilder);
- private static final Lazy<ArrayWrapper.Builder> ARRAY_WRAPPER_BUILDER_STUB =
Lazy.lazily(ArrayWrapper::newBuilder);
-
private static final long MILLIS_PER_DAY = 86400000L;
//Export for test
@@ -1580,158 +1531,6 @@ public class HoodieAvroUtils {
return StringUtils.compareVersions(AVRO_VERSION, "1.10") >= 0;
}
- /**
- * Wraps a value into Avro type wrapper.
- *
- * @param value Java value.
- * @return A wrapped value with Avro type wrapper.
- */
- public static Object wrapValueIntoAvro(Comparable<?> value) {
- if (value == null) {
- return null;
- } else if (value instanceof Date) {
- // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we
can't
- // rely on logical types to do proper encoding of the native Java
types,
- // and hereby have to encode value manually
- LocalDate localDate = ((Date) value).toLocalDate();
- return DateWrapper.newBuilder(DATE_WRAPPER_BUILDER_STUB.get())
- .setValue((int) localDate.toEpochDay())
- .build();
- } else if (value instanceof LocalDate) {
- // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we
can't
- // rely on logical types to do proper encoding of the native Java
types,
- // and hereby have to encode value manually
- LocalDate localDate = (LocalDate) value;
- return LocalDateWrapper.newBuilder(LOCAL_DATE_WRAPPER_BUILDER_STUB.get())
- .setValue((int) localDate.toEpochDay())
- .build();
- } else if (value instanceof BigDecimal) {
- Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema();
- BigDecimal upcastDecimal = tryUpcastDecimal((BigDecimal) value,
(LogicalTypes.Decimal) valueSchema.getLogicalType());
- return DecimalWrapper.newBuilder(DECIMAL_WRAPPER_BUILDER_STUB.get())
- .setValue(AVRO_DECIMAL_CONVERSION.toBytes(upcastDecimal,
valueSchema, valueSchema.getLogicalType()))
- .build();
- } else if (value instanceof Timestamp) {
- // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we
can't
- // rely on logical types to do proper encoding of the native Java
types,
- // and hereby have to encode value manually
- Instant instant = ((Timestamp) value).toInstant();
- return
TimestampMicrosWrapper.newBuilder(TIMESTAMP_MICROS_WRAPPER_BUILDER_STUB.get())
- .setValue(instantToMicros(instant))
- .build();
- } else if (value instanceof Boolean) {
- return
BooleanWrapper.newBuilder(BOOLEAN_WRAPPER_BUILDER_STUB.get()).setValue((Boolean)
value).build();
- } else if (value instanceof Integer) {
- return
IntWrapper.newBuilder(INT_WRAPPER_BUILDER_STUB.get()).setValue((Integer)
value).build();
- } else if (value instanceof Long) {
- return
LongWrapper.newBuilder(LONG_WRAPPER_BUILDER_STUB.get()).setValue((Long)
value).build();
- } else if (value instanceof Float) {
- return
FloatWrapper.newBuilder(FLOAT_WRAPPER_BUILDER_STUB.get()).setValue((Float)
value).build();
- } else if (value instanceof Double) {
- return
DoubleWrapper.newBuilder(DOUBLE_WRAPPER_BUILDER_STUB.get()).setValue((Double)
value).build();
- } else if (value instanceof ByteBuffer) {
- return
BytesWrapper.newBuilder(BYTES_WRAPPER_BUILDER_STUB.get()).setValue((ByteBuffer)
value).build();
- } else if (value instanceof String || value instanceof Utf8) {
- return
StringWrapper.newBuilder(STRING_WRAPPER_BUILDER_STUB.get()).setValue(value.toString()).build();
- } else if (value instanceof ArrayComparable) {
- List<Object> avroValues = OrderingValues.getValues((ArrayComparable)
value).stream().map(HoodieAvroUtils::wrapValueIntoAvro).collect(Collectors.toList());
- return
ArrayWrapper.newBuilder(ARRAY_WRAPPER_BUILDER_STUB.get()).setWrappedValues(avroValues).build();
- } else {
- throw new UnsupportedOperationException(String.format("Unsupported type
of the value (%s)", value.getClass()));
- }
- }
-
- /**
- * Unwraps Avro value wrapper into Java value.
- *
- * @param avroValueWrapper A wrapped value with Avro type wrapper.
- * @return Java value.
- */
- public static Comparable<?> unwrapAvroValueWrapper(Object avroValueWrapper) {
- if (avroValueWrapper == null) {
- return null;
- }
-
- Pair<Boolean, String> isValueWrapperObfuscated =
getIsValueWrapperObfuscated(avroValueWrapper);
- if (isValueWrapperObfuscated.getKey()) {
- return unwrapAvroValueWrapper(avroValueWrapper,
isValueWrapperObfuscated.getValue());
- }
-
- if (avroValueWrapper instanceof DateWrapper) {
- return Date.valueOf(LocalDate.ofEpochDay(((DateWrapper)
avroValueWrapper).getValue()));
- } else if (avroValueWrapper instanceof LocalDateWrapper) {
- return LocalDate.ofEpochDay(((LocalDateWrapper)
avroValueWrapper).getValue());
- } else if (avroValueWrapper instanceof DecimalWrapper) {
- Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema();
- return AVRO_DECIMAL_CONVERSION.fromBytes(((DecimalWrapper)
avroValueWrapper).getValue(), valueSchema, valueSchema.getLogicalType());
- } else if (avroValueWrapper instanceof TimestampMicrosWrapper) {
- return microsToInstant(((TimestampMicrosWrapper)
avroValueWrapper).getValue());
- } else if (avroValueWrapper instanceof BooleanWrapper) {
- return ((BooleanWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof IntWrapper) {
- return ((IntWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof LongWrapper) {
- return ((LongWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof FloatWrapper) {
- return ((FloatWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof DoubleWrapper) {
- return ((DoubleWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof BytesWrapper) {
- return ((BytesWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof StringWrapper) {
- return ((StringWrapper) avroValueWrapper).getValue();
- } else if (avroValueWrapper instanceof ArrayWrapper) {
- ArrayWrapper arrayWrapper = (ArrayWrapper) avroValueWrapper;
- return OrderingValues.create(arrayWrapper.getWrappedValues().stream()
- .map(HoodieAvroUtils::unwrapAvroValueWrapper)
- .toArray(Comparable[]::new));
- } else if (avroValueWrapper instanceof GenericRecord) {
- // NOTE: This branch could be hit b/c Avro records could be reconstructed
- // as {@code GenericRecord)
- // TODO add logical type decoding
- GenericRecord genRec = (GenericRecord) avroValueWrapper;
- return (Comparable<?>) genRec.get("value");
- } else {
- throw new UnsupportedOperationException(String.format("Unsupported type
of the value (%s)", avroValueWrapper.getClass()));
- }
- }
-
- public static Comparable<?> unwrapAvroValueWrapper(Object avroValueWrapper,
String wrapperClassName) {
- if (avroValueWrapper == null) {
- return null;
- } else if (DateWrapper.class.getSimpleName().equals(wrapperClassName)) {
- ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
- return Date.valueOf(LocalDate.ofEpochDay((Integer) ((GenericRecord)
avroValueWrapper).get(0)));
- } else if
(LocalDateWrapper.class.getSimpleName().equals(wrapperClassName)) {
- ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
- return LocalDate.ofEpochDay((Integer) ((GenericRecord)
avroValueWrapper).get(0));
- } else if
(TimestampMicrosWrapper.class.getSimpleName().equals(wrapperClassName)) {
- ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
- Instant instant = microsToInstant((Long) ((GenericRecord)
avroValueWrapper).get(0));
- return Timestamp.from(instant);
- } else if (DecimalWrapper.class.getSimpleName().equals(wrapperClassName)) {
- Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema();
- ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
- return AVRO_DECIMAL_CONVERSION.fromBytes((ByteBuffer)((GenericRecord)
avroValueWrapper).get(0), valueSchema, valueSchema.getLogicalType());
- } else {
- throw new UnsupportedOperationException(String.format("Unsupported type
of the value (%s)", avroValueWrapper.getClass()));
- }
- }
-
- private static Pair<Boolean, String> getIsValueWrapperObfuscated(Object
statsValue) {
- if (statsValue != null) {
- String statsValueSchemaClassName = ((GenericRecord)
statsValue).getSchema().getName();
- boolean toReturn =
statsValueSchemaClassName.equals(DateWrapper.class.getSimpleName())
- ||
statsValueSchemaClassName.equals(LocalDateWrapper.class.getSimpleName())
- ||
statsValueSchemaClassName.equals(TimestampMicrosWrapper.class.getSimpleName())
- ||
statsValueSchemaClassName.equals(DecimalWrapper.class.getSimpleName());
- if (toReturn) {
- return Pair.of(true, ((GenericRecord)
statsValue).getSchema().getName());
- }
- }
- return Pair.of(false, null);
- }
-
/**
* Returns field name and the resp data type of the field. The data type will always refer to the leaf node.
* for eg, for a.b.c, we turn Pair.of(a.b.c, DataType(c))
diff --git
a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWrapperUtils.java
b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWrapperUtils.java
new file mode 100644
index 000000000000..3ba3ecb63658
--- /dev/null
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWrapperUtils.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.avro;
+
+import org.apache.hudi.avro.model.ArrayWrapper;
+import org.apache.hudi.avro.model.BooleanWrapper;
+import org.apache.hudi.avro.model.BytesWrapper;
+import org.apache.hudi.avro.model.DateWrapper;
+import org.apache.hudi.avro.model.DecimalWrapper;
+import org.apache.hudi.avro.model.DoubleWrapper;
+import org.apache.hudi.avro.model.FloatWrapper;
+import org.apache.hudi.avro.model.IntWrapper;
+import org.apache.hudi.avro.model.LocalDateWrapper;
+import org.apache.hudi.avro.model.LongWrapper;
+import org.apache.hudi.avro.model.StringWrapper;
+import org.apache.hudi.avro.model.TimestampMicrosWrapper;
+import org.apache.hudi.common.util.OrderingValues;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.common.util.collection.ArrayComparable;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.util.Lazy;
+
+import org.apache.avro.Conversions;
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.util.Utf8;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros;
+import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant;
+import static
org.apache.hudi.metadata.HoodieTableMetadataUtil.tryUpcastDecimal;
+
+public class HoodieAvroWrapperUtils {
+
+ private static final Conversions.DecimalConversion AVRO_DECIMAL_CONVERSION =
new Conversions.DecimalConversion();
+ /**
+ * NOTE: PLEASE READ CAREFULLY
+ * <p>
+ * In Avro 1.10 generated builders rely on {@code SpecificData.getForSchema} invocation that in turn
+ * does use reflection to load the code-gen'd class corresponding to the Avro record model. This has
+ * serious adverse effects in terms of performance when gets executed on the hot-path (both, in terms
+ * of runtime and efficiency).
+ * <p>
+ * To work this around instead of using default code-gen'd builder invoking {@code SpecificData.getForSchema},
+ * we instead rely on overloaded ctor accepting another instance of the builder: {@code Builder(Builder)},
+ * which bypasses such invocation. Following corresponding builder's stubs are statically initialized
+ * to be used exactly for that purpose.
+ * <p>
+ * You can find more details in HUDI-3834.
+ */
+ private static final Lazy<StringWrapper.Builder> STRING_WRAPPER_BUILDER_STUB
= Lazy.lazily(StringWrapper::newBuilder);
+ private static final Lazy<BytesWrapper.Builder> BYTES_WRAPPER_BUILDER_STUB =
Lazy.lazily(BytesWrapper::newBuilder);
+ private static final Lazy<DoubleWrapper.Builder> DOUBLE_WRAPPER_BUILDER_STUB
= Lazy.lazily(DoubleWrapper::newBuilder);
+ private static final Lazy<FloatWrapper.Builder> FLOAT_WRAPPER_BUILDER_STUB =
Lazy.lazily(FloatWrapper::newBuilder);
+ private static final Lazy<LongWrapper.Builder> LONG_WRAPPER_BUILDER_STUB =
Lazy.lazily(LongWrapper::newBuilder);
+ private static final Lazy<IntWrapper.Builder> INT_WRAPPER_BUILDER_STUB =
Lazy.lazily(IntWrapper::newBuilder);
+ private static final Lazy<BooleanWrapper.Builder>
BOOLEAN_WRAPPER_BUILDER_STUB = Lazy.lazily(BooleanWrapper::newBuilder);
+ private static final Lazy<TimestampMicrosWrapper.Builder>
TIMESTAMP_MICROS_WRAPPER_BUILDER_STUB =
Lazy.lazily(TimestampMicrosWrapper::newBuilder);
+ private static final Lazy<DecimalWrapper.Builder>
DECIMAL_WRAPPER_BUILDER_STUB = Lazy.lazily(DecimalWrapper::newBuilder);
+ private static final Lazy<DateWrapper.Builder> DATE_WRAPPER_BUILDER_STUB =
Lazy.lazily(DateWrapper::newBuilder);
+ private static final Lazy<LocalDateWrapper.Builder>
LOCAL_DATE_WRAPPER_BUILDER_STUB = Lazy.lazily(LocalDateWrapper::newBuilder);
+ private static final Lazy<ArrayWrapper.Builder> ARRAY_WRAPPER_BUILDER_STUB =
Lazy.lazily(ArrayWrapper::newBuilder);
+
+ /**
+ * Wraps a value into Avro type wrapper.
+ *
+ * @param value Java value.
+ * @return A wrapped value with Avro type wrapper.
+ */
+ public static Object wrapValueIntoAvro(Comparable<?> value) {
+ if (value == null) {
+ return null;
+ } else if (value instanceof Date) {
+ // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
+ // rely on logical types to do proper encoding of the native Java types,
+ // and hereby have to encode value manually
+ LocalDate localDate = ((Date) value).toLocalDate();
+ return DateWrapper.newBuilder(DATE_WRAPPER_BUILDER_STUB.get())
+ .setValue((int) localDate.toEpochDay())
+ .build();
+ } else if (value instanceof LocalDate) {
+ // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
+ // rely on logical types to do proper encoding of the native Java types,
+ // and hereby have to encode value manually
+ LocalDate localDate = (LocalDate) value;
+ return LocalDateWrapper.newBuilder(LOCAL_DATE_WRAPPER_BUILDER_STUB.get())
+ .setValue((int) localDate.toEpochDay())
+ .build();
+ } else if (value instanceof BigDecimal) {
+ Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema();
+ BigDecimal upcastDecimal = tryUpcastDecimal((BigDecimal) value,
(LogicalTypes.Decimal) valueSchema.getLogicalType());
+ return DecimalWrapper.newBuilder(DECIMAL_WRAPPER_BUILDER_STUB.get())
+ .setValue(AVRO_DECIMAL_CONVERSION.toBytes(upcastDecimal,
valueSchema, valueSchema.getLogicalType()))
+ .build();
+ } else if (value instanceof Timestamp) {
+ // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
+ // rely on logical types to do proper encoding of the native Java types,
+ // and hereby have to encode value manually
+ Instant instant = ((Timestamp) value).toInstant();
+ return
TimestampMicrosWrapper.newBuilder(TIMESTAMP_MICROS_WRAPPER_BUILDER_STUB.get())
+ .setValue(instantToMicros(instant))
+ .build();
+ } else if (value instanceof Boolean) {
+ return
BooleanWrapper.newBuilder(BOOLEAN_WRAPPER_BUILDER_STUB.get()).setValue((Boolean)
value).build();
+ } else if (value instanceof Integer) {
+ return
IntWrapper.newBuilder(INT_WRAPPER_BUILDER_STUB.get()).setValue((Integer)
value).build();
+ } else if (value instanceof Long) {
+ return
LongWrapper.newBuilder(LONG_WRAPPER_BUILDER_STUB.get()).setValue((Long)
value).build();
+ } else if (value instanceof Float) {
+ return
FloatWrapper.newBuilder(FLOAT_WRAPPER_BUILDER_STUB.get()).setValue((Float)
value).build();
+ } else if (value instanceof Double) {
+ return
DoubleWrapper.newBuilder(DOUBLE_WRAPPER_BUILDER_STUB.get()).setValue((Double)
value).build();
+ } else if (value instanceof ByteBuffer) {
+ return
BytesWrapper.newBuilder(BYTES_WRAPPER_BUILDER_STUB.get()).setValue((ByteBuffer)
value).build();
+ } else if (value instanceof String || value instanceof Utf8) {
+ return
StringWrapper.newBuilder(STRING_WRAPPER_BUILDER_STUB.get()).setValue(value.toString()).build();
+ } else if (value instanceof ArrayComparable) {
+ List<Object> avroValues = OrderingValues.getValues((ArrayComparable)
value).stream().map(HoodieAvroWrapperUtils::wrapValueIntoAvro).collect(Collectors.toList());
+ return
ArrayWrapper.newBuilder(ARRAY_WRAPPER_BUILDER_STUB.get()).setWrappedValues(avroValues).build();
+ } else {
+ throw new UnsupportedOperationException(String.format("Unsupported type
of the value (%s)", value.getClass()));
+ }
+ }
+
+ /**
+ * Unwraps Avro value wrapper into Java value.
+ *
+ * @param avroValueWrapper A wrapped value with Avro type wrapper.
+ * @return Java value.
+ */
+ public static Comparable<?> unwrapAvroValueWrapper(Object avroValueWrapper) {
+ if (avroValueWrapper == null) {
+ return null;
+ }
+
+ Pair<Boolean, String> isValueWrapperObfuscated =
getIsValueWrapperObfuscated(avroValueWrapper);
+ if (isValueWrapperObfuscated.getKey()) {
+ return unwrapAvroValueWrapper(avroValueWrapper,
isValueWrapperObfuscated.getValue());
+ }
+
+ if (avroValueWrapper instanceof DateWrapper) {
+ return Date.valueOf(LocalDate.ofEpochDay(((DateWrapper)
avroValueWrapper).getValue()));
+ } else if (avroValueWrapper instanceof LocalDateWrapper) {
+ return LocalDate.ofEpochDay(((LocalDateWrapper)
avroValueWrapper).getValue());
+ } else if (avroValueWrapper instanceof DecimalWrapper) {
+ Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema();
+ return AVRO_DECIMAL_CONVERSION.fromBytes(((DecimalWrapper)
avroValueWrapper).getValue(), valueSchema, valueSchema.getLogicalType());
+ } else if (avroValueWrapper instanceof TimestampMicrosWrapper) {
+ return microsToInstant(((TimestampMicrosWrapper)
avroValueWrapper).getValue());
+ } else if (avroValueWrapper instanceof BooleanWrapper) {
+ return ((BooleanWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof IntWrapper) {
+ return ((IntWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof LongWrapper) {
+ return ((LongWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof FloatWrapper) {
+ return ((FloatWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof DoubleWrapper) {
+ return ((DoubleWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof BytesWrapper) {
+ return ((BytesWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof StringWrapper) {
+ return ((StringWrapper) avroValueWrapper).getValue();
+ } else if (avroValueWrapper instanceof ArrayWrapper) {
+ ArrayWrapper arrayWrapper = (ArrayWrapper) avroValueWrapper;
+ return OrderingValues.create(arrayWrapper.getWrappedValues().stream()
+ .map(HoodieAvroWrapperUtils::unwrapAvroValueWrapper)
+ .toArray(Comparable[]::new));
+ } else if (avroValueWrapper instanceof GenericRecord) {
+ // NOTE: This branch could be hit b/c Avro records could be reconstructed
+ // as {@code GenericRecord}
+ // TODO add logical type decoding
+ GenericRecord genRec = (GenericRecord) avroValueWrapper;
+ return (Comparable<?>) genRec.get("value");
+ } else {
+ throw new UnsupportedOperationException(String.format("Unsupported type
of the value (%s)", avroValueWrapper.getClass()));
+ }
+ }
+
+ public static Comparable<?> unwrapAvroValueWrapper(Object avroValueWrapper,
String wrapperClassName) {
+ if (avroValueWrapper == null) {
+ return null;
+ } else if (DateWrapper.class.getSimpleName().equals(wrapperClassName)) {
+ ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
+ return Date.valueOf(LocalDate.ofEpochDay((Integer) ((GenericRecord)
avroValueWrapper).get(0)));
+ } else if
(LocalDateWrapper.class.getSimpleName().equals(wrapperClassName)) {
+ ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
+ return LocalDate.ofEpochDay((Integer) ((GenericRecord)
avroValueWrapper).get(0));
+ } else if
(TimestampMicrosWrapper.class.getSimpleName().equals(wrapperClassName)) {
+ ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
+ Instant instant = microsToInstant((Long) ((GenericRecord)
avroValueWrapper).get(0));
+ return Timestamp.from(instant);
+ } else if (DecimalWrapper.class.getSimpleName().equals(wrapperClassName)) {
+ Schema valueSchema = DecimalWrapper.SCHEMA$.getField("value").schema();
+ ValidationUtils.checkArgument(avroValueWrapper instanceof GenericRecord);
+ return AVRO_DECIMAL_CONVERSION.fromBytes((ByteBuffer)((GenericRecord)
avroValueWrapper).get(0), valueSchema, valueSchema.getLogicalType());
+ } else {
+ throw new UnsupportedOperationException(String.format("Unsupported type
of the value (%s)", avroValueWrapper.getClass()));
+ }
+ }
+
+ private static Pair<Boolean, String> getIsValueWrapperObfuscated(Object
statsValue) {
+ if (statsValue != null) {
+ String statsValueSchemaClassName = ((GenericRecord)
statsValue).getSchema().getName();
+ boolean toReturn =
statsValueSchemaClassName.equals(DateWrapper.class.getSimpleName())
+ ||
statsValueSchemaClassName.equals(LocalDateWrapper.class.getSimpleName())
+ ||
statsValueSchemaClassName.equals(TimestampMicrosWrapper.class.getSimpleName())
+ ||
statsValueSchemaClassName.equals(DecimalWrapper.class.getSimpleName());
+ if (toReturn) {
+ return Pair.of(true, ((GenericRecord)
statsValue).getSchema().getName());
+ }
+ }
+ return Pair.of(false, null);
+ }
+}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java
b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java
index 2e979e29e15f..e5325d7424a1 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java
@@ -26,7 +26,7 @@ import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.Objects;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
+import static
org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
/**
* Hoodie metadata for the column range of data stored in columnar format
(like Parquet)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java
index 5d2f02ec21fa..8f648e8efc64 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java
@@ -18,7 +18,6 @@
package org.apache.hudi.common.table.log.block;
-import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieDeleteRecord;
import org.apache.hudi.avro.model.HoodieDeleteRecordList;
import org.apache.hudi.common.fs.SizeAwareDataInputStream;
@@ -55,7 +54,8 @@ import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
+import static
org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.wrapValueIntoAvro;
/**
* Delete block contains a list of keys to be deleted from scanning the blocks
so far.
@@ -145,7 +145,7 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
.map(record -> HoodieDeleteRecord.newBuilder(recordBuilder)
.setRecordKey(record.getRecordKey())
.setPartitionPath(record.getPartitionPath())
- .setOrderingVal(HoodieAvroUtils.wrapValueIntoAvro(record.getOrderingValue()))
+ .setOrderingVal(wrapValueIntoAvro(record.getOrderingValue()))
.build())
.collect(Collectors.toList());
writer.write(HoodieDeleteRecordList.newBuilder(recordListBuilder)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
index fc851d02609b..b45aaca87786 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
@@ -67,7 +67,7 @@ import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.wrapValueIntoAvro;
import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
index aa993624dbd0..e99b8b79eb67 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
@@ -160,8 +160,8 @@ import static java.util.stream.Collectors.toList;
import static org.apache.hudi.avro.AvroSchemaUtils.resolveNullableSchema;
import static org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields;
import static org.apache.hudi.avro.HoodieAvroUtils.projectSchema;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
-import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.wrapValueIntoAvro;
import static org.apache.hudi.common.config.HoodieCommonConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES;
import static org.apache.hudi.common.config.HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED;
import static org.apache.hudi.common.config.HoodieCommonConfig.MAX_MEMORY_FOR_COMPACTION;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
index 3c8202c4922c..65366e84982c 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java
@@ -43,8 +43,8 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
-import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.wrapValueIntoAvro;
import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
diff --git
a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
index cdb69431927d..6e83b6cd7bac 100644
--- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
+++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java
@@ -87,8 +87,8 @@ import java.util.stream.Stream;
import static org.apache.hudi.avro.AvroSchemaUtils.resolveNullableSchema;
import static
org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema;
import static org.apache.hudi.avro.HoodieAvroUtils.sanitizeName;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
-import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.wrapValueIntoAvro;
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
index f943c4e41f3e..755a8081b58b 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java
@@ -167,7 +167,7 @@ import java.util.stream.Collectors;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
-import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper;
+import static org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper;
import static
org.apache.hudi.common.config.LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY;
import static org.apache.hudi.common.model.HoodieTableType.COPY_ON_WRITE;
import static org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ;
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
index 59f24a0ad173..43b6b6511297 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala
@@ -19,7 +19,7 @@
package org.apache.hudi.functional
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions}
-import org.apache.hudi.avro.HoodieAvroUtils
+import org.apache.hudi.avro.HoodieAvroWrapperUtils.unwrapAvroValueWrapper
import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.common.model.HoodieColumnRangeMetadata
@@ -166,8 +166,8 @@ class TestMetadataTableWithSparkDataSource extends
SparkClientFunctionalTestHarn
assertEquals(metadataColStats.getValueCount,
columnRangeMetadata.getValueCount)
assertEquals(metadataColStats.getTotalSize,
columnRangeMetadata.getTotalSize)
- assertEquals(HoodieAvroUtils.unwrapAvroValueWrapper(metadataColStats.getMaxValue), columnRangeMetadata.getMaxValue)
- assertEquals(HoodieAvroUtils.unwrapAvroValueWrapper(metadataColStats.getMinValue), columnRangeMetadata.getMinValue)
+ assertEquals(unwrapAvroValueWrapper(metadataColStats.getMaxValue), columnRangeMetadata.getMaxValue)
+ assertEquals(unwrapAvroValueWrapper(metadataColStats.getMinValue), columnRangeMetadata.getMinValue)
assertEquals(metadataColStats.getFileName, fileName)
}
@@ -224,8 +224,8 @@ class TestMetadataTableWithSparkDataSource extends
SparkClientFunctionalTestHarn
assertEquals(metadataColStats.getValueCount,
columnRangeMetadata.getValueCount)
assertEquals(metadataColStats.getTotalSize,
columnRangeMetadata.getTotalSize)
- assertEquals(HoodieAvroUtils.unwrapAvroValueWrapper(metadataColStats.getMaxValue), columnRangeMetadata.getMaxValue)
- assertEquals(HoodieAvroUtils.unwrapAvroValueWrapper(metadataColStats.getMinValue), columnRangeMetadata.getMinValue)
+ assertEquals(unwrapAvroValueWrapper(metadataColStats.getMaxValue), columnRangeMetadata.getMaxValue)
+ assertEquals(unwrapAvroValueWrapper(metadataColStats.getMinValue), columnRangeMetadata.getMinValue)
assertEquals(metadataColStats.getFileName, fileName)
}