codope commented on a change in pull request #5181:
URL: https://github.com/apache/hudi/pull/5181#discussion_r839233618
##########
File path: hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
##########
@@ -655,4 +786,112 @@ private static Schema resolveNullableSchema(Schema
schema) {
return nonNullType;
}
+
+ // TODO elaborate
+ public static class ConvertingGenericData extends GenericData {
+
+ private static final DecimalConversion DECIMAL_CONVERSION = new
DecimalConversion();
+ private static final Conversions.UUIDConversion UUID_CONVERSION = new
Conversions.UUIDConversion();
+ private static final TimeConversions.DateConversion DATE_CONVERSION = new
TimeConversions.DateConversion();
+ private static final TimeConversions.TimeMillisConversion
TIME_MILLIS_CONVERSION = new TimeConversions.TimeMillisConversion();
Review comment:
Do we need to upgrade avro version? Some of these classes are not
present in v1.8.2
https://www.javadoc.io/doc/org.apache.avro/avro/1.8.2/org/apache/avro/Conversion.html
##########
File path: hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
##########
@@ -501,6 +509,109 @@ public static Object getNestedFieldVal(GenericRecord
record, String fieldName, b
}
}
+ /**
+ * Get schema for the given field and record. Field can be nested, denoted
by dot notation. e.g: a.b.c
+ *
+ * @param record - record containing the value of the given field
+ * @param fieldName - name of the field
+ * @return
+ */
+ public static Schema getNestedFieldSchemaFromRecord(GenericRecord record,
String fieldName) {
+ String[] parts = fieldName.split("\\.");
+ GenericRecord valueNode = record;
+ int i = 0;
+ for (; i < parts.length; i++) {
+ String part = parts[i];
+ Object val = valueNode.get(part);
+
+ if (i == parts.length - 1) {
+ return
resolveNullableSchema(valueNode.getSchema().getField(part).schema());
+ } else {
+ if (!(val instanceof GenericRecord)) {
+ throw new HoodieException("Cannot find a record at part value :" +
part);
+ }
+ valueNode = (GenericRecord) val;
+ }
+ }
+ throw new HoodieException("Failed to get schema. Not a valid field name: "
+ fieldName);
+ }
+
+
+ /**
+ * Get schema for the given field and write schema. Field can be nested,
denoted by dot notation. e.g: a.b.c
+ * Use this method when record is not available. Otherwise, prefer to use
{@link #getNestedFieldSchemaFromRecord(GenericRecord, String)}
+ *
+ * @param writeSchema - write schema of the record
+ * @param fieldName - name of the field
+ * @return
+ */
+ public static Schema getNestedFieldSchemaFromWriteSchema(Schema writeSchema,
String fieldName) {
+ String[] parts = fieldName.split("\\.");
+ int i = 0;
+ for (; i < parts.length; i++) {
+ String part = parts[i];
+ Schema schema = writeSchema.getField(part).schema();
+
+ if (i == parts.length - 1) {
+ return resolveNullableSchema(schema);
+ }
+ }
+ throw new HoodieException("Failed to get schema. Not a valid field name: "
+ fieldName);
+ }
+
+ /**
+ * Given a field schema, convert its value to native Java type.
+ *
+ * @param schema - field schema
+ * @param val - field value
+ * @return
+ */
+ public static Comparable<?> convertToNativeJavaType(Schema schema, Object
val) {
+ if (val == null) {
+ return StringUtils.EMPTY_STRING;
Review comment:
Should this be null as well?
##########
File path: hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
##########
@@ -655,4 +786,113 @@ private static Schema resolveNullableSchema(Schema
schema) {
return nonNullType;
}
+
+ // TODO elaborate
+ public static class ConvertingGenericData extends GenericData {
+
+ private static final DecimalConversion DECIMAL_CONVERSION = new
DecimalConversion();
+ private static final Conversions.UUIDConversion UUID_CONVERSION = new
Conversions.UUIDConversion();
+ private static final TimeConversions.DateConversion DATE_CONVERSION = new
TimeConversions.DateConversion();
+ private static final TimeConversions.TimeMicrosConversion
TIME_MICROS_CONVERSION = new TimeConversions.TimeMicrosConversion();
+ private static final TimeConversions.TimestampMicrosConversion
TIMESTAMP_MICROS_CONVERSION = new TimeConversions.TimestampMicrosConversion();
+
+ // NOTE: Those are not supported in Avro 1.8.2
+ // TODO re-enable upon upgrading to 1.10
+ // private static final TimeConversions.TimestampMillisConversion
TIMESTAMP_MILLIS_CONVERSION = new TimeConversions.TimestampMillisConversion();
+ // private static final TimeConversions.TimeMillisConversion
TIME_MILLIS_CONVERSION = new TimeConversions.TimeMillisConversion();
+ // private static final TimeConversions.LocalTimestampMillisConversion
LOCAL_TIMESTAMP_MILLIS_CONVERSION = new
TimeConversions.LocalTimestampMillisConversion();
+ // private static final TimeConversions.LocalTimestampMicrosConversion
LOCAL_TIMESTAMP_MICROS_CONVERSION = new
TimeConversions.LocalTimestampMicrosConversion();
+
+ public static final GenericData INSTANCE = new ConvertingGenericData();
+
+ private ConvertingGenericData() {
+ addLogicalTypeConversion(DECIMAL_CONVERSION);
+ addLogicalTypeConversion(UUID_CONVERSION);
+ addLogicalTypeConversion(DATE_CONVERSION);
+ addLogicalTypeConversion(TIME_MICROS_CONVERSION);
+ addLogicalTypeConversion(TIMESTAMP_MICROS_CONVERSION);
+ // NOTE: Those are not supported in Avro 1.8.2
+ // TODO re-enable upon upgrading to 1.10
Review comment:
makes sense.. maybe also create a tracking jira for avro upgrade if
there isn't alrady one.. there are some GH issues tied to the upgrade too.
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java
##########
@@ -203,15 +203,16 @@ public HoodieBloomIndex(HoodieWriteConfig config,
BaseHoodieBloomIndexHelper blo
return Stream.empty();
}
try {
- Map<Pair<String, String>, HoodieMetadataColumnStats>
fileToColumnStatsMap = hoodieTable
- .getMetadataTable().getColumnStats(partitionFileNameList,
keyField);
+ Map<Pair<String, String>, HoodieMetadataColumnStats>
fileToColumnStatsMap =
+
hoodieTable.getMetadataTable().getColumnStats(partitionFileNameList, keyField);
List<Pair<String, BloomIndexFileInfo>> result = new ArrayList<>();
for (Map.Entry<Pair<String, String>, HoodieMetadataColumnStats> entry
: fileToColumnStatsMap.entrySet()) {
result.add(Pair.of(entry.getKey().getLeft(),
new BloomIndexFileInfo(
FSUtils.getFileId(entry.getKey().getRight()),
- entry.getValue().getMinValue(),
- entry.getValue().getMaxValue()
+ // NOTE: Here we assume that the type of the primary key
field is string
Review comment:
maybe also add the reason behind this
##########
File path: hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
##########
@@ -655,4 +786,112 @@ private static Schema resolveNullableSchema(Schema
schema) {
return nonNullType;
}
+
+ // TODO elaborate
+ public static class ConvertingGenericData extends GenericData {
Review comment:
+1 for extracting to class
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
##########
@@ -1038,11 +1015,12 @@ public static void accumulateColumnRanges(Schema.Field
field, String filePath,
Map<String,
HoodieColumnRangeMetadata<Comparable>> columnRangeMap,
Map<String, Map<String, Object>>
columnToStats) {
Map<String, Object> columnStats = columnToStats.get(field.name());
- HoodieColumnRangeMetadata<Comparable> columnRangeMetadata =
HoodieColumnRangeMetadata.create(
+ HoodieColumnRangeMetadata<Comparable> columnRangeMetadata =
HoodieColumnRangeMetadata.<Comparable>create(
filePath,
field.name(),
- (Comparable) String.valueOf(columnStats.get(MIN)),
- (Comparable) String.valueOf(columnStats.get(MAX)),
+ // TODO fix
Review comment:
ok, so plan is to remove this method `convertToNativeJavaType`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]