vinishjail97 commented on code in PR #17694:
URL: https://github.com/apache/hudi/pull/17694#discussion_r2644458787
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -416,20 +416,115 @@ private static Option<Pair<String, HoodieSchemaField>>
getNestedFieldInternal(Ho
.map(field -> Pair.of(prefix + fieldName, field));
} else {
// Recursive case: nested field
- if (nonNullableSchema.getType() != HoodieSchemaType.RECORD) {
- return Option.empty();
- }
-
int dotIndex = fieldName.indexOf(".");
String rootFieldName = fieldName.substring(0, dotIndex);
String remainingPath = fieldName.substring(dotIndex + 1);
- return nonNullableSchema.getField(rootFieldName)
- .flatMap(rootField -> getNestedFieldInternal(
- rootField.schema(),
- remainingPath,
- prefix + rootFieldName + "."
- ));
+ // Handle RECORD types - standard field navigation
+ if (nonNullableSchema.getType() == HoodieSchemaType.RECORD) {
+ return nonNullableSchema.getField(rootFieldName)
+ .flatMap(rootField -> getNestedFieldInternal(
+ rootField.schema(),
+ remainingPath,
+ prefix + rootFieldName + "."
+ ));
+ }
+
+ // Handle ARRAY types - expect ".list.element" pattern
+ if (nonNullableSchema.getType() == HoodieSchemaType.ARRAY &&
"list".equals(rootFieldName)) {
+ if (!remainingPath.startsWith("element")) {
+ return Option.empty(); // Invalid path for ARRAY
+ }
+
+ // Skip "element" and get remaining path
+ String pathAfterElement = remainingPath.substring("element".length());
+ if (!pathAfterElement.isEmpty() && !pathAfterElement.startsWith(".")) {
+ return Option.empty(); // Invalid format
+ }
+ if (pathAfterElement.startsWith(".")) {
+ pathAfterElement = pathAfterElement.substring(1);
+ }
Review Comment:
Can this be simplified by looking for the presence of "list.element"
directly?
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -416,20 +416,115 @@ private static Option<Pair<String, HoodieSchemaField>>
getNestedFieldInternal(Ho
.map(field -> Pair.of(prefix + fieldName, field));
} else {
// Recursive case: nested field
- if (nonNullableSchema.getType() != HoodieSchemaType.RECORD) {
- return Option.empty();
- }
-
int dotIndex = fieldName.indexOf(".");
String rootFieldName = fieldName.substring(0, dotIndex);
String remainingPath = fieldName.substring(dotIndex + 1);
- return nonNullableSchema.getField(rootFieldName)
- .flatMap(rootField -> getNestedFieldInternal(
- rootField.schema(),
- remainingPath,
- prefix + rootFieldName + "."
- ));
+ // Handle RECORD types - standard field navigation
+ if (nonNullableSchema.getType() == HoodieSchemaType.RECORD) {
+ return nonNullableSchema.getField(rootFieldName)
+ .flatMap(rootField -> getNestedFieldInternal(
+ rootField.schema(),
+ remainingPath,
+ prefix + rootFieldName + "."
+ ));
+ }
+
+ // Handle ARRAY types - expect ".list.element" pattern
+ if (nonNullableSchema.getType() == HoodieSchemaType.ARRAY &&
"list".equals(rootFieldName)) {
+ if (!remainingPath.startsWith("element")) {
+ return Option.empty(); // Invalid path for ARRAY
+ }
+
+ // Skip "element" and get remaining path
+ String pathAfterElement = remainingPath.substring("element".length());
+ if (!pathAfterElement.isEmpty() && !pathAfterElement.startsWith(".")) {
+ return Option.empty(); // Invalid format
+ }
+ if (pathAfterElement.startsWith(".")) {
+ pathAfterElement = pathAfterElement.substring(1);
+ }
+
+ HoodieSchema elementSchema = nonNullableSchema.getElementType();
+ if (pathAfterElement.isEmpty()) {
+ // We've reached the end - return synthetic field for element
+ HoodieSchemaField syntheticField = HoodieSchemaField.of(
+ "element",
+ elementSchema,
+ null, // doc
+ null // defaultVal
+ );
+ return Option.of(Pair.of(prefix + rootFieldName + ".element",
syntheticField));
+ } else {
+ // Continue navigating into element schema
+ return getNestedFieldInternal(
+ elementSchema,
+ pathAfterElement,
+ prefix + rootFieldName + ".element."
+ );
+ }
+ }
+
+ // Handle MAP types - expect ".key_value.key" or ".key_value.value"
pattern
Review Comment:
Are these the only pattern for nested representation of map and array
column? Parquet uses the same values?
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -416,20 +416,115 @@ private static Option<Pair<String, HoodieSchemaField>>
getNestedFieldInternal(Ho
.map(field -> Pair.of(prefix + fieldName, field));
} else {
// Recursive case: nested field
- if (nonNullableSchema.getType() != HoodieSchemaType.RECORD) {
- return Option.empty();
- }
-
int dotIndex = fieldName.indexOf(".");
String rootFieldName = fieldName.substring(0, dotIndex);
String remainingPath = fieldName.substring(dotIndex + 1);
- return nonNullableSchema.getField(rootFieldName)
- .flatMap(rootField -> getNestedFieldInternal(
- rootField.schema(),
- remainingPath,
- prefix + rootFieldName + "."
- ));
+ // Handle RECORD types - standard field navigation
+ if (nonNullableSchema.getType() == HoodieSchemaType.RECORD) {
+ return nonNullableSchema.getField(rootFieldName)
+ .flatMap(rootField -> getNestedFieldInternal(
+ rootField.schema(),
+ remainingPath,
+ prefix + rootFieldName + "."
+ ));
+ }
+
+ // Handle ARRAY types - expect ".list.element" pattern
Review Comment:
Is `list.element` the only available pattern to represent nested array
columns?
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -416,20 +416,115 @@ private static Option<Pair<String, HoodieSchemaField>>
getNestedFieldInternal(Ho
.map(field -> Pair.of(prefix + fieldName, field));
} else {
// Recursive case: nested field
- if (nonNullableSchema.getType() != HoodieSchemaType.RECORD) {
- return Option.empty();
- }
-
int dotIndex = fieldName.indexOf(".");
String rootFieldName = fieldName.substring(0, dotIndex);
String remainingPath = fieldName.substring(dotIndex + 1);
- return nonNullableSchema.getField(rootFieldName)
- .flatMap(rootField -> getNestedFieldInternal(
- rootField.schema(),
- remainingPath,
- prefix + rootFieldName + "."
- ));
+ // Handle RECORD types - standard field navigation
+ if (nonNullableSchema.getType() == HoodieSchemaType.RECORD) {
+ return nonNullableSchema.getField(rootFieldName)
+ .flatMap(rootField -> getNestedFieldInternal(
+ rootField.schema(),
+ remainingPath,
+ prefix + rootFieldName + "."
+ ));
+ }
+
+ // Handle ARRAY types - expect ".list.element" pattern
+ if (nonNullableSchema.getType() == HoodieSchemaType.ARRAY &&
"list".equals(rootFieldName)) {
+ if (!remainingPath.startsWith("element")) {
+ return Option.empty(); // Invalid path for ARRAY
+ }
+
+ // Skip "element" and get remaining path
+ String pathAfterElement = remainingPath.substring("element".length());
+ if (!pathAfterElement.isEmpty() && !pathAfterElement.startsWith(".")) {
+ return Option.empty(); // Invalid format
+ }
+ if (pathAfterElement.startsWith(".")) {
+ pathAfterElement = pathAfterElement.substring(1);
+ }
+
+ HoodieSchema elementSchema = nonNullableSchema.getElementType();
+ if (pathAfterElement.isEmpty()) {
+ // We've reached the end - return synthetic field for element
+ HoodieSchemaField syntheticField = HoodieSchemaField.of(
Review Comment:
What do you mean by syntheticField?
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -416,20 +416,115 @@ private static Option<Pair<String, HoodieSchemaField>>
getNestedFieldInternal(Ho
.map(field -> Pair.of(prefix + fieldName, field));
} else {
// Recursive case: nested field
- if (nonNullableSchema.getType() != HoodieSchemaType.RECORD) {
- return Option.empty();
- }
-
int dotIndex = fieldName.indexOf(".");
String rootFieldName = fieldName.substring(0, dotIndex);
String remainingPath = fieldName.substring(dotIndex + 1);
- return nonNullableSchema.getField(rootFieldName)
- .flatMap(rootField -> getNestedFieldInternal(
- rootField.schema(),
- remainingPath,
- prefix + rootFieldName + "."
- ));
+ // Handle RECORD types - standard field navigation
+ if (nonNullableSchema.getType() == HoodieSchemaType.RECORD) {
+ return nonNullableSchema.getField(rootFieldName)
+ .flatMap(rootField -> getNestedFieldInternal(
+ rootField.schema(),
+ remainingPath,
+ prefix + rootFieldName + "."
+ ));
+ }
+
+ // Handle ARRAY types - expect ".list.element" pattern
+ if (nonNullableSchema.getType() == HoodieSchemaType.ARRAY &&
"list".equals(rootFieldName)) {
+ if (!remainingPath.startsWith("element")) {
+ return Option.empty(); // Invalid path for ARRAY
+ }
+
+ // Skip "element" and get remaining path
+ String pathAfterElement = remainingPath.substring("element".length());
+ if (!pathAfterElement.isEmpty() && !pathAfterElement.startsWith(".")) {
+ return Option.empty(); // Invalid format
+ }
+ if (pathAfterElement.startsWith(".")) {
+ pathAfterElement = pathAfterElement.substring(1);
+ }
+
+ HoodieSchema elementSchema = nonNullableSchema.getElementType();
+ if (pathAfterElement.isEmpty()) {
+ // We've reached the end - return synthetic field for element
+ HoodieSchemaField syntheticField = HoodieSchemaField.of(
+ "element",
+ elementSchema,
+ null, // doc
+ null // defaultVal
+ );
+ return Option.of(Pair.of(prefix + rootFieldName + ".element",
syntheticField));
+ } else {
+ // Continue navigating into element schema
+ return getNestedFieldInternal(
+ elementSchema,
+ pathAfterElement,
+ prefix + rootFieldName + ".element."
+ );
+ }
+ }
+
+ // Handle MAP types - expect ".key_value.key" or ".key_value.value"
pattern
+ if (nonNullableSchema.getType() == HoodieSchemaType.MAP &&
"key_value".equals(rootFieldName)) {
+ if (remainingPath.startsWith("key")) {
+ // Skip "key" and get remaining path
+ String pathAfterKey = remainingPath.substring("key".length());
+ if (!pathAfterKey.isEmpty() && !pathAfterKey.startsWith(".")) {
+ return Option.empty(); // Invalid format
+ }
+ if (pathAfterKey.startsWith(".")) {
+ pathAfterKey = pathAfterKey.substring(1);
Review Comment:
Can we define static constants for key_value and list.element etc. and
simplify this code?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]