vinishjail97 commented on code in PR #17694:
URL: https://github.com/apache/hudi/pull/17694#discussion_r2722440861
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java:
##########
@@ -754,6 +775,172 @@ public HoodieSchema getNonNullType() {
return types.get(0).getType() != HoodieSchemaType.NULL ? types.get(0) :
types.get(1);
}
+ /**
+ * Gets a nested field using dot notation, supporting Parquet-style
array/map accessors.
+ *
+ * <p>Supports nested field access using dot notation including MAP and
ARRAY types
+ * using Parquet-style accessor patterns:</p>
+ *
+ * <ul>
+ * <li><b>RECORD types:</b> Standard dot notation (e.g., {@code
"user.profile.name"})</li>
+ * <li><b>ARRAY types:</b> Use {@code ".list.element"} to access array
elements
+ * <ul>
+ * <li>Example: {@code "items.list.element"} accesses element schema
of array</li>
+ * <li>Example: {@code "items.list.element.id"} accesses nested
field within array elements</li>
+ * </ul>
+ * </li>
+ * <li><b>MAP types:</b> Use {@code ".key_value.key"} or {@code
".key_value.value"} to access map components
+ * <ul>
+ * <li>Example: {@code "metadata.key_value.key"} accesses map keys
(always STRING)</li>
+ * <li>Example: {@code "metadata.key_value.value"} accesses map
value schema</li>
+ * <li>Example: {@code "nested_map.key_value.value.field"} accesses
nested field within map values</li>
+ * </ul>
+ * </li>
+ * </ul>
+ *
+ * <p>Note: Spark Parquet files may use {@code ".array"} format instead of
{@code ".list.element"}.
+ * This translation is handled at the Parquet reading level in ParquetUtils,
not here.</p>
+ *
+ * @param fieldName Field path (e.g., "user.profile.name",
"items.list.element", "metadata.key_value.value")
+ * @return Option containing a pair of canonical field name and the
HoodieSchemaField, or Option.empty() if not found
+ */
+ public Option<Pair<String, HoodieSchemaField>> getNestedField(String
fieldName) {
+ ValidationUtils.checkArgument(fieldName != null && !fieldName.isEmpty(),
"Field name cannot be null or empty");
+ return getNestedFieldInternal(this, fieldName, 0, "");
+ }
+
+ /**
+ * Internal helper method for recursively retrieving nested fields using
offset-based navigation.
+ *
+ * @param schema the current schema to search in
+ * @param fullPath the full field path string
+ * @param offset current position in fullPath
+ * @param prefix the accumulated field path prefix
+ * @return Option containing a pair of canonical field name and the
HoodieSchemaField, or Option.empty() if field not found
+ */
+ private static Option<Pair<String, HoodieSchemaField>>
getNestedFieldInternal(
+ HoodieSchema schema, String fullPath, int offset, String prefix) {
+ HoodieSchema nonNullableSchema = schema.getNonNullType();
+ int nextDot = fullPath.indexOf('.', offset);
+ // Terminal case: no more dots in this segment
+ if (nextDot == -1) {
+ String fieldName = fullPath.substring(offset);
+
+ // Handle ARRAY terminal case: "element" (after "list.")
Review Comment:
You are right it's dead code.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]