the-other-tim-brown commented on code in PR #14311:
URL: https://github.com/apache/hudi/pull/14311#discussion_r2581761111


##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -699,25 +701,26 @@ public static List<String> getValidIndexedColumns(HoodieIndexDefinition indexDef
 
     return indexDefinition.getSourceFields().stream()
         .filter(indexCol -> {
-          Pair<String, Schema.Field> fieldSchemaPair = 
HoodieAvroUtils.getSchemaForField(tableSchema, indexCol);
-          Schema.Field fieldSchema = fieldSchemaPair.getRight();
-          return fieldSchema != null && 
!isTimestampMillisField(fieldSchema.schema());
+          Pair<String, HoodieSchemaField> fieldPair = 
HoodieSchemaUtils.getNestedField(tableSchema, indexCol);
+          if (fieldPair == null) {
+            return false;
+          }
+          return !isTimestampMillisField(fieldPair.getRight().schema());
         })
         .collect(Collectors.toList());
   }
 
   /**
-   * Checks if a schema field is of type timestamp_millis (timestamp-millis or local-timestamp-millis).

Review Comment:
   Let's keep this comment the same



##########
hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaUtils.java:
##########
@@ -301,4 +304,264 @@ public void testConsistencyWithAvroUtilities() {
     // Should produce equivalent schemas
     assertEquals(avroResult.toString(), hoodieResult.toString());
   }
+
+  @Test
+  public void testGetNestedFieldTopLevel() {
+    // Create simple schema
+    HoodieSchema schema = HoodieSchema.createRecord(
+        "TestRecord",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("id", 
HoodieSchema.create(HoodieSchemaType.STRING)),
+            HoodieSchemaField.of("name", 
HoodieSchema.create(HoodieSchemaType.STRING))
+        )
+    );
+
+    // Test getting top-level field
+    Pair<String, HoodieSchemaField> result = 
HoodieSchemaUtils.getNestedField(schema, "id");
+
+    assertNotNull(result);
+    assertEquals("id", result.getLeft());
+    assertEquals("id", result.getRight().name());
+    assertEquals(HoodieSchemaType.STRING, 
result.getRight().schema().getType());
+  }
+
+  @Test
+  public void testGetNestedFieldSingleLevel() {
+    // Create schema with nested record
+    HoodieSchema addressSchema = HoodieSchema.createRecord(
+        "Address",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("street", 
HoodieSchema.create(HoodieSchemaType.STRING)),
+            HoodieSchemaField.of("city", 
HoodieSchema.create(HoodieSchemaType.STRING))
+        )
+    );
+
+    HoodieSchema schema = HoodieSchema.createRecord(
+        "Person",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("name", 
HoodieSchema.create(HoodieSchemaType.STRING)),
+            HoodieSchemaField.of("address", addressSchema)
+        )
+    );
+
+    // Test getting nested field
+    Pair<String, HoodieSchemaField> result = 
HoodieSchemaUtils.getNestedField(schema, "address.city");
+
+    assertNotNull(result);
+    assertEquals("address.city", result.getLeft());
+    assertEquals("city", result.getRight().name());
+    assertEquals(HoodieSchemaType.STRING, 
result.getRight().schema().getType());
+  }
+
+  @Test
+  public void testGetNestedFieldMultiLevel() {
+    // Create 3-level nested schema
+    HoodieSchema profileSchema = HoodieSchema.createRecord(
+        "Profile",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("bio", 
HoodieSchema.create(HoodieSchemaType.STRING)),
+            HoodieSchemaField.of("ts_millis", 
HoodieSchema.createTimestampMillis())
+        )
+    );
+
+    HoodieSchema userSchema = HoodieSchema.createRecord(
+        "User",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("profile", profileSchema),
+            HoodieSchemaField.of("age", 
HoodieSchema.create(HoodieSchemaType.INT))
+        )
+    );
+
+    HoodieSchema rootSchema = HoodieSchema.createRecord(
+        "Root",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("user", userSchema),
+            HoodieSchemaField.of("event_id", 
HoodieSchema.create(HoodieSchemaType.STRING))
+        )
+    );
+
+    // Test getting deeply nested field
+    Pair<String, HoodieSchemaField> result = 
HoodieSchemaUtils.getNestedField(rootSchema, "user.profile.ts_millis");
+
+    assertNotNull(result);
+    assertEquals("user.profile.ts_millis", result.getLeft());
+    assertEquals("ts_millis", result.getRight().name());
+    assertEquals(HoodieSchemaType.TIMESTAMP, 
result.getRight().schema().getType());
+  }
+
+  @Test
+  public void testGetNestedFieldWithTimestampTypes() {
+    // Create schema with different timestamp types
+    HoodieSchema schema = HoodieSchema.createRecord(
+        "TimestampRecord",
+        null,
+        null,
+        Arrays.asList(
+            HoodieSchemaField.of("ts_millis", 
HoodieSchema.createTimestampMillis()),
+            HoodieSchemaField.of("ts_micros", 
HoodieSchema.createTimestampMicros()),
+            HoodieSchemaField.of("date_field", HoodieSchema.createDate())
+        )
+    );
+
+    // Test timestamp-millis field
+    Pair<String, HoodieSchemaField> millisResult = 
HoodieSchemaUtils.getNestedField(schema, "ts_millis");
+    assertNotNull(millisResult);
+    assertEquals("ts_millis", millisResult.getRight().name());

Review Comment:
   Let's add assertions on the `getLeft` output as well



##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -221,4 +222,64 @@ public static HoodieSchemaField createNewSchemaField(String name, HoodieSchema s
         name, schema.toAvroSchema(), doc, defaultValue);
     return HoodieSchemaField.fromAvroField(avroField);
   }
+
+  /**
+   * Gets a field (including nested fields) from the schema using dot notation.
+   * This is equivalent to HoodieAvroUtils.getSchemaForField() but operates on HoodieSchema.
+   * <p>
+   * Supports nested field access using dot notation. For example:
+   * <ul>
+   *   <li>"name" - retrieves top-level field</li>
+   *   <li>"user.profile.displayName" - retrieves nested field</li>
+   * </ul>
+   *
+   * @param schema    the schema to search in
+   * @param fieldName the field name (may contain dots for nested fields)
+   * @return Pair of canonical field name and the HoodieSchemaField, or null if field not found

Review Comment:
   Should we have this return `Option` instead of null?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to