This is an automated email from the ASF dual-hosted git repository.
timbrown pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 5b8c2a52e1dd feat(schema): Add helper to get HoodieSchema in
TableSchemaResolver (#17456)
5b8c2a52e1dd is described below
commit 5b8c2a52e1dd12274142e3d5fd547373b8b02bd6
Author: Rahil C <[email protected]>
AuthorDate: Tue Dec 2 12:21:10 2025 -0800
feat(schema): Add helper to get HoodieSchema in TableSchemaResolver (#17456)
* refactor: Add helper to get HoodieSchema in TableSchemaResolver
* address tim comment
---
.../hudi/common/table/TableSchemaResolver.java | 26 +++++++++++++++
.../hudi/common/table/TestTableSchemaResolver.java | 38 ++++++++++++++++++++++
2 files changed, 64 insertions(+)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
index 2e5cb79db203..6949c04790a0 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java
@@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
@@ -117,6 +118,31 @@ public class TableSchemaResolver {
return getTableParquetSchemaFromDataFile();
}
+ /**
+ * Gets full schema (user + metadata) for a hoodie table as HoodieSchema.
+ * Delegates to getTableAvroSchema and wraps the result in a HoodieSchema.
+ *
+ * @return HoodieSchema for this table
+ * @throws Exception if the table schema cannot be resolved
+ */
+ public HoodieSchema getTableSchema() throws Exception {
+ Schema avroSchema =
getTableAvroSchema(metaClient.getTableConfig().populateMetaFields());
+ return HoodieSchema.fromAvroSchema(avroSchema);
+ }
+
+ /**
+ * Gets full schema (user + metadata) for a hoodie table as HoodieSchema.
+ * Delegates to getTableAvroSchema and wraps the result in a HoodieSchema.
+ *
+ * @param includeMetadataFields whether to include metadata fields
+ * @return HoodieSchema for this table
+ * @throws Exception if the table schema cannot be resolved
+ */
+ public HoodieSchema getTableSchema(boolean includeMetadataFields) throws
Exception {
+ Schema avroSchema = getTableAvroSchema(includeMetadataFields);
+ return HoodieSchema.fromAvroSchema(avroSchema);
+ }
+
/**
* Gets full schema (user + metadata) for a hoodie table in Avro format.
*
diff --git
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java
index ddefc8689340..509774506e17 100644
---
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java
+++
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/table/TestTableSchemaResolver.java
@@ -24,6 +24,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
@@ -62,6 +63,7 @@ import static
org.apache.hudi.common.testutils.HoodieCommonTestHarness.getDataBl
import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
@@ -110,6 +112,42 @@ class TestTableSchemaResolver {
}
}
+ @Test
+ void testGetTableSchema() throws Exception {
+ // Setup: Create mock metaClient and configure behavior
+ HoodieTableMetaClient metaClient = mock(HoodieTableMetaClient.class,
RETURNS_DEEP_STUBS);
+ Schema expectedSchema = getSimpleSchema();
+
+ // Mock table setup
+ when(metaClient.getTableConfig().populateMetaFields()).thenReturn(true);
+ when(metaClient.getTableConfig().getTableCreateSchema())
+ .thenReturn(Option.of(expectedSchema));
+
+ when(metaClient.getActiveTimeline().getLastCommitMetadataWithValidSchema())
+ .thenReturn(Option.empty());
+
+ // Create resolver and call both methods
+ TableSchemaResolver resolver = new TableSchemaResolver(metaClient);
+
+ // Test 1: getTableSchema() - should use table config's populateMetaFields
(true)
+ Schema avroSchema = resolver.getTableAvroSchema();
+ HoodieSchema hoodieSchema = resolver.getTableSchema();
+ assertNotNull(hoodieSchema);
+ assertEquals(avroSchema, hoodieSchema.getAvroSchema());
+
+ // Test 2: getTableSchema(true) - explicitly include metadata fields
+ Schema avroSchemaWithMetadata = resolver.getTableAvroSchema(true);
+ HoodieSchema hoodieSchemaWithMetadata = resolver.getTableSchema(true);
+ assertNotNull(hoodieSchemaWithMetadata);
+ assertEquals(avroSchemaWithMetadata,
hoodieSchemaWithMetadata.getAvroSchema());
+
+ // Test 3: getTableSchema(false) - explicitly exclude metadata fields
+ Schema avroSchemaWithoutMetadata = resolver.getTableAvroSchema(false);
+ HoodieSchema hoodieSchemaWithoutMetadata = resolver.getTableSchema(false);
+ assertNotNull(hoodieSchemaWithoutMetadata);
+ assertEquals(avroSchemaWithoutMetadata,
hoodieSchemaWithoutMetadata.getAvroSchema());
+ }
+
@Test
void testReadSchemaFromLogFile() throws IOException, URISyntaxException,
InterruptedException {
String testDir = initTestDir("read_schema_from_log_file");