This is an automated email from the ASF dual-hosted git repository.
timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
The following commit(s) were added to refs/heads/main by this push:
new 51cec294 Nested column partition support in HudiPartitionValuesExtractor when hive style partitioning is enabled (#738)
51cec294 is described below
commit 51cec294b7fb64e6aebc12e934f7643295435ed6
Author: Roushan Kumar <[email protected]>
AuthorDate: Mon Sep 8 19:07:35 2025 +0530
Nested column partition support in HudiPartitionValuesExtractor when hive style partitioning is enabled (#738)
---
.../xtable/hudi/HudiPartitionValuesExtractor.java | 2 +-
.../hudi/TestHudiPartitionValuesExtractor.java | 87 +++++++++++++++++++++-
2 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java
index a55968d9..bf9f1264 100644
--- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java
+++ b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java
@@ -53,7 +53,7 @@ public class HudiPartitionValuesExtractor {
List<PartitionValue> result = new ArrayList<>(totalNumberOfPartitions);
String remainingPartitionPath = partitionPath;
for (InternalPartitionField partitionField : partitionColumns) {
- String sourceFieldName = partitionField.getSourceField().getName();
+ String sourceFieldName = partitionField.getSourceField().getPath();
if (remainingPartitionPath.startsWith(sourceFieldName + "=")) {
// Strip off hive style partitioning
remainingPartitionPath =
remainingPartitionPath.substring(sourceFieldName.length() + 1);
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java
index 7903bc8e..c5915b9e 100644
--- a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java
+++ b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java
@@ -24,6 +24,8 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
@@ -32,6 +34,8 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
+import com.google.common.base.Strings;
+
import org.apache.xtable.exception.PartitionValuesExtractorException;
import org.apache.xtable.model.schema.InternalField;
import org.apache.xtable.model.schema.InternalPartitionField;
@@ -43,6 +47,12 @@ import org.apache.xtable.model.stat.Range;
public class TestHudiPartitionValuesExtractor {
+ private static final InternalSchema INT_SCHEMA =
+ InternalSchema.builder().name("int").dataType(InternalType.INT).build();
+
+ private static final InternalSchema STRING_SCHEMA =
+
InternalSchema.builder().name("string").dataType(InternalType.STRING).build();
+
@Test
public void testSingleColumn() {
InternalPartitionField column =
@@ -366,7 +376,6 @@ public class TestHudiPartitionValuesExtractor {
.sourceField(
InternalField.builder()
.name("column2")
- .parentPath("base")
.schema(
InternalSchema.builder().name("long").dataType(InternalType.LONG).build())
.build())
@@ -404,7 +413,6 @@ public class TestHudiPartitionValuesExtractor {
.sourceField(
InternalField.builder()
.name("column2")
- .parentPath("base")
.schema(
InternalSchema.builder().name("long").dataType(InternalType.LONG).build())
.build())
@@ -498,4 +506,79 @@ public class TestHudiPartitionValuesExtractor {
new HudiPartitionValuesExtractor(pathToPartitionFieldFormat)
.extractPartitionValues(Collections.singletonList(column),
"2022-10-02"));
}
+
+ static Stream<Arguments> nestedColumnPartitioning_testArgs() {
+ InternalPartitionField p1 = createSimplePartitionField("year",
"partition.date", INT_SCHEMA);
+ InternalPartitionField p2 = createSimplePartitionField("month",
"partition.date", INT_SCHEMA);
+ InternalPartitionField p3 = createSimplePartitionField("day",
"partition.date", INT_SCHEMA);
+ InternalPartitionField p4 = createSimplePartitionField("country", null,
STRING_SCHEMA);
+
+ return Stream.of(
+ // nested column partition, hive style enabled
+ Arguments.of(
+ Collections.singletonList(p1),
+ Collections.singletonList(Range.scalar(2022)),
+ "partition.date.year=2022"),
+ Arguments.of(
+ Arrays.asList(p1, p2),
+ Arrays.asList(Range.scalar(2022), Range.scalar(10)),
+ "partition.date.year=2022/partition.date.month=10"),
+ Arguments.of(
+ Arrays.asList(p1, p2, p3),
+ Arrays.asList(Range.scalar(2022), Range.scalar(10),
Range.scalar(2)),
+
"partition.date.year=2022/partition.date.month=10/partition.date.day=2"),
+ Arguments.of(
+ Arrays.asList(p1, p4),
+ Arrays.asList(Range.scalar(2022), Range.scalar("US")),
+ "partition.date.year=2022/country=US"),
+
+ // nested column partition, hive style disabled
+ Arguments.of(
+ Collections.singletonList(p1),
Collections.singletonList(Range.scalar(2022)), "2022"),
+ Arguments.of(
+ Arrays.asList(p1, p2), Arrays.asList(Range.scalar(2022),
Range.scalar(10)), "2022/10"),
+ Arguments.of(
+ Arrays.asList(p1, p2, p3),
+ Arrays.asList(Range.scalar(2022), Range.scalar(10),
Range.scalar(2)),
+ "2022/10/2"),
+ Arguments.of(
+ Arrays.asList(p1, p4),
+ Arrays.asList(Range.scalar(2022), Range.scalar("US")),
+ "2022/US"));
+ }
+
+ @ParameterizedTest
+ @MethodSource("nestedColumnPartitioning_testArgs")
+ void testNestedColumnPartitioning(
+ List<InternalPartitionField> partitionFields,
+ List<Range> partitionRanges,
+ String partitionPath) {
+ List<PartitionValue> expected =
+ IntStream.range(0, partitionFields.size())
+ .mapToObj(
+ i ->
+ PartitionValue.builder()
+ .partitionField(partitionFields.get(i))
+ .range(partitionRanges.get(i))
+ .build())
+ .collect(Collectors.toList());
+
+ List<PartitionValue> actual =
+ new HudiPartitionValuesExtractor(Collections.emptyMap())
+ .extractPartitionValues(partitionFields, partitionPath);
+ Assertions.assertEquals(expected, actual);
+ }
+
+ private static InternalPartitionField createSimplePartitionField(
+ String name, String parentPath, InternalSchema schema) {
+ InternalField.InternalFieldBuilder sourceFieldBuilder =
+ InternalField.builder().name(name).schema(schema);
+ if (!Strings.isNullOrEmpty(parentPath)) {
+ sourceFieldBuilder.parentPath(parentPath);
+ }
+ return InternalPartitionField.builder()
+ .sourceField(sourceFieldBuilder.build())
+ .transformType(PartitionTransformType.VALUE)
+ .build();
+ }
}