This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 08e82c4 [HUDI-1762] Added HiveStylePartitionExtractor to support Hive
style partitions (#2769)
08e82c4 is described below
commit 08e82c469c456fbafc66c8e232dcd070f05fadee
Author: Vinoth Govindarajan <[email protected]>
AuthorDate: Thu Apr 8 22:00:11 2021 -0700
[HUDI-1762] Added HiveStylePartitionExtractor to support Hive style
partitions (#2769)
---
.../hive/HiveStylePartitionValueExtractor.java} | 32 ++++++++++++----------
.../hudi/hive/TestPartitionValueExtractor.java | 13 ++++++++-
2 files changed, 30 insertions(+), 15 deletions(-)
diff --git
a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java
similarity index 50%
copy from
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
copy to
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java
index a248e49..4bb20f5 100644
---
a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
+++
b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveStylePartitionValueExtractor.java
@@ -18,21 +18,25 @@
package org.apache.hudi.hive;
-import org.junit.jupiter.api.Test;
-import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertThrows;
+/**
+ * Extractor for Hive Style Partitioned tables, when the partition folders are
key value pairs.
+ *
+ * <p>This implementation extracts the partition value of yyyy-mm-dd from the
path of type datestr=yyyy-mm-dd.
+ */
+public class HiveStylePartitionValueExtractor implements
PartitionValueExtractor {
+ private static final long serialVersionUID = 1L;
-public class TestPartitionValueExtractor {
- @Test
- public void testHourPartition() {
- SlashEncodedHourPartitionValueExtractor hourPartition = new
SlashEncodedHourPartitionValueExtractor();
- List<String> list = new ArrayList<>();
- list.add("2020-12-20-01");
- assertEquals(hourPartition.extractPartitionValuesInPath("2020/12/20/01"),
list);
- assertThrows(IllegalArgumentException.class, () ->
hourPartition.extractPartitionValuesInPath("2020/12/20"));
-
assertEquals(hourPartition.extractPartitionValuesInPath("update_time=2020/12/20/01"),
list);
+ @Override
+ public List<String> extractPartitionValuesInPath(String partitionPath) {
+ // partition path is expected to be in this format
partition_key=partition_value.
+ String[] splits = partitionPath.split("=");
+ if (splits.length != 2) {
+ throw new IllegalArgumentException(
+ "Partition path " + partitionPath + " is not in the form
partition_key=partition_value.");
+ }
+ return Collections.singletonList(splits[1]);
}
-}
\ No newline at end of file
+}
diff --git
a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
index a248e49..ba5a544 100644
---
a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
+++
b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestPartitionValueExtractor.java
@@ -35,4 +35,15 @@ public class TestPartitionValueExtractor {
assertThrows(IllegalArgumentException.class, () ->
hourPartition.extractPartitionValuesInPath("2020/12/20"));
assertEquals(hourPartition.extractPartitionValuesInPath("update_time=2020/12/20/01"),
list);
}
-}
\ No newline at end of file
+
+ @Test
+ public void testHiveStylePartition() {
+ HiveStylePartitionValueExtractor hiveStylePartition = new
HiveStylePartitionValueExtractor();
+ List<String> list = new ArrayList<>();
+ list.add("2021-04-02");
+
assertEquals(hiveStylePartition.extractPartitionValuesInPath("datestr=2021-04-02"),
list);
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> hiveStylePartition.extractPartitionValuesInPath("2021/04/02"));
+ }
+}