This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6d35b74  test: add tests crate and adopt testing tables (#30)
6d35b74 is described below

commit 6d35b74a15aedbd0b45a85203d0eddb8da3794c7
Author: Shiyan Xu <[email protected]>
AuthorDate: Sat Jun 29 18:36:11 2024 -0500

    test: add tests crate and adopt testing tables (#30)
---
 crates/core/Cargo.toml                             |   9 +-
 crates/core/src/lib.rs                             |   1 -
 crates/core/src/storage/file_info.rs               |   2 +-
 crates/core/src/storage/file_stats.rs              |   2 +-
 crates/core/src/storage/mod.rs                     |  54 ++++++++----
 crates/core/src/storage/utils.rs                   |  58 ++++++++++++-
 crates/core/src/table/fs_view.rs                   |  45 +++++-----
 crates/core/src/table/mod.rs                       |  68 ++++++++++-----
 crates/core/src/timeline/mod.rs                    |   9 +-
 crates/datafusion/Cargo.toml                       |   7 +-
 crates/hudi/Cargo.toml                             |   4 +-
 crates/{hudi => tests}/Cargo.toml                  |   8 +-
 .../tables/v6_complexkeygen_hivestyle.datagen.sql  |  87 +++++++++++++++++++
 .../data/tables/v6_complexkeygen_hivestyle.zip     | Bin 0 -> 42914 bytes
 .../data/tables/v6_empty.sql}                      |  15 +++-
 crates/tests/data/tables/v6_empty.zip              | Bin 0 -> 2258 bytes
 .../data/tables/v6_nonpartitioned.datagen.sql      |  80 ++++++++++++++++++
 crates/tests/data/tables/v6_nonpartitioned.zip     | Bin 0 -> 24851 bytes
 ...implekeygen_hivestyle_no_metafields.datagen.sql |  81 ++++++++++++++++++
 .../v6_simplekeygen_hivestyle_no_metafields.zip    | Bin 0 -> 17950 bytes
 .../v6_simplekeygen_nonhivestyle.datagen.sql       |  88 ++++++++++++++++++++
 .../data/tables/v6_simplekeygen_nonhivestyle.zip   | Bin 0 -> 38375 bytes
 .../tables/v6_timebasedkeygen_nonhivestyle.sql     |  92 +++++++++++++++++++++
 .../tables/v6_timebasedkeygen_nonhivestyle.zip     | Bin 0 -> 49127 bytes
 crates/tests/src/lib.rs                            |  72 ++++++++++++++++
 .../{core/src/test_utils.rs => tests/src/utils.rs} |  13 ---
 python/Cargo.toml                                  |   6 +-
 python/tests/conftest.py                           |   2 +-
 .../tests}/table/0.x_cow_partitioned.zip           | Bin
 29 files changed, 696 insertions(+), 107 deletions(-)

diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 98c12ab..6363804 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -17,16 +17,17 @@
 
 [package]
 name = "hudi-core"
-version = "0.1.0"
+version.workspace = true
 edition.workspace = true
 license.workspace = true
 rust-version.workspace = true
 
 [dependencies]
+hudi-tests = { path = "../tests" }
 # arrow
 arrow = { workspace = true }
 arrow-arith = { workspace = true }
-arrow-array = { workspace = true , features = ["chrono-tz"]}
+arrow-array = { workspace = true, features = ["chrono-tz"] }
 arrow-buffer = { workspace = true }
 arrow-cast = { workspace = true }
 arrow-ipc = { workspace = true }
@@ -68,7 +69,3 @@ async-recursion = { workspace = true }
 async-trait = { workspace = true }
 tokio = { workspace = true }
 futures = { workspace = true }
-
-# test
-tempfile = "3.10.1"
-zip-extract = "0.1.3"
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index d2c53ee..1586ff8 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -23,7 +23,6 @@ pub mod file_group;
 pub mod table;
 pub type HudiTable = Table;
 mod storage;
-pub mod test_utils;
 mod timeline;
 
 pub fn crate_version() -> &'static str {
diff --git a/crates/core/src/storage/file_info.rs 
b/crates/core/src/storage/file_info.rs
index 4bd178d..8a77048 100644
--- a/crates/core/src/storage/file_info.rs
+++ b/crates/core/src/storage/file_info.rs
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct FileInfo {
     pub uri: String,
     pub name: String,
diff --git a/crates/core/src/storage/file_stats.rs 
b/crates/core/src/storage/file_stats.rs
index ec63c14..19a7000 100644
--- a/crates/core/src/storage/file_stats.rs
+++ b/crates/core/src/storage/file_stats.rs
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct FileStats {
     pub num_records: i64,
 }
diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs
index c8b7b34..b35f30d 100644
--- a/crates/core/src/storage/mod.rs
+++ b/crates/core/src/storage/mod.rs
@@ -132,8 +132,7 @@ impl Storage {
             .objects
             .into_iter()
             .map(|obj_meta| FileInfo {
-                uri: prefix_url
-                    .join(obj_meta.location.filename().unwrap())
+                uri: join_url_segments(&prefix_url, 
&[obj_meta.location.filename().unwrap()])
                     .unwrap()
                     .to_string(),
                 name: obj_meta.location.filename().unwrap().to_string(),
@@ -172,6 +171,7 @@ mod tests {
     use object_store::path::Path as ObjPath;
     use url::Url;
 
+    use crate::storage::file_info::FileInfo;
     use crate::storage::utils::join_url_segments;
     use crate::storage::{get_leaf_dirs, Storage};
 
@@ -224,28 +224,50 @@ mod tests {
             canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
-        let storage = Storage::new(base_url, HashMap::new());
-        let file_names_1: Vec<String> = storage
-            .list_files(None)
-            .await
-            .into_iter()
-            .map(|file_info| file_info.name)
-            .collect();
-        assert_eq!(file_names_1, vec!["a.parquet"]);
-        let file_names_2: Vec<String> = storage
+        let storage = Storage::new(base_url.clone(), HashMap::new());
+        let file_info_1: Vec<FileInfo> = 
storage.list_files(None).await.into_iter().collect();
+        assert_eq!(
+            file_info_1,
+            vec![FileInfo {
+                uri: base_url.clone().join("a.parquet").unwrap().to_string(),
+                name: "a.parquet".to_string(),
+                size: 0,
+            }]
+        );
+        let file_info_2: Vec<FileInfo> = storage
             .list_files(Some("part1"))
             .await
             .into_iter()
-            .map(|file_info| file_info.name)
             .collect();
-        assert_eq!(file_names_2, vec!["b.parquet"]);
-        let file_names_3: Vec<String> = storage
+        assert_eq!(
+            file_info_2,
+            vec![FileInfo {
+                uri: base_url
+                    .clone()
+                    .join("part1/b.parquet")
+                    .unwrap()
+                    .to_string(),
+                name: "b.parquet".to_string(),
+                size: 0,
+            }]
+        );
+        let file_info_3: Vec<FileInfo> = storage
             .list_files(Some("part2/part22"))
             .await
             .into_iter()
-            .map(|file_info| file_info.name)
             .collect();
-        assert_eq!(file_names_3, vec!["c.parquet"]);
+        assert_eq!(
+            file_info_3,
+            vec![FileInfo {
+                uri: base_url
+                    .clone()
+                    .join("part2/part22/c.parquet")
+                    .unwrap()
+                    .to_string(),
+                name: "c.parquet".to_string(),
+                size: 0,
+            }]
+        );
     }
 
     #[tokio::test]
diff --git a/crates/core/src/storage/utils.rs b/crates/core/src/storage/utils.rs
index cf81dc0..d1f8c4a 100644
--- a/crates/core/src/storage/utils.rs
+++ b/crates/core/src/storage/utils.rs
@@ -17,8 +17,9 @@
  * under the License.
  */
 
-use anyhow::{anyhow, Result};
 use std::path::Path;
+
+use anyhow::{anyhow, Result};
 use url::{ParseError, Url};
 
 pub fn split_filename(filename: &str) -> Result<(String, String)> {
@@ -46,9 +47,58 @@ pub fn join_url_segments(base_url: &Url, segments: &[&str]) 
-> Result<Url> {
         url.path_segments_mut().unwrap().pop();
     }
 
-    url.path_segments_mut()
-        .map_err(|_| ParseError::RelativeUrlWithoutBase)?
-        .extend(segments);
+    for &seg in segments {
+        let segs: Vec<_> = seg.split('/').filter(|&s| !s.is_empty()).collect();
+        url.path_segments_mut()
+            .map_err(|_| ParseError::RelativeUrlWithoutBase)?
+            .extend(segs);
+    }
 
     Ok(url)
 }
+
+#[cfg(test)]
+mod tests {
+    use std::str::FromStr;
+
+    use url::Url;
+
+    use crate::storage::utils::join_url_segments;
+
+    #[test]
+    fn join_base_url_with_segments() {
+        let base_url = Url::from_str("file:///base").unwrap();
+
+        assert_eq!(
+            join_url_segments(&base_url, &["foo"]).unwrap(),
+            Url::from_str("file:///base/foo").unwrap()
+        );
+
+        assert_eq!(
+            join_url_segments(&base_url, &["/foo"]).unwrap(),
+            Url::from_str("file:///base/foo").unwrap()
+        );
+
+        assert_eq!(
+            join_url_segments(&base_url, &["/foo", "bar/", "/baz/"]).unwrap(),
+            Url::from_str("file:///base/foo/bar/baz").unwrap()
+        );
+
+        assert_eq!(
+            join_url_segments(&base_url, &["foo/", "", "bar/baz"]).unwrap(),
+            Url::from_str("file:///base/foo/bar/baz").unwrap()
+        );
+
+        assert_eq!(
+            join_url_segments(&base_url, &["foo1/bar1", "foo2/bar2"]).unwrap(),
+            Url::from_str("file:///base/foo1/bar1/foo2/bar2").unwrap()
+        );
+    }
+
+    #[test]
+    fn join_failed_due_to_invalid_base() {
+        let base_url = Url::from_str("foo:text/plain,bar").unwrap();
+        let result = join_url_segments(&base_url, &["foo"]);
+        assert!(result.is_err());
+    }
+}
diff --git a/crates/core/src/table/fs_view.rs b/crates/core/src/table/fs_view.rs
index 5f9cf0f..c7c20e1 100644
--- a/crates/core/src/table/fs_view.rs
+++ b/crates/core/src/table/fs_view.rs
@@ -185,53 +185,50 @@ async fn get_partitions_and_file_groups(
 #[cfg(test)]
 mod tests {
     use std::collections::HashSet;
-    use std::fs::canonicalize;
-    use std::path::Path;
 
-    use url::Url;
+    use hudi_tests::TestTable;
 
     use crate::table::fs_view::FileSystemView;
-    use crate::test_utils::extract_test_table;
 
     #[tokio::test]
-    async fn get_partition_paths() {
-        let fixture_path =
-            
canonicalize(Path::new("fixtures/table/0.x_cow_partitioned.zip")).unwrap();
-        let base_url = 
Url::from_file_path(extract_test_table(&fixture_path)).unwrap();
+    async fn get_partition_paths_for_nonpartitioned_table() {
+        let base_url = TestTable::V6Nonpartitioned.url();
+        let fs_view = FileSystemView::new(base_url);
+        let partition_paths = fs_view.get_partition_paths().await.unwrap();
+        let partition_path_set: HashSet<&str> =
+            HashSet::from_iter(partition_paths.iter().map(|p| p.as_str()));
+        assert_eq!(partition_path_set, HashSet::new(),)
+    }
+
+    #[tokio::test]
+    async fn get_partition_paths_for_complexkeygen_table() {
+        let base_url = TestTable::V6ComplexkeygenHivestyle.url();
         let fs_view = FileSystemView::new(base_url);
         let partition_paths = fs_view.get_partition_paths().await.unwrap();
         let partition_path_set: HashSet<&str> =
             HashSet::from_iter(partition_paths.iter().map(|p| p.as_str()));
         assert_eq!(
             partition_path_set,
-            HashSet::from_iter(vec!["chennai", "sao_paulo", "san_francisco"])
+            HashSet::from_iter(vec![
+                "byteField=10/shortField=300",
+                "byteField=20/shortField=100",
+                "byteField=30/shortField=100"
+            ])
         )
     }
 
     #[test]
     fn get_latest_file_slices() {
-        let fixture_path =
-            
canonicalize(Path::new("fixtures/table/0.x_cow_partitioned.zip")).unwrap();
-        let base_url = 
Url::from_file_path(extract_test_table(&fixture_path)).unwrap();
+        let base_url = TestTable::V6Nonpartitioned.url();
         let mut fs_view = FileSystemView::new(base_url);
         fs_view.load_file_groups();
         let file_slices = fs_view.get_latest_file_slices();
-        assert_eq!(file_slices.len(), 5);
+        assert_eq!(file_slices.len(), 1);
         let mut fg_ids = Vec::new();
         for f in file_slices {
             let fp = f.file_group_id();
             fg_ids.push(fp);
         }
-        let actual: HashSet<&str> = fg_ids.into_iter().collect();
-        assert_eq!(
-            actual,
-            HashSet::from_iter(vec![
-                "780b8586-3ad0-48ef-a6a1-d2217845ce4a-0",
-                "d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0",
-                "ee915c68-d7f8-44f6-9759-e691add290d8-0",
-                "68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0",
-                "5a226868-2934-4f84-a16f-55124630c68d-0"
-            ])
-        );
+        assert_eq!(fg_ids, vec!["a079bdb3-731c-4894-b855-abfcd6921007-0"])
     }
 }
diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs
index 681ef05..d26decc 100644
--- a/crates/core/src/table/mod.rs
+++ b/crates/core/src/table/mod.rs
@@ -255,21 +255,21 @@ impl ProvidesTableMetadata for Table {
 
 #[cfg(test)]
 mod tests {
-    use std::collections::HashMap;
+    use std::collections::{HashMap, HashSet};
     use std::fs::canonicalize;
     use std::path::Path;
-    use url::Url;
 
+    use hudi_tests::TestTable;
+
+    use crate::storage::utils::join_url_segments;
     use crate::table::config::BaseFileFormat::Parquet;
     use crate::table::config::TableType::CopyOnWrite;
     use crate::table::metadata::ProvidesTableMetadata;
     use crate::table::Table;
-    use crate::test_utils::extract_test_table;
 
     #[test]
     fn hudi_table_get_latest_schema() {
-        let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
-        let base_url = 
Url::from_file_path(extract_test_table(fixture_path)).unwrap();
+        let base_url = TestTable::V6Nonpartitioned.url();
         let hudi_table = Table::new(base_url.path(), HashMap::new());
         let fields: Vec<String> = hudi_table
             .get_latest_schema()
@@ -285,36 +285,66 @@ mod tests {
                 "_hoodie_record_key",
                 "_hoodie_partition_path",
                 "_hoodie_file_name",
-                "ts",
-                "uuid",
-                "rider",
-                "driver",
-                "fare",
-                "city"
+                "id",
+                "name",
+                "isActive",
+                "byteField",
+                "shortField",
+                "intField",
+                "longField",
+                "floatField",
+                "doubleField",
+                "decimalField",
+                "dateField",
+                "timestampField",
+                "binaryField",
+                "arrayField",
+                "array",
+                "arr_struct_f1",
+                "arr_struct_f2",
+                "mapField",
+                "key_value",
+                "key",
+                "value",
+                "map_field_value_struct_f1",
+                "map_field_value_struct_f2",
+                "structField",
+                "field1",
+                "field2",
+                "child_struct",
+                "child_field1",
+                "child_field2"
             ])
         );
     }
 
     #[test]
     fn hudi_table_read_file_slice() {
-        let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
-        let base_url = 
Url::from_file_path(extract_test_table(fixture_path)).unwrap();
+        let base_url = TestTable::V6Nonpartitioned.url();
         let mut hudi_table = Table::new(base_url.path(), HashMap::new());
         let batches = hudi_table.read_file_slice(
-            
"san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet",
+            
"a079bdb3-731c-4894-b855-abfcd6921007-0_0-203-274_20240418173551906.parquet",
         );
         assert_eq!(batches.len(), 1);
-        assert_eq!(batches.first().unwrap().num_rows(), 1);
-        assert_eq!(batches.first().unwrap().num_columns(), 11);
+        assert_eq!(batches.first().unwrap().num_rows(), 4);
+        assert_eq!(batches.first().unwrap().num_columns(), 21);
     }
 
     #[test]
     fn hudi_table_get_latest_file_paths() {
-        let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
-        let base_url = 
Url::from_file_path(extract_test_table(fixture_path)).unwrap();
+        let base_url = TestTable::V6ComplexkeygenHivestyle.url();
         let mut hudi_table = Table::new(base_url.path(), HashMap::new());
         assert_eq!(hudi_table.get_timeline().unwrap().instants.len(), 2);
-        assert_eq!(hudi_table.get_latest_file_paths().unwrap().len(), 5);
+        let actual: HashSet<String> =
+            HashSet::from_iter(hudi_table.get_latest_file_paths().unwrap());
+        let expected: HashSet<String> = HashSet::from_iter(vec![
+            
"byteField=10/shortField=300/a22e8257-e249-45e9-ba46-115bc85adcba-0_0-161-223_20240418173235694.parquet",
+            
"byteField=20/shortField=100/bb7c3a45-387f-490d-aab2-981c3f1a8ada-0_0-140-198_20240418173213674.parquet",
+            
"byteField=30/shortField=100/4668e35e-bff8-4be9-9ff2-e7fb17ecb1a7-0_1-161-224_20240418173235694.parquet",
+        ]
+            .into_iter().map(|f| { join_url_segments(&base_url, 
&[f]).unwrap().to_string() })
+            .collect::<Vec<_>>());
+        assert_eq!(actual, expected);
     }
 
     #[test]
diff --git a/crates/core/src/timeline/mod.rs b/crates/core/src/timeline/mod.rs
index e7f8010..311751a 100644
--- a/crates/core/src/timeline/mod.rs
+++ b/crates/core/src/timeline/mod.rs
@@ -138,17 +138,16 @@ mod tests {
 
     use url::Url;
 
-    use crate::test_utils::extract_test_table;
+    use hudi_tests::TestTable;
+
     use crate::timeline::{Instant, State, Timeline};
 
     #[tokio::test]
     async fn read_latest_schema() {
-        let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
-        let target_table_path = extract_test_table(fixture_path);
-        let base_url = 
Url::from_file_path(canonicalize(target_table_path).unwrap()).unwrap();
+        let base_url = TestTable::V6Nonpartitioned.url();
         let timeline = Timeline::new(base_url).await.unwrap();
         let table_schema = timeline.get_latest_schema().await.unwrap();
-        assert_eq!(table_schema.fields.len(), 11)
+        assert_eq!(table_schema.fields.len(), 21)
     }
 
     #[tokio::test]
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index e1a4560..4f250ff 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -17,17 +17,18 @@
 
 [package]
 name = "hudi-datafusion"
-version = "0.1.0"
+version.workspace = true
 edition.workspace = true
 license.workspace = true
 rust-version.workspace = true
 
 [dependencies]
-hudi-core = { path = "../core"}
+hudi-core = { path = "../core" }
+hudi-tests = { path = "../tests" }
 # arrow
 arrow = { workspace = true }
 arrow-arith = { workspace = true }
-arrow-array = { workspace = true , features = ["chrono-tz"]}
+arrow-array = { workspace = true, features = ["chrono-tz"] }
 arrow-buffer = { workspace = true }
 arrow-cast = { workspace = true }
 arrow-ipc = { workspace = true }
diff --git a/crates/hudi/Cargo.toml b/crates/hudi/Cargo.toml
index 5672e85..b6a08a8 100644
--- a/crates/hudi/Cargo.toml
+++ b/crates/hudi/Cargo.toml
@@ -22,7 +22,5 @@ edition.workspace = true
 license.workspace = true
 rust-version.workspace = true
 
-# See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
-hudi-core = { path = "../core"}
+hudi-core = { path = "../core" }
diff --git a/crates/hudi/Cargo.toml b/crates/tests/Cargo.toml
similarity index 86%
copy from crates/hudi/Cargo.toml
copy to crates/tests/Cargo.toml
index 5672e85..b6efe85 100644
--- a/crates/hudi/Cargo.toml
+++ b/crates/tests/Cargo.toml
@@ -16,13 +16,13 @@
 # under the License.
 
 [package]
-name = "hudi"
+name = "hudi-tests"
 version.workspace = true
 edition.workspace = true
 license.workspace = true
 rust-version.workspace = true
 
-# See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
-hudi-core = { path = "../core"}
+tempfile = "3.10.1"
+zip-extract = "0.1.3"
+url = { workspace = true }
diff --git a/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql 
b/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
new file mode 100644
index 0000000..77a1fa7
--- /dev/null
+++ b/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v6_complexkeygen_hivestyle (
+                                            id INT,
+                                            name STRING,
+                                            isActive BOOLEAN,
+                                            intField INT,
+                                            longField LONG,
+                                            floatField FLOAT,
+                                            doubleField DOUBLE,
+                                            decimalField DECIMAL(10,5),
+                                            dateField DATE,
+                                            timestampField TIMESTAMP,
+                                            binaryField BINARY,
+                                            arrayField 
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>,
+                                            mapField MAP<STRING, 
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
+                                            structField STRUCT<
+                                                field1: STRING,
+                                            field2: INT,
+                                            child_struct: STRUCT<
+                                                child_field1: DOUBLE,
+                                            child_field2: BOOLEAN
+                                                >
+                                                >,
+                                            byteField BYTE,
+                                            shortField SHORT
+)
+    USING HUDI
+TBLPROPERTIES (
+    type = 'cow',
+    primaryKey = 'id,name',
+    preCombineField = 'longField',
+    'hoodie.metadata.enable' = 'false',
+    'hoodie.datasource.write.hive_style_partitioning' = 'true'
+)
+PARTITIONED BY (byteField, shortField);
+
+INSERT INTO v6_complexkeygen_hivestyle VALUES
+                                           (1, 'Alice', true, 15000, 
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE), 
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+                                            ARRAY(STRUCT('red', 100), 
STRUCT('blue', 200), STRUCT('green', 300)),
+                                            MAP('key1', STRUCT(123.456, true), 
'key2', STRUCT(789.012, false)),
+                                            STRUCT('Alice', 30, 
STRUCT(123.456, true)),
+                                            10, 300
+                                           ),
+                                           (2, 'Bob', false, 25000, 
9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE), 
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+                                            ARRAY(STRUCT('yellow', 400), 
STRUCT('purple', 500)),
+                                            MAP('key3', STRUCT(234.567, true), 
'key4', STRUCT(567.890, false)),
+                                            STRUCT('Bob', 40, STRUCT(789.012, 
false)),
+                                            20, 100
+                                           ),
+                                           (3, 'Carol', true, 35000, 
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE), 
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS 
BINARY),
+                                            ARRAY(STRUCT('black', 600), 
STRUCT('white', 700), STRUCT('pink', 800)),
+                                            MAP('key5', STRUCT(345.678, true), 
'key6', STRUCT(654.321, false)),
+                                            STRUCT('Carol', 25, 
STRUCT(456.789, true)),
+                                            10, 300
+                                           );
+
+INSERT INTO v6_complexkeygen_hivestyle VALUES
+                                           (1, 'Alice', false, 15000, 
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE), 
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+                                            ARRAY(STRUCT('red', 100), 
STRUCT('blue', 200), STRUCT('green', 300)),
+                                            MAP('key1', STRUCT(123.456, true), 
'key2', STRUCT(789.012, false)),
+                                            STRUCT('Alice', 30, 
STRUCT(123.456, true)),
+                                            10, 300
+                                           ),
+                                           (4, 'Diana', true, 45000, 
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE), 
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+                                            ARRAY(STRUCT('orange', 900), 
STRUCT('gray', 1000)),
+                                            MAP('key7', STRUCT(456.789, true), 
'key8', STRUCT(123.456, false)),
+                                            STRUCT('Diana', 50, 
STRUCT(987.654, true)),
+                                            30, 100
+                                           );
diff --git a/crates/tests/data/tables/v6_complexkeygen_hivestyle.zip 
b/crates/tests/data/tables/v6_complexkeygen_hivestyle.zip
new file mode 100644
index 0000000..6f3dbb9
Binary files /dev/null and 
b/crates/tests/data/tables/v6_complexkeygen_hivestyle.zip differ
diff --git a/crates/core/src/storage/file_stats.rs 
b/crates/tests/data/tables/v6_empty.sql
similarity index 74%
copy from crates/core/src/storage/file_stats.rs
copy to crates/tests/data/tables/v6_empty.sql
index ec63c14..6db4624 100644
--- a/crates/core/src/storage/file_stats.rs
+++ b/crates/tests/data/tables/v6_empty.sql
@@ -17,7 +17,14 @@
  * under the License.
  */
 
-#[derive(Clone, Debug, Default)]
-pub struct FileStats {
-    pub num_records: i64,
-}
+create table v6_empty (
+                          id INT,
+                          name STRING,
+                          isActive BOOLEAN
+)
+    USING HUDI
+    TBLPROPERTIES (
+        type = 'cow',
+        primaryKey = 'id',
+        'hoodie.metadata.enable' = 'false'
+);
diff --git a/crates/tests/data/tables/v6_empty.zip 
b/crates/tests/data/tables/v6_empty.zip
new file mode 100644
index 0000000..a4a1151
Binary files /dev/null and b/crates/tests/data/tables/v6_empty.zip differ
diff --git a/crates/tests/data/tables/v6_nonpartitioned.datagen.sql 
b/crates/tests/data/tables/v6_nonpartitioned.datagen.sql
new file mode 100644
index 0000000..d581dfa
--- /dev/null
+++ b/crates/tests/data/tables/v6_nonpartitioned.datagen.sql
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v6_nonpartitioned (
+                                   id INT,
+                                   name STRING,
+                                   isActive BOOLEAN,
+                                   byteField BYTE,
+                                   shortField SHORT,
+                                   intField INT,
+                                   longField LONG,
+                                   floatField FLOAT,
+                                   doubleField DOUBLE,
+                                   decimalField DECIMAL(10,5),
+                                   dateField DATE,
+                                   timestampField TIMESTAMP,
+                                   binaryField BINARY,
+                                   arrayField ARRAY<STRUCT<arr_struct_f1: 
STRING, arr_struct_f2: INT>>,  -- Array of structs
+                                   mapField MAP<STRING, 
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>, 
 -- Map with struct values
+                                   structField STRUCT<
+                                       field1: STRING,
+                                   field2: INT,
+                                   child_struct: STRUCT<
+                                       child_field1: DOUBLE,
+                                   child_field2: BOOLEAN
+                                       >
+                                       >
+)
+    USING HUDI
+TBLPROPERTIES (
+    type = 'cow',
+    primaryKey = 'id',
+    preCombineField = 'longField',
+    'hoodie.metadata.enable' = 'false'
+);
+
+INSERT INTO v6_nonpartitioned VALUES
+                                  (1, 'Alice', true, 1, 300, 15000, 
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE), 
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+                                   ARRAY(STRUCT('red', 100), STRUCT('blue', 
200), STRUCT('green', 300)),
+                                   MAP('key1', STRUCT(123.456, true), 'key2', 
STRUCT(789.012, false)),
+                                   STRUCT('Alice', 30, STRUCT(123.456, true))
+                                  ),
+                                  (2, 'Bob', false, 0, 100, 25000, 9876543210, 
2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE), CAST('2023-04-02 
13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+                                   ARRAY(STRUCT('yellow', 400), 
STRUCT('purple', 500)),
+                                   MAP('key3', STRUCT(234.567, true), 'key4', 
STRUCT(567.890, false)),
+                                   STRUCT('Bob', 40, STRUCT(789.012, false))
+                                  ),
+                                  (3, 'Carol', true, 1, 200, 35000, 
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE), 
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS 
BINARY),
+                                   ARRAY(STRUCT('black', 600), STRUCT('white', 
700), STRUCT('pink', 800)),
+                                   MAP('key5', STRUCT(345.678, true), 'key6', 
STRUCT(654.321, false)),
+                                   STRUCT('Carol', 25, STRUCT(456.789, true))
+                                  );
+
+INSERT INTO v6_nonpartitioned VALUES
+                                  (1, 'Alice', false, 1, 300, 15000, 
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE), 
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+                                   ARRAY(STRUCT('red', 100), STRUCT('blue', 
200), STRUCT('green', 300)),
+                                   MAP('key1', STRUCT(123.456, true), 'key2', 
STRUCT(789.012, false)),
+                                   STRUCT('Alice', 30, STRUCT(123.456, true))
+                                  ),
+                                  (4, 'Diana', true, 1, 500, 45000, 987654321, 
4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00' 
AS TIMESTAMP), CAST('new binary data' AS BINARY),
+                                   ARRAY(STRUCT('orange', 900), STRUCT('gray', 
1000)),
+                                   MAP('key7', STRUCT(456.789, true), 'key8', 
STRUCT(123.456, false)),
+                                   STRUCT('Diana', 50, STRUCT(987.654, true))
+                                  );
diff --git a/crates/tests/data/tables/v6_nonpartitioned.zip 
b/crates/tests/data/tables/v6_nonpartitioned.zip
new file mode 100644
index 0000000..4675f83
Binary files /dev/null and b/crates/tests/data/tables/v6_nonpartitioned.zip 
differ
diff --git 
a/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql 
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
new file mode 100644
index 0000000..de37ffc
--- /dev/null
+++ 
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
-- COW table keyed on `id`, hive-style partitioned on `byteField`,
-- with Hudi meta fields disabled ('hoodie.populate.meta.fields' = 'false').
CREATE TABLE v6_simplekeygen_hivestyle_no_metafields (
    id INT,
    name STRING,
    isActive BOOLEAN,
    shortField SHORT,
    intField INT,
    longField LONG,
    floatField FLOAT,
    doubleField DOUBLE,
    decimalField DECIMAL(10,5),
    dateField DATE,
    timestampField TIMESTAMP,
    binaryField BINARY,
    arrayField ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>,  -- Array of structs
    mapField MAP<STRING, STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,  -- Map with struct values
    structField STRUCT<
        field1: STRING,
        field2: INT,
        child_struct: STRUCT<
            child_field1: DOUBLE,
            child_field2: BOOLEAN
        >
    >,
    byteField BYTE  -- partition column
)
USING HUDI
TBLPROPERTIES (
    type = 'cow',
    primaryKey = 'id',
    preCombineField = 'longField',
    'hoodie.metadata.enable' = 'false',
    'hoodie.datasource.write.hive_style_partitioning' = 'true',
    'hoodie.datasource.write.drop.partition.columns' = 'false',
    'hoodie.populate.meta.fields' = 'false'
)
PARTITIONED BY (byteField);
+
-- Seed rows; the trailing literal is byteField, the partition value (10/20/10/30).
INSERT INTO v6_simplekeygen_hivestyle_no_metafields VALUES
(1, 'Alice', false, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
 CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
 ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
 MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
 STRUCT('Alice', 30, STRUCT(123.456, true)),
 10),
(2, 'Bob', false, 100, 25000, 9876543210, 2.0, 2.71828, 67890.12345,
 CAST('2023-04-02' AS DATE), CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
 ARRAY(STRUCT('yellow', 400), STRUCT('purple', 500)),
 MAP('key3', STRUCT(234.567, true), 'key4', STRUCT(567.890, false)),
 STRUCT('Bob', 40, STRUCT(789.012, false)),
 20),
(3, 'Carol', true, 200, 35000, 1928374650, 3.0, 1.41421, 11111.22222,
 CAST('2023-04-03' AS DATE), CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS BINARY),
 ARRAY(STRUCT('black', 600), STRUCT('white', 700), STRUCT('pink', 800)),
 MAP('key5', STRUCT(345.678, true), 'key6', STRUCT(654.321, false)),
 STRUCT('Carol', 25, STRUCT(456.789, true)),
 10),
(4, 'Diana', true, 500, 45000, 987654321, 4.0, 2.468, 65432.12345,
 CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
 ARRAY(STRUCT('orange', 900), STRUCT('gray', 1000)),
 MAP('key7', STRUCT(456.789, true), 'key8', STRUCT(123.456, false)),
 STRUCT('Diana', 50, STRUCT(987.654, true)),
 30);
diff --git 
a/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.zip 
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.zip
new file mode 100644
index 0000000..dcc76c4
Binary files /dev/null and 
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.zip differ
diff --git a/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql 
b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
new file mode 100644
index 0000000..f1d8c9e
--- /dev/null
+++ b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
-- COW table keyed on `id`, non-hive-style partitioned on `byteField`.
CREATE TABLE v6_simplekeygen_nonhivestyle (
    id INT,
    name STRING,
    isActive BOOLEAN,
    shortField SHORT,
    intField INT,
    longField LONG,
    floatField FLOAT,
    doubleField DOUBLE,
    decimalField DECIMAL(10,5),
    dateField DATE,
    timestampField TIMESTAMP,
    binaryField BINARY,
    arrayField ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>,  -- Array of structs
    mapField MAP<STRING, STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,  -- Map with struct values
    structField STRUCT<
        field1: STRING,
        field2: INT,
        child_struct: STRUCT<
            child_field1: DOUBLE,
            child_field2: BOOLEAN
        >
    >,
    byteField BYTE  -- partition column
)
USING HUDI
TBLPROPERTIES (
    type = 'cow',
    primaryKey = 'id',
    preCombineField = 'longField',
    'hoodie.metadata.enable' = 'false',
    'hoodie.datasource.write.hive_style_partitioning' = 'false',
    'hoodie.datasource.write.drop.partition.columns' = 'false'
)
PARTITIONED BY (byteField);
+
-- First insert batch (keys 1-3); the trailing literal is byteField, the partition value.
INSERT INTO v6_simplekeygen_nonhivestyle VALUES
(1, 'Alice', true, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
 CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
 ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
 MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
 STRUCT('Alice', 30, STRUCT(123.456, true)),
 10),
(2, 'Bob', false, 100, 25000, 9876543210, 2.0, 2.71828, 67890.12345,
 CAST('2023-04-02' AS DATE), CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
 ARRAY(STRUCT('yellow', 400), STRUCT('purple', 500)),
 MAP('key3', STRUCT(234.567, true), 'key4', STRUCT(567.890, false)),
 STRUCT('Bob', 40, STRUCT(789.012, false)),
 20),
(3, 'Carol', true, 200, 35000, 1928374650, 3.0, 1.41421, 11111.22222,
 CAST('2023-04-03' AS DATE), CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS BINARY),
 ARRAY(STRUCT('black', 600), STRUCT('white', 700), STRUCT('pink', 800)),
 MAP('key5', STRUCT(345.678, true), 'key6', STRUCT(654.321, false)),
 STRUCT('Carol', 25, STRUCT(456.789, true)),
 10);
+
-- Second insert batch: keys 1 and 4.
INSERT INTO v6_simplekeygen_nonhivestyle VALUES
(1, 'Alice', false, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
 CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
 ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
 MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
 STRUCT('Alice', 30, STRUCT(123.456, true)),
 10),
(4, 'Diana', true, 500, 45000, 987654321, 4.0, 2.468, 65432.12345,
 CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
 ARRAY(STRUCT('orange', 900), STRUCT('gray', 1000)),
 MAP('key7', STRUCT(456.789, true), 'key8', STRUCT(123.456, false)),
 STRUCT('Diana', 50, STRUCT(987.654, true)),
 30);
diff --git a/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.zip 
b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.zip
new file mode 100644
index 0000000..4f05679
Binary files /dev/null and 
b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.zip differ
diff --git a/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.sql 
b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.sql
new file mode 100644
index 0000000..35c5952
--- /dev/null
+++ b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.sql
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
-- COW table keyed on `id`, partitioned via TimestampBasedKeyGenerator:
-- `ts_str` is parsed with the input dateformat and rewritten to 'yyyy/MM/dd/HH'.
CREATE TABLE v6_timebasedkeygen_nonhivestyle (
    id INT,
    name STRING,
    isActive BOOLEAN,
    byteField BYTE,
    shortField SHORT,
    intField INT,
    longField LONG,
    floatField FLOAT,
    doubleField DOUBLE,
    decimalField DECIMAL(10,5),
    dateField DATE,
    timestampField TIMESTAMP,
    binaryField BINARY,
    arrayField ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>,  -- Array of structs
    mapField MAP<STRING, STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,  -- Map with struct values
    structField STRUCT<
        field1: STRING,
        field2: INT,
        child_struct: STRUCT<
            child_field1: DOUBLE,
            child_field2: BOOLEAN
        >
    >,
    ts_str STRING  -- partition source column
)
USING HUDI
TBLPROPERTIES (
    type = 'cow',
    primaryKey = 'id',
    preCombineField = 'longField',
    'hoodie.metadata.enable' = 'false',
    'hoodie.datasource.write.hive_style_partitioning' = 'false',
    'hoodie.table.keygenerator.class' = 'org.apache.hudi.keygen.TimestampBasedKeyGenerator',
    'hoodie.keygen.timebased.timestamp.type' = 'DATE_STRING',
    'hoodie.keygen.timebased.input.dateformat' = "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
    'hoodie.keygen.timebased.output.dateformat' = 'yyyy/MM/dd/HH'
)
PARTITIONED BY (ts_str);
+
-- First insert batch (keys 1-3); the trailing string is ts_str, the partition source.
INSERT INTO v6_timebasedkeygen_nonhivestyle VALUES
(1, 'Alice', true, 10, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
 CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
 ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
 MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
 STRUCT('Alice', 30, STRUCT(123.456, true)),
 '2023-04-01T12:01:00.123Z'),
(2, 'Bob', false, 20, 100, 25000, 9876543210, 2.0, 2.71828, 67890.12345,
 CAST('2023-04-02' AS DATE), CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
 ARRAY(STRUCT('yellow', 400), STRUCT('purple', 500)),
 MAP('key3', STRUCT(234.567, true), 'key4', STRUCT(567.890, false)),
 STRUCT('Bob', 40, STRUCT(789.012, false)),
 '2023-04-02T12:01:00.123Z'),
(3, 'Carol', true, 10, 300, 35000, 1928374650, 3.0, 1.41421, 11111.22222,
 CAST('2023-04-03' AS DATE), CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS BINARY),
 ARRAY(STRUCT('black', 600), STRUCT('white', 700), STRUCT('pink', 800)),
 MAP('key5', STRUCT(345.678, true), 'key6', STRUCT(654.321, false)),
 STRUCT('Carol', 25, STRUCT(456.789, true)),
 '2023-04-03T12:01:00.123Z');
+
-- Second insert batch: keys 1 and 4.
INSERT INTO v6_timebasedkeygen_nonhivestyle VALUES
(1, 'Alice', false, 10, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
 CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
 ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
 MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
 STRUCT('Alice', 30, STRUCT(123.456, true)),
 '2023-04-01T12:01:00.123Z'),
(4, 'Diana', true, 30, 100, 45000, 987654321, 4.0, 2.468, 65432.12345,
 CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
 ARRAY(STRUCT('orange', 900), STRUCT('gray', 1000)),
 MAP('key7', STRUCT(456.789, true), 'key8', STRUCT(123.456, false)),
 STRUCT('Diana', 50, STRUCT(987.654, true)),
 '2023-04-04T13:01:00.123Z');
diff --git a/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.zip 
b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.zip
new file mode 100644
index 0000000..83b6816
Binary files /dev/null and 
b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.zip differ
diff --git a/crates/tests/src/lib.rs b/crates/tests/src/lib.rs
new file mode 100644
index 0000000..e467818
--- /dev/null
+++ b/crates/tests/src/lib.rs
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use std::fs;
+use std::io::Cursor;
+use std::path::{Path, PathBuf};
+
+use tempfile::tempdir;
+use url::Url;
+
+pub mod utils;
+
+pub fn extract_test_table(zip_path: &Path) -> PathBuf {
+    let target_dir = tempdir().unwrap().path().to_path_buf();
+    let archive = fs::read(zip_path).unwrap();
+    zip_extract::extract(Cursor::new(archive), &target_dir, true).unwrap();
+    target_dir
+}
+
/// Test table fixtures shipped as zip archives under this crate's
/// `data/tables` directory (see the matching `.zip` / `.sql` files).
pub enum TestTable {
    /// `v6_complexkeygen_hivestyle.zip`
    V6ComplexkeygenHivestyle,
    /// `v6_nonpartitioned.zip`
    V6Nonpartitioned,
}
+
+impl TestTable {
+    pub fn zip_path(&self) -> Box<Path> {
+        let dir = env!("CARGO_MANIFEST_DIR");
+        let data_path = Path::new(dir).join("data/tables");
+        match self {
+            Self::V6ComplexkeygenHivestyle => data_path
+                .join("v6_complexkeygen_hivestyle.zip")
+                .into_boxed_path(),
+            Self::V6Nonpartitioned => 
data_path.join("v6_nonpartitioned.zip").into_boxed_path(),
+        }
+    }
+
+    pub fn path(&self) -> String {
+        let zip_path = self.zip_path();
+        match self {
+            Self::V6ComplexkeygenHivestyle => extract_test_table(&zip_path)
+                .join("v6_complexkeygen_hivestyle")
+                .to_str()
+                .unwrap()
+                .to_string(),
+            Self::V6Nonpartitioned => extract_test_table(&zip_path)
+                .join("v6_nonpartitioned")
+                .to_str()
+                .unwrap()
+                .to_string(),
+        }
+    }
+
+    pub fn url(&self) -> Url {
+        Url::from_file_path(self.path()).unwrap()
+    }
+}
diff --git a/crates/core/src/test_utils.rs b/crates/tests/src/utils.rs
similarity index 77%
rename from crates/core/src/test_utils.rs
rename to crates/tests/src/utils.rs
index 94e3c4b..37e9cfb 100644
--- a/crates/core/src/test_utils.rs
+++ b/crates/tests/src/utils.rs
@@ -17,19 +17,6 @@
  * under the License.
  */
 
-use std::fs;
-use std::io::Cursor;
-use std::path::{Path, PathBuf};
-
-use tempfile::tempdir;
-
-pub fn extract_test_table(fixture_path: &Path) -> PathBuf {
-    let target_dir = tempdir().unwrap().path().to_path_buf();
-    let archive = fs::read(fixture_path).unwrap();
-    zip_extract::extract(Cursor::new(archive), &target_dir, true).unwrap();
-    target_dir
-}
-
 #[macro_export]
 macro_rules! assert_approx_eq {
     ($a:expr, $b:expr, $delta:expr) => {{
diff --git a/python/Cargo.toml b/python/Cargo.toml
index 613b010..3ce4986 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -17,8 +17,10 @@
 
 [package]
 name = "hudi-python"
-version = "0.1.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+rust-version.workspace = true
 
 [lib]
 name = "hudi"
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index 2dcfdeb..b1fd566 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -34,7 +34,7 @@ def _extract_testing_table(zip_file_path, target_path) -> str:
     ]
 )
 def get_sample_table(request, tmp_path) -> str:
-    fixture_path = "../crates/core/fixtures/table"
+    fixture_path = "tests/table"
     table_name = request.param
     zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip")
     return _extract_testing_table(zip_file_path, tmp_path)
diff --git a/crates/core/fixtures/table/0.x_cow_partitioned.zip 
b/python/tests/table/0.x_cow_partitioned.zip
similarity index 100%
rename from crates/core/fixtures/table/0.x_cow_partitioned.zip
rename to python/tests/table/0.x_cow_partitioned.zip

Reply via email to