This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 6d35b74 test: add tests crate and adopt testing tables (#30)
6d35b74 is described below
commit 6d35b74a15aedbd0b45a85203d0eddb8da3794c7
Author: Shiyan Xu <[email protected]>
AuthorDate: Sat Jun 29 18:36:11 2024 -0500
test: add tests crate and adopt testing tables (#30)
---
crates/core/Cargo.toml | 9 +-
crates/core/src/lib.rs | 1 -
crates/core/src/storage/file_info.rs | 2 +-
crates/core/src/storage/file_stats.rs | 2 +-
crates/core/src/storage/mod.rs | 54 ++++++++----
crates/core/src/storage/utils.rs | 58 ++++++++++++-
crates/core/src/table/fs_view.rs | 45 +++++-----
crates/core/src/table/mod.rs | 68 ++++++++++-----
crates/core/src/timeline/mod.rs | 9 +-
crates/datafusion/Cargo.toml | 7 +-
crates/hudi/Cargo.toml | 4 +-
crates/{hudi => tests}/Cargo.toml | 8 +-
.../tables/v6_complexkeygen_hivestyle.datagen.sql | 87 +++++++++++++++++++
.../data/tables/v6_complexkeygen_hivestyle.zip | Bin 0 -> 42914 bytes
.../data/tables/v6_empty.sql} | 15 +++-
crates/tests/data/tables/v6_empty.zip | Bin 0 -> 2258 bytes
.../data/tables/v6_nonpartitioned.datagen.sql | 80 ++++++++++++++++++
crates/tests/data/tables/v6_nonpartitioned.zip | Bin 0 -> 24851 bytes
...implekeygen_hivestyle_no_metafields.datagen.sql | 81 ++++++++++++++++++
.../v6_simplekeygen_hivestyle_no_metafields.zip | Bin 0 -> 17950 bytes
.../v6_simplekeygen_nonhivestyle.datagen.sql | 88 ++++++++++++++++++++
.../data/tables/v6_simplekeygen_nonhivestyle.zip | Bin 0 -> 38375 bytes
.../tables/v6_timebasedkeygen_nonhivestyle.sql | 92 +++++++++++++++++++++
.../tables/v6_timebasedkeygen_nonhivestyle.zip | Bin 0 -> 49127 bytes
crates/tests/src/lib.rs | 72 ++++++++++++++++
.../{core/src/test_utils.rs => tests/src/utils.rs} | 13 ---
python/Cargo.toml | 6 +-
python/tests/conftest.py | 2 +-
.../tests}/table/0.x_cow_partitioned.zip | Bin
29 files changed, 696 insertions(+), 107 deletions(-)
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 98c12ab..6363804 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -17,16 +17,17 @@
[package]
name = "hudi-core"
-version = "0.1.0"
+version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
[dependencies]
+hudi-tests = { path = "../tests" }
# arrow
arrow = { workspace = true }
arrow-arith = { workspace = true }
-arrow-array = { workspace = true , features = ["chrono-tz"]}
+arrow-array = { workspace = true, features = ["chrono-tz"] }
arrow-buffer = { workspace = true }
arrow-cast = { workspace = true }
arrow-ipc = { workspace = true }
@@ -68,7 +69,3 @@ async-recursion = { workspace = true }
async-trait = { workspace = true }
tokio = { workspace = true }
futures = { workspace = true }
-
-# test
-tempfile = "3.10.1"
-zip-extract = "0.1.3"
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index d2c53ee..1586ff8 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -23,7 +23,6 @@ pub mod file_group;
pub mod table;
pub type HudiTable = Table;
mod storage;
-pub mod test_utils;
mod timeline;
pub fn crate_version() -> &'static str {
diff --git a/crates/core/src/storage/file_info.rs
b/crates/core/src/storage/file_info.rs
index 4bd178d..8a77048 100644
--- a/crates/core/src/storage/file_info.rs
+++ b/crates/core/src/storage/file_info.rs
@@ -17,7 +17,7 @@
* under the License.
*/
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct FileInfo {
pub uri: String,
pub name: String,
diff --git a/crates/core/src/storage/file_stats.rs
b/crates/core/src/storage/file_stats.rs
index ec63c14..19a7000 100644
--- a/crates/core/src/storage/file_stats.rs
+++ b/crates/core/src/storage/file_stats.rs
@@ -17,7 +17,7 @@
* under the License.
*/
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct FileStats {
pub num_records: i64,
}
diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs
index c8b7b34..b35f30d 100644
--- a/crates/core/src/storage/mod.rs
+++ b/crates/core/src/storage/mod.rs
@@ -132,8 +132,7 @@ impl Storage {
.objects
.into_iter()
.map(|obj_meta| FileInfo {
- uri: prefix_url
- .join(obj_meta.location.filename().unwrap())
+ uri: join_url_segments(&prefix_url,
&[obj_meta.location.filename().unwrap()])
.unwrap()
.to_string(),
name: obj_meta.location.filename().unwrap().to_string(),
@@ -172,6 +171,7 @@ mod tests {
use object_store::path::Path as ObjPath;
use url::Url;
+ use crate::storage::file_info::FileInfo;
use crate::storage::utils::join_url_segments;
use crate::storage::{get_leaf_dirs, Storage};
@@ -224,28 +224,50 @@ mod tests {
canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
)
.unwrap();
- let storage = Storage::new(base_url, HashMap::new());
- let file_names_1: Vec<String> = storage
- .list_files(None)
- .await
- .into_iter()
- .map(|file_info| file_info.name)
- .collect();
- assert_eq!(file_names_1, vec!["a.parquet"]);
- let file_names_2: Vec<String> = storage
+ let storage = Storage::new(base_url.clone(), HashMap::new());
+ let file_info_1: Vec<FileInfo> =
storage.list_files(None).await.into_iter().collect();
+ assert_eq!(
+ file_info_1,
+ vec![FileInfo {
+ uri: base_url.clone().join("a.parquet").unwrap().to_string(),
+ name: "a.parquet".to_string(),
+ size: 0,
+ }]
+ );
+ let file_info_2: Vec<FileInfo> = storage
.list_files(Some("part1"))
.await
.into_iter()
- .map(|file_info| file_info.name)
.collect();
- assert_eq!(file_names_2, vec!["b.parquet"]);
- let file_names_3: Vec<String> = storage
+ assert_eq!(
+ file_info_2,
+ vec![FileInfo {
+ uri: base_url
+ .clone()
+ .join("part1/b.parquet")
+ .unwrap()
+ .to_string(),
+ name: "b.parquet".to_string(),
+ size: 0,
+ }]
+ );
+ let file_info_3: Vec<FileInfo> = storage
.list_files(Some("part2/part22"))
.await
.into_iter()
- .map(|file_info| file_info.name)
.collect();
- assert_eq!(file_names_3, vec!["c.parquet"]);
+ assert_eq!(
+ file_info_3,
+ vec![FileInfo {
+ uri: base_url
+ .clone()
+ .join("part2/part22/c.parquet")
+ .unwrap()
+ .to_string(),
+ name: "c.parquet".to_string(),
+ size: 0,
+ }]
+ );
}
#[tokio::test]
diff --git a/crates/core/src/storage/utils.rs b/crates/core/src/storage/utils.rs
index cf81dc0..d1f8c4a 100644
--- a/crates/core/src/storage/utils.rs
+++ b/crates/core/src/storage/utils.rs
@@ -17,8 +17,9 @@
* under the License.
*/
-use anyhow::{anyhow, Result};
use std::path::Path;
+
+use anyhow::{anyhow, Result};
use url::{ParseError, Url};
pub fn split_filename(filename: &str) -> Result<(String, String)> {
@@ -46,9 +47,58 @@ pub fn join_url_segments(base_url: &Url, segments: &[&str])
-> Result<Url> {
url.path_segments_mut().unwrap().pop();
}
- url.path_segments_mut()
- .map_err(|_| ParseError::RelativeUrlWithoutBase)?
- .extend(segments);
+ for &seg in segments {
+ let segs: Vec<_> = seg.split('/').filter(|&s| !s.is_empty()).collect();
+ url.path_segments_mut()
+ .map_err(|_| ParseError::RelativeUrlWithoutBase)?
+ .extend(segs);
+ }
Ok(url)
}
+
+#[cfg(test)]
+mod tests {
+ use std::str::FromStr;
+
+ use url::Url;
+
+ use crate::storage::utils::join_url_segments;
+
+ #[test]
+ fn join_base_url_with_segments() {
+ let base_url = Url::from_str("file:///base").unwrap();
+
+ assert_eq!(
+ join_url_segments(&base_url, &["foo"]).unwrap(),
+ Url::from_str("file:///base/foo").unwrap()
+ );
+
+ assert_eq!(
+ join_url_segments(&base_url, &["/foo"]).unwrap(),
+ Url::from_str("file:///base/foo").unwrap()
+ );
+
+ assert_eq!(
+ join_url_segments(&base_url, &["/foo", "bar/", "/baz/"]).unwrap(),
+ Url::from_str("file:///base/foo/bar/baz").unwrap()
+ );
+
+ assert_eq!(
+ join_url_segments(&base_url, &["foo/", "", "bar/baz"]).unwrap(),
+ Url::from_str("file:///base/foo/bar/baz").unwrap()
+ );
+
+ assert_eq!(
+ join_url_segments(&base_url, &["foo1/bar1", "foo2/bar2"]).unwrap(),
+ Url::from_str("file:///base/foo1/bar1/foo2/bar2").unwrap()
+ );
+ }
+
+ #[test]
+ fn join_failed_due_to_invalid_base() {
+ let base_url = Url::from_str("foo:text/plain,bar").unwrap();
+ let result = join_url_segments(&base_url, &["foo"]);
+ assert!(result.is_err());
+ }
+}
diff --git a/crates/core/src/table/fs_view.rs b/crates/core/src/table/fs_view.rs
index 5f9cf0f..c7c20e1 100644
--- a/crates/core/src/table/fs_view.rs
+++ b/crates/core/src/table/fs_view.rs
@@ -185,53 +185,50 @@ async fn get_partitions_and_file_groups(
#[cfg(test)]
mod tests {
use std::collections::HashSet;
- use std::fs::canonicalize;
- use std::path::Path;
- use url::Url;
+ use hudi_tests::TestTable;
use crate::table::fs_view::FileSystemView;
- use crate::test_utils::extract_test_table;
#[tokio::test]
- async fn get_partition_paths() {
- let fixture_path =
-
canonicalize(Path::new("fixtures/table/0.x_cow_partitioned.zip")).unwrap();
- let base_url =
Url::from_file_path(extract_test_table(&fixture_path)).unwrap();
+ async fn get_partition_paths_for_nonpartitioned_table() {
+ let base_url = TestTable::V6Nonpartitioned.url();
+ let fs_view = FileSystemView::new(base_url);
+ let partition_paths = fs_view.get_partition_paths().await.unwrap();
+ let partition_path_set: HashSet<&str> =
+ HashSet::from_iter(partition_paths.iter().map(|p| p.as_str()));
+ assert_eq!(partition_path_set, HashSet::new(),)
+ }
+
+ #[tokio::test]
+ async fn get_partition_paths_for_complexkeygen_table() {
+ let base_url = TestTable::V6ComplexkeygenHivestyle.url();
let fs_view = FileSystemView::new(base_url);
let partition_paths = fs_view.get_partition_paths().await.unwrap();
let partition_path_set: HashSet<&str> =
HashSet::from_iter(partition_paths.iter().map(|p| p.as_str()));
assert_eq!(
partition_path_set,
- HashSet::from_iter(vec!["chennai", "sao_paulo", "san_francisco"])
+ HashSet::from_iter(vec![
+ "byteField=10/shortField=300",
+ "byteField=20/shortField=100",
+ "byteField=30/shortField=100"
+ ])
)
}
#[test]
fn get_latest_file_slices() {
- let fixture_path =
-
canonicalize(Path::new("fixtures/table/0.x_cow_partitioned.zip")).unwrap();
- let base_url =
Url::from_file_path(extract_test_table(&fixture_path)).unwrap();
+ let base_url = TestTable::V6Nonpartitioned.url();
let mut fs_view = FileSystemView::new(base_url);
fs_view.load_file_groups();
let file_slices = fs_view.get_latest_file_slices();
- assert_eq!(file_slices.len(), 5);
+ assert_eq!(file_slices.len(), 1);
let mut fg_ids = Vec::new();
for f in file_slices {
let fp = f.file_group_id();
fg_ids.push(fp);
}
- let actual: HashSet<&str> = fg_ids.into_iter().collect();
- assert_eq!(
- actual,
- HashSet::from_iter(vec![
- "780b8586-3ad0-48ef-a6a1-d2217845ce4a-0",
- "d9082ffd-2eb1-4394-aefc-deb4a61ecc57-0",
- "ee915c68-d7f8-44f6-9759-e691add290d8-0",
- "68d3c349-f621-4cd8-9e8b-c6dd8eb20d08-0",
- "5a226868-2934-4f84-a16f-55124630c68d-0"
- ])
- );
+ assert_eq!(fg_ids, vec!["a079bdb3-731c-4894-b855-abfcd6921007-0"])
}
}
diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs
index 681ef05..d26decc 100644
--- a/crates/core/src/table/mod.rs
+++ b/crates/core/src/table/mod.rs
@@ -255,21 +255,21 @@ impl ProvidesTableMetadata for Table {
#[cfg(test)]
mod tests {
- use std::collections::HashMap;
+ use std::collections::{HashMap, HashSet};
use std::fs::canonicalize;
use std::path::Path;
- use url::Url;
+ use hudi_tests::TestTable;
+
+ use crate::storage::utils::join_url_segments;
use crate::table::config::BaseFileFormat::Parquet;
use crate::table::config::TableType::CopyOnWrite;
use crate::table::metadata::ProvidesTableMetadata;
use crate::table::Table;
- use crate::test_utils::extract_test_table;
#[test]
fn hudi_table_get_latest_schema() {
- let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
- let base_url =
Url::from_file_path(extract_test_table(fixture_path)).unwrap();
+ let base_url = TestTable::V6Nonpartitioned.url();
let hudi_table = Table::new(base_url.path(), HashMap::new());
let fields: Vec<String> = hudi_table
.get_latest_schema()
@@ -285,36 +285,66 @@ mod tests {
"_hoodie_record_key",
"_hoodie_partition_path",
"_hoodie_file_name",
- "ts",
- "uuid",
- "rider",
- "driver",
- "fare",
- "city"
+ "id",
+ "name",
+ "isActive",
+ "byteField",
+ "shortField",
+ "intField",
+ "longField",
+ "floatField",
+ "doubleField",
+ "decimalField",
+ "dateField",
+ "timestampField",
+ "binaryField",
+ "arrayField",
+ "array",
+ "arr_struct_f1",
+ "arr_struct_f2",
+ "mapField",
+ "key_value",
+ "key",
+ "value",
+ "map_field_value_struct_f1",
+ "map_field_value_struct_f2",
+ "structField",
+ "field1",
+ "field2",
+ "child_struct",
+ "child_field1",
+ "child_field2"
])
);
}
#[test]
fn hudi_table_read_file_slice() {
- let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
- let base_url =
Url::from_file_path(extract_test_table(fixture_path)).unwrap();
+ let base_url = TestTable::V6Nonpartitioned.url();
let mut hudi_table = Table::new(base_url.path(), HashMap::new());
let batches = hudi_table.read_file_slice(
-
"san_francisco/780b8586-3ad0-48ef-a6a1-d2217845ce4a-0_0-8-0_20240402123035233.parquet",
+
"a079bdb3-731c-4894-b855-abfcd6921007-0_0-203-274_20240418173551906.parquet",
);
assert_eq!(batches.len(), 1);
- assert_eq!(batches.first().unwrap().num_rows(), 1);
- assert_eq!(batches.first().unwrap().num_columns(), 11);
+ assert_eq!(batches.first().unwrap().num_rows(), 4);
+ assert_eq!(batches.first().unwrap().num_columns(), 21);
}
#[test]
fn hudi_table_get_latest_file_paths() {
- let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
- let base_url =
Url::from_file_path(extract_test_table(fixture_path)).unwrap();
+ let base_url = TestTable::V6ComplexkeygenHivestyle.url();
let mut hudi_table = Table::new(base_url.path(), HashMap::new());
assert_eq!(hudi_table.get_timeline().unwrap().instants.len(), 2);
- assert_eq!(hudi_table.get_latest_file_paths().unwrap().len(), 5);
+ let actual: HashSet<String> =
+ HashSet::from_iter(hudi_table.get_latest_file_paths().unwrap());
+ let expected: HashSet<String> = HashSet::from_iter(vec![
+
"byteField=10/shortField=300/a22e8257-e249-45e9-ba46-115bc85adcba-0_0-161-223_20240418173235694.parquet",
+
"byteField=20/shortField=100/bb7c3a45-387f-490d-aab2-981c3f1a8ada-0_0-140-198_20240418173213674.parquet",
+
"byteField=30/shortField=100/4668e35e-bff8-4be9-9ff2-e7fb17ecb1a7-0_1-161-224_20240418173235694.parquet",
+ ]
+ .into_iter().map(|f| { join_url_segments(&base_url,
&[f]).unwrap().to_string() })
+ .collect::<Vec<_>>());
+ assert_eq!(actual, expected);
}
#[test]
diff --git a/crates/core/src/timeline/mod.rs b/crates/core/src/timeline/mod.rs
index e7f8010..311751a 100644
--- a/crates/core/src/timeline/mod.rs
+++ b/crates/core/src/timeline/mod.rs
@@ -138,17 +138,16 @@ mod tests {
use url::Url;
- use crate::test_utils::extract_test_table;
+ use hudi_tests::TestTable;
+
use crate::timeline::{Instant, State, Timeline};
#[tokio::test]
async fn read_latest_schema() {
- let fixture_path = Path::new("fixtures/table/0.x_cow_partitioned.zip");
- let target_table_path = extract_test_table(fixture_path);
- let base_url =
Url::from_file_path(canonicalize(target_table_path).unwrap()).unwrap();
+ let base_url = TestTable::V6Nonpartitioned.url();
let timeline = Timeline::new(base_url).await.unwrap();
let table_schema = timeline.get_latest_schema().await.unwrap();
- assert_eq!(table_schema.fields.len(), 11)
+ assert_eq!(table_schema.fields.len(), 21)
}
#[tokio::test]
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index e1a4560..4f250ff 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -17,17 +17,18 @@
[package]
name = "hudi-datafusion"
-version = "0.1.0"
+version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
[dependencies]
-hudi-core = { path = "../core"}
+hudi-core = { path = "../core" }
+hudi-tests = { path = "../tests" }
# arrow
arrow = { workspace = true }
arrow-arith = { workspace = true }
-arrow-array = { workspace = true , features = ["chrono-tz"]}
+arrow-array = { workspace = true, features = ["chrono-tz"] }
arrow-buffer = { workspace = true }
arrow-cast = { workspace = true }
arrow-ipc = { workspace = true }
diff --git a/crates/hudi/Cargo.toml b/crates/hudi/Cargo.toml
index 5672e85..b6a08a8 100644
--- a/crates/hudi/Cargo.toml
+++ b/crates/hudi/Cargo.toml
@@ -22,7 +22,5 @@ edition.workspace = true
license.workspace = true
rust-version.workspace = true
-# See more keys and their definitions at
https://doc.rust-lang.org/cargo/reference/manifest.html
-
[dependencies]
-hudi-core = { path = "../core"}
+hudi-core = { path = "../core" }
diff --git a/crates/hudi/Cargo.toml b/crates/tests/Cargo.toml
similarity index 86%
copy from crates/hudi/Cargo.toml
copy to crates/tests/Cargo.toml
index 5672e85..b6efe85 100644
--- a/crates/hudi/Cargo.toml
+++ b/crates/tests/Cargo.toml
@@ -16,13 +16,13 @@
# under the License.
[package]
-name = "hudi"
+name = "hudi-tests"
version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
-# See more keys and their definitions at
https://doc.rust-lang.org/cargo/reference/manifest.html
-
[dependencies]
-hudi-core = { path = "../core"}
+tempfile = "3.10.1"
+zip-extract = "0.1.3"
+url = { workspace = true }
diff --git a/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
b/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
new file mode 100644
index 0000000..77a1fa7
--- /dev/null
+++ b/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v6_complexkeygen_hivestyle (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>,
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ byteField BYTE,
+ shortField SHORT
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id,name',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.datasource.write.hive_style_partitioning' = 'true'
+)
+PARTITIONED BY (byteField, shortField);
+
+INSERT INTO v6_complexkeygen_hivestyle VALUES
+ (1, 'Alice', true, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true),
'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10, 300
+ ),
+ (2, 'Bob', false, 25000,
9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567, true),
'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40, STRUCT(789.012,
false)),
+ 20, 100
+ ),
+ (3, 'Carol', true, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678, true),
'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25,
STRUCT(456.789, true)),
+ 10, 300
+ );
+
+INSERT INTO v6_complexkeygen_hivestyle VALUES
+ (1, 'Alice', false, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true),
'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10, 300
+ ),
+ (4, 'Diana', true, 45000,
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789, true),
'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50,
STRUCT(987.654, true)),
+ 30, 100
+ );
diff --git a/crates/tests/data/tables/v6_complexkeygen_hivestyle.zip
b/crates/tests/data/tables/v6_complexkeygen_hivestyle.zip
new file mode 100644
index 0000000..6f3dbb9
Binary files /dev/null and
b/crates/tests/data/tables/v6_complexkeygen_hivestyle.zip differ
diff --git a/crates/core/src/storage/file_stats.rs
b/crates/tests/data/tables/v6_empty.sql
similarity index 74%
copy from crates/core/src/storage/file_stats.rs
copy to crates/tests/data/tables/v6_empty.sql
index ec63c14..6db4624 100644
--- a/crates/core/src/storage/file_stats.rs
+++ b/crates/tests/data/tables/v6_empty.sql
@@ -17,7 +17,14 @@
* under the License.
*/
-#[derive(Clone, Debug, Default)]
-pub struct FileStats {
- pub num_records: i64,
-}
+create table v6_empty (
+ id INT,
+ name STRING,
+ isActive BOOLEAN
+)
+ USING HUDI
+ TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ 'hoodie.metadata.enable' = 'false'
+);
diff --git a/crates/tests/data/tables/v6_empty.zip
b/crates/tests/data/tables/v6_empty.zip
new file mode 100644
index 0000000..a4a1151
Binary files /dev/null and b/crates/tests/data/tables/v6_empty.zip differ
diff --git a/crates/tests/data/tables/v6_nonpartitioned.datagen.sql
b/crates/tests/data/tables/v6_nonpartitioned.datagen.sql
new file mode 100644
index 0000000..d581dfa
--- /dev/null
+++ b/crates/tests/data/tables/v6_nonpartitioned.datagen.sql
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v6_nonpartitioned (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ byteField BYTE,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField ARRAY<STRUCT<arr_struct_f1:
STRING, arr_struct_f2: INT>>, -- Array of structs
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false'
+);
+
+INSERT INTO v6_nonpartitioned VALUES
+ (1, 'Alice', true, 1, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100), STRUCT('blue',
200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true), 'key2',
STRUCT(789.012, false)),
+ STRUCT('Alice', 30, STRUCT(123.456, true))
+ ),
+ (2, 'Bob', false, 0, 100, 25000, 9876543210,
2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE), CAST('2023-04-02
13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567, true), 'key4',
STRUCT(567.890, false)),
+ STRUCT('Bob', 40, STRUCT(789.012, false))
+ ),
+ (3, 'Carol', true, 1, 200, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600), STRUCT('white',
700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678, true), 'key6',
STRUCT(654.321, false)),
+ STRUCT('Carol', 25, STRUCT(456.789, true))
+ );
+
+INSERT INTO v6_nonpartitioned VALUES
+ (1, 'Alice', false, 1, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100), STRUCT('blue',
200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true), 'key2',
STRUCT(789.012, false)),
+ STRUCT('Alice', 30, STRUCT(123.456, true))
+ ),
+ (4, 'Diana', true, 1, 500, 45000, 987654321,
4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00'
AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900), STRUCT('gray',
1000)),
+ MAP('key7', STRUCT(456.789, true), 'key8',
STRUCT(123.456, false)),
+ STRUCT('Diana', 50, STRUCT(987.654, true))
+ );
diff --git a/crates/tests/data/tables/v6_nonpartitioned.zip
b/crates/tests/data/tables/v6_nonpartitioned.zip
new file mode 100644
index 0000000..4675f83
Binary files /dev/null and b/crates/tests/data/tables/v6_nonpartitioned.zip
differ
diff --git
a/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
new file mode 100644
index 0000000..de37ffc
--- /dev/null
+++
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v6_simplekeygen_hivestyle_no_metafields (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, --
Array of structs
+ mapField MAP<STRING, STRUCT<map_field_value_struct_f1: DOUBLE,
map_field_value_struct_f2: BOOLEAN>>, -- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ byteField BYTE
+)
+USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.datasource.write.hive_style_partitioning' = 'true',
+ 'hoodie.datasource.write.drop.partition.columns' = 'false',
+ 'hoodie.populate.meta.fields' = 'false'
+)
+PARTITIONED BY (byteField);
+
+INSERT INTO v6_simplekeygen_hivestyle_no_metafields VALUES
+(1, 'Alice', false, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP),
CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30, STRUCT(123.456, true)),
+ 10
+),
+(2, 'Bob', false, 100, 25000, 9876543210, 2.0, 2.71828, 67890.12345,
CAST('2023-04-02' AS DATE), CAST('2023-04-02 13:02:00' AS TIMESTAMP),
CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400), STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567, true), 'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40, STRUCT(789.012, false)),
+ 20
+),
+(3, 'Carol', true, 200, 35000, 1928374650, 3.0, 1.41421, 11111.22222,
CAST('2023-04-03' AS DATE), CAST('2023-04-03 14:03:00' AS TIMESTAMP),
CAST('even more binary data' AS BINARY),
+ ARRAY(STRUCT('black', 600), STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678, true), 'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25, STRUCT(456.789, true)),
+ 10
+),
+(4, 'Diana', true, 500, 45000, 987654321, 4.0, 2.468, 65432.12345,
CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new
binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900), STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789, true), 'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50, STRUCT(987.654, true)),
+ 30
+);
diff --git
a/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.zip
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.zip
new file mode 100644
index 0000000..dcc76c4
Binary files /dev/null and
b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.zip differ
diff --git a/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
new file mode 100644
index 0000000..f1d8c9e
--- /dev/null
+++ b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
-- Fixture: Hudi v6 copy-on-write table, simple key generator (primaryKey = id),
-- NON-hive-style partition paths, partitioned by byteField. The schema exercises
-- primitives, DECIMAL, DATE/TIMESTAMP, BINARY, array-of-struct, map-of-struct,
-- and a nested struct.
+CREATE TABLE v6_simplekeygen_nonhivestyle (
+  id INT,
+  name STRING,
+  isActive BOOLEAN,
+  shortField SHORT,
+  intField INT,
+  longField LONG,
+  floatField FLOAT,
+  doubleField DOUBLE,
+  decimalField DECIMAL(10,5),
+  dateField DATE,
+  timestampField TIMESTAMP,
+  binaryField BINARY,
+  arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, -- Array of structs
+  mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+  structField STRUCT<
+    field1: STRING,
+    field2: INT,
+    child_struct: STRUCT<
+      child_field1: DOUBLE,
+      child_field2: BOOLEAN
+    >
+  >,
+  byteField BYTE
+)
+  USING HUDI
+TBLPROPERTIES (
+  type = 'cow',
+  primaryKey = 'id',
+  preCombineField = 'longField',
+  'hoodie.metadata.enable' = 'false',
+  'hoodie.datasource.write.hive_style_partitioning' = 'false',
+  'hoodie.datasource.write.drop.partition.columns' = 'false'
+)
+PARTITIONED BY (byteField);
+
-- Commit 1: seed three rows across partitions byteField = 10, 20, 10.
+INSERT INTO v6_simplekeygen_nonhivestyle VALUES
+    (1, 'Alice', true, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+     ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+     MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+     STRUCT('Alice', 30,
STRUCT(123.456, true)),
+     10
+    ),
+    (2, 'Bob', false, 100, 25000,
9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+     ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+     MAP('key3', STRUCT(234.567,
true), 'key4', STRUCT(567.890, false)),
+     STRUCT('Bob', 40,
STRUCT(789.012, false)),
+     20
+    ),
+    (3, 'Carol', true, 200, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+     ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+     MAP('key5', STRUCT(345.678,
true), 'key6', STRUCT(654.321, false)),
+     STRUCT('Carol', 25,
STRUCT(456.789, true)),
+     10
+    );
+
-- Commit 2: re-inserts id=1 (an update; dedup resolved via preCombineField =
-- longField) and adds id=4, creating a new partition byteField = 30.
+INSERT INTO v6_simplekeygen_nonhivestyle VALUES
+    (1, 'Alice', false, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+     ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+     MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+     STRUCT('Alice', 30,
STRUCT(123.456, true)),
+     10
+    ),
+    (4, 'Diana', true, 500, 45000,
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+     ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+     MAP('key7', STRUCT(456.789,
true), 'key8', STRUCT(123.456, false)),
+     STRUCT('Diana', 50,
STRUCT(987.654, true)),
+     30
+    );
diff --git a/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.zip
b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.zip
new file mode 100644
index 0000000..4f05679
Binary files /dev/null and
b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.zip differ
diff --git a/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.sql
b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.sql
new file mode 100644
index 0000000..35c5952
--- /dev/null
+++ b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.sql
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
-- Fixture: Hudi v6 copy-on-write table using TimestampBasedKeyGenerator.
-- The STRING partition column ts_str is parsed with the input dateformat
-- (yyyy-MM-dd'T'HH:mm:ss.SSSZ) and rewritten into yyyy/MM/dd/HH partition
-- paths (non-hive-style).
+CREATE TABLE v6_timebasedkeygen_nonhivestyle (
+  id INT,
+  name STRING,
+  isActive BOOLEAN,
+  byteField BYTE,
+  shortField SHORT,
+  intField INT,
+  longField LONG,
+  floatField FLOAT,
+  doubleField DOUBLE,
+  decimalField DECIMAL(10,5),
+  dateField DATE,
+  timestampField TIMESTAMP,
+  binaryField BINARY,
+  arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, -- Array of structs
+  mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+  structField STRUCT<
+    field1: STRING,
+    field2: INT,
+    child_struct: STRUCT<
+      child_field1: DOUBLE,
+      child_field2: BOOLEAN
+    >
+  >,
+  ts_str STRING
+)
+  USING HUDI
+TBLPROPERTIES (
+  type = 'cow',
+  primaryKey = 'id',
+  preCombineField = 'longField',
+  'hoodie.metadata.enable' = 'false',
+  'hoodie.datasource.write.hive_style_partitioning' = 'false',
+  'hoodie.table.keygenerator.class' =
'org.apache.hudi.keygen.TimestampBasedKeyGenerator',
+  'hoodie.keygen.timebased.timestamp.type' = 'DATE_STRING',
+  'hoodie.keygen.timebased.input.dateformat' = "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
+  'hoodie.keygen.timebased.output.dateformat' = 'yyyy/MM/dd/HH'
+)
+PARTITIONED BY (ts_str);
+
-- Commit 1: seed three rows; each ts_str maps to a distinct hourly partition.
+INSERT INTO v6_timebasedkeygen_nonhivestyle VALUES
+    (1, 'Alice', true, 10, 300,
15000, 1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+    ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+    MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+    STRUCT('Alice', 30,
STRUCT(123.456, true)),
+    '2023-04-01T12:01:00.123Z'
+    ),
+    (2, 'Bob', false, 20, 100,
25000, 9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+    ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+    MAP('key3', STRUCT(234.567,
true), 'key4', STRUCT(567.890, false)),
+    STRUCT('Bob', 40,
STRUCT(789.012, false)),
+    '2023-04-02T12:01:00.123Z'
+    ),
+    (3, 'Carol', true, 10, 300,
35000, 1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+    ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+    MAP('key5', STRUCT(345.678,
true), 'key6', STRUCT(654.321, false)),
+    STRUCT('Carol', 25,
STRUCT(456.789, true)),
+    '2023-04-03T12:01:00.123Z'
+    );
+
-- Commit 2: re-inserts id=1 (update in the existing partition; dedup via
-- preCombineField = longField) and adds id=4 in a new hourly partition.
+INSERT INTO v6_timebasedkeygen_nonhivestyle VALUES
+    (1, 'Alice', false, 10, 300,
15000, 1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+    ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+    MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+    STRUCT('Alice', 30,
STRUCT(123.456, true)),
+    '2023-04-01T12:01:00.123Z'
+    ),
+    (4, 'Diana', true, 30, 100,
45000, 987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+    ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+    MAP('key7', STRUCT(456.789,
true), 'key8', STRUCT(123.456, false)),
+    STRUCT('Diana', 50,
STRUCT(987.654, true)),
+    '2023-04-04T13:01:00.123Z'
+    );
diff --git a/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.zip
b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.zip
new file mode 100644
index 0000000..83b6816
Binary files /dev/null and
b/crates/tests/data/tables/v6_timebasedkeygen_nonhivestyle.zip differ
diff --git a/crates/tests/src/lib.rs b/crates/tests/src/lib.rs
new file mode 100644
index 0000000..e467818
--- /dev/null
+++ b/crates/tests/src/lib.rs
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use std::fs;
+use std::io::Cursor;
+use std::path::{Path, PathBuf};
+
+use tempfile::tempdir;
+use url::Url;
+
+pub mod utils;
+
+pub fn extract_test_table(zip_path: &Path) -> PathBuf {
+ let target_dir = tempdir().unwrap().path().to_path_buf();
+ let archive = fs::read(zip_path).unwrap();
+ zip_extract::extract(Cursor::new(archive), &target_dir, true).unwrap();
+ target_dir
+}
+
/// Test tables bundled as zip fixtures under this crate's `data/tables`
/// directory.
///
/// Each variant names one zipped Hudi table used by integration tests; the
/// variant name mirrors the fixture's directory/zip stem.
// Standard derives: the enum is a fieldset-free tag, so Copy/Eq/Hash are free
// and make the type usable as a map key and in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TestTable {
    /// Table version 6, complex key generator, hive-style partition paths.
    V6ComplexkeygenHivestyle,
    /// Table version 6, non-partitioned.
    V6Nonpartitioned,
}
+
+impl TestTable {
+ pub fn zip_path(&self) -> Box<Path> {
+ let dir = env!("CARGO_MANIFEST_DIR");
+ let data_path = Path::new(dir).join("data/tables");
+ match self {
+ Self::V6ComplexkeygenHivestyle => data_path
+ .join("v6_complexkeygen_hivestyle.zip")
+ .into_boxed_path(),
+ Self::V6Nonpartitioned =>
data_path.join("v6_nonpartitioned.zip").into_boxed_path(),
+ }
+ }
+
+ pub fn path(&self) -> String {
+ let zip_path = self.zip_path();
+ match self {
+ Self::V6ComplexkeygenHivestyle => extract_test_table(&zip_path)
+ .join("v6_complexkeygen_hivestyle")
+ .to_str()
+ .unwrap()
+ .to_string(),
+ Self::V6Nonpartitioned => extract_test_table(&zip_path)
+ .join("v6_nonpartitioned")
+ .to_str()
+ .unwrap()
+ .to_string(),
+ }
+ }
+
+ pub fn url(&self) -> Url {
+ Url::from_file_path(self.path()).unwrap()
+ }
+}
diff --git a/crates/core/src/test_utils.rs b/crates/tests/src/utils.rs
similarity index 77%
rename from crates/core/src/test_utils.rs
rename to crates/tests/src/utils.rs
index 94e3c4b..37e9cfb 100644
--- a/crates/core/src/test_utils.rs
+++ b/crates/tests/src/utils.rs
@@ -17,19 +17,6 @@
* under the License.
*/
-use std::fs;
-use std::io::Cursor;
-use std::path::{Path, PathBuf};
-
-use tempfile::tempdir;
-
-pub fn extract_test_table(fixture_path: &Path) -> PathBuf {
- let target_dir = tempdir().unwrap().path().to_path_buf();
- let archive = fs::read(fixture_path).unwrap();
- zip_extract::extract(Cursor::new(archive), &target_dir, true).unwrap();
- target_dir
-}
-
#[macro_export]
macro_rules! assert_approx_eq {
($a:expr, $b:expr, $delta:expr) => {{
diff --git a/python/Cargo.toml b/python/Cargo.toml
index 613b010..3ce4986 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -17,8 +17,10 @@
[package]
name = "hudi-python"
-version = "0.1.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+rust-version.workspace = true
[lib]
name = "hudi"
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index 2dcfdeb..b1fd566 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -34,7 +34,7 @@ def _extract_testing_table(zip_file_path, target_path) -> str:
]
)
def get_sample_table(request, tmp_path) -> str:
- fixture_path = "../crates/core/fixtures/table"
+ fixture_path = "tests/table"
table_name = request.param
zip_file_path = Path(fixture_path).joinpath(f"{table_name}.zip")
return _extract_testing_table(zip_file_path, tmp_path)
diff --git a/crates/core/fixtures/table/0.x_cow_partitioned.zip
b/python/tests/table/0.x_cow_partitioned.zip
similarity index 100%
rename from crates/core/fixtures/table/0.x_cow_partitioned.zip
rename to python/tests/table/0.x_cow_partitioned.zip