This is an automated email from the ASF dual-hosted git repository.
codope pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 3937170 test: add basic v8 test tables (#394)
3937170 is described below
commit 393717078f0fa96896ef3d57fad7676c77561d5b
Author: Shiyan Xu <[email protected]>
AuthorDate: Mon Jul 14 09:36:50 2025 -0500
test: add basic v8 test tables (#394)
Signed-off-by: Shiyan Xu <[email protected]>
---
crates/test/README.md | 28 ++++++
.../mor/avro/v8_trips_8i1u.sql | 50 +++++++++++
.../mor/avro/v8_trips_8i1u.zip | Bin 0 -> 29954 bytes
.../mor/avro/v8_trips_8i3d.sql | 48 +++++++++++
.../mor/avro/v8_trips_8i3d.zip | Bin 0 -> 29933 bytes
.../cow/v8_complexkeygen_hivestyle.sql | 87 +++++++++++++++++++
.../cow/v8_complexkeygen_hivestyle.zip | Bin 0 -> 43000 bytes
crates/test/data/sample_table/cow/v8_empty.sql | 30 +++++++
crates/test/data/sample_table/cow/v8_empty.zip | Bin 0 -> 6066 bytes
.../data/sample_table/cow/v8_nonpartitioned.sql | 80 +++++++++++++++++
.../data/sample_table/cow/v8_nonpartitioned.zip | Bin 0 -> 26699 bytes
.../v8_simplekeygen_hivestyle_no_metafields.sql | 81 +++++++++++++++++
.../v8_simplekeygen_hivestyle_no_metafields.zip | Bin 0 -> 28076 bytes
.../cow/v8_simplekeygen_nonhivestyle.sql | 88 +++++++++++++++++++
.../cow/v8_simplekeygen_nonhivestyle.zip | Bin 0 -> 40168 bytes
...v8_simplekeygen_nonhivestyle_overwritetable.sql | 96 +++++++++++++++++++++
...v8_simplekeygen_nonhivestyle_overwritetable.zip | Bin 0 -> 51842 bytes
.../cow/v8_timebasedkeygen_nonhivestyle.sql | 92 ++++++++++++++++++++
.../cow/v8_timebasedkeygen_nonhivestyle.zip | Bin 0 -> 50805 bytes
crates/test/src/lib.rs | 23 +++++
20 files changed, 703 insertions(+)
diff --git a/crates/test/README.md b/crates/test/README.md
new file mode 100644
index 0000000..c9cb9b2
--- /dev/null
+++ b/crates/test/README.md
@@ -0,0 +1,28 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
+
+# The `test` crate
+
+This is a crate meant for facilitating testing in `hudi-rs` - it provides utilities and test data.
+
+The `data/` directory contains fully prepared sample Hudi tables categorized by table version and configuration options:
+each zipped file contains the Hudi table and the corresponding `.sql` contains the SQL used to generate the table.
+
+Enums in `src/lib.rs` like `QuickstartTripsTable` and `SampleTable` are for conveniently accessing these ready-made
+tables in tests. They take care of the unzipping and hand over the table paths to callers.
diff --git a/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i1u.sql
b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i1u.sql
new file mode 100644
index 0000000..f699741
--- /dev/null
+++ b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i1u.sql
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_trips_8i1u
+(
+ ts BIGINT,
+ uuid STRING,
+ rider STRING,
+ driver STRING,
+ fare DOUBLE,
+ city STRING
+) USING HUDI
+PARTITIONED BY (city)
+TBLPROPERTIES (
+ type = 'mor',
+ primaryKey = 'uuid',
+ preCombineField = 'ts',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.parquet.small.file.limit' = '0'
+);
+
+INSERT INTO v8_trips_8i1u
+VALUES (1695159649087, '334e26e9-8355-45cc-97c6-c31daf0df330', 'rider-A',
'driver-K', 19.10, 'san_francisco'),
+ (1695091554788, 'e96c4396-3fad-413a-a942-4cb36106d721', 'rider-C',
'driver-M', 27.70, 'san_francisco'),
+ (1695046462179, '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'rider-D',
'driver-L', 33.90, 'san_francisco'),
+ (1695332066204, '1dced545-862b-4ceb-8b43-d2a568f6616b', 'rider-E',
'driver-O', 93.50, 'san_francisco'),
+ (1695516137016, 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'rider-F',
'driver-P', 34.15, 'sao_paulo'),
+ (1695376420876, '7a84095f-737f-40bc-b62f-6b69664712d2', 'rider-G',
'driver-Q', 43.40, 'sao_paulo'),
+ (1695173887231, '3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04', 'rider-I',
'driver-S', 41.06, 'chennai'),
+ (1695115999911, 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'rider-J',
'driver-T', 17.85, 'chennai');
+
+UPDATE v8_trips_8i1u
+SET fare = 25.0
+WHERE rider = 'rider-D';
diff --git a/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i1u.zip
b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i1u.zip
new file mode 100644
index 0000000..5814c5b
Binary files /dev/null and
b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i1u.zip differ
diff --git a/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i3d.sql
b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i3d.sql
new file mode 100644
index 0000000..55d795d
--- /dev/null
+++ b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i3d.sql
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_trips_8i3d
+(
+ ts BIGINT,
+ uuid STRING,
+ rider STRING,
+ driver STRING,
+ fare DOUBLE,
+ city STRING
+) USING HUDI
+PARTITIONED BY (city)
+TBLPROPERTIES (
+ type = 'mor',
+ primaryKey = 'uuid',
+ preCombineField = 'ts',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.parquet.small.file.limit' = '0'
+);
+
+INSERT INTO v8_trips_8i3d
+VALUES (1695159649087, '334e26e9-8355-45cc-97c6-c31daf0df330', 'rider-A',
'driver-K', 19.10, 'san_francisco'),
+ (1695091554788, 'e96c4396-3fad-413a-a942-4cb36106d721', 'rider-C',
'driver-M', 27.70, 'san_francisco'),
+ (1695046462179, '9909a8b1-2d15-4d3d-8ec9-efc48c536a00', 'rider-D',
'driver-L', 33.90, 'san_francisco'),
+ (1695332066204, '1dced545-862b-4ceb-8b43-d2a568f6616b', 'rider-E',
'driver-O', 93.50, 'san_francisco'),
+ (1695516137016, 'e3cf430c-889d-4015-bc98-59bdce1e530c', 'rider-F',
'driver-P', 34.15, 'sao_paulo'),
+ (1695376420876, '7a84095f-737f-40bc-b62f-6b69664712d2', 'rider-G',
'driver-Q', 43.40, 'sao_paulo'),
+ (1695173887231, '3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04', 'rider-I',
'driver-S', 41.06, 'chennai'),
+ (1695115999911, 'c8abbe79-8d89-47ea-b4ce-4d224bae5bfa', 'rider-J',
'driver-T', 17.85, 'chennai');
+
+DELETE FROM v8_trips_8i3d WHERE rider in ('rider-A', 'rider-C', 'rider-D');
diff --git a/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i3d.zip
b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i3d.zip
new file mode 100644
index 0000000..7345d31
Binary files /dev/null and
b/crates/test/data/quickstart_trips_table/mor/avro/v8_trips_8i3d.zip differ
diff --git a/crates/test/data/sample_table/cow/v8_complexkeygen_hivestyle.sql
b/crates/test/data/sample_table/cow/v8_complexkeygen_hivestyle.sql
new file mode 100644
index 0000000..8771881
--- /dev/null
+++ b/crates/test/data/sample_table/cow/v8_complexkeygen_hivestyle.sql
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_complexkeygen_hivestyle (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>,
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ byteField BYTE,
+ shortField SHORT
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id,name',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.datasource.write.hive_style_partitioning' = 'true'
+)
+PARTITIONED BY (byteField, shortField);
+
+INSERT INTO v8_complexkeygen_hivestyle VALUES
+ (1, 'Alice', true, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true),
'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10, 300
+ ),
+ (2, 'Bob', false, 25000,
9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567, true),
'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40, STRUCT(789.012,
false)),
+ 20, 100
+ ),
+ (3, 'Carol', true, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678, true),
'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25,
STRUCT(456.789, true)),
+ 10, 300
+ );
+
+INSERT INTO v8_complexkeygen_hivestyle VALUES
+ (1, 'Alice', false, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true),
'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10, 300
+ ),
+ (4, 'Diana', true, 45000,
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789, true),
'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50,
STRUCT(987.654, true)),
+ 30, 100
+ );
diff --git a/crates/test/data/sample_table/cow/v8_complexkeygen_hivestyle.zip
b/crates/test/data/sample_table/cow/v8_complexkeygen_hivestyle.zip
new file mode 100644
index 0000000..310883e
Binary files /dev/null and
b/crates/test/data/sample_table/cow/v8_complexkeygen_hivestyle.zip differ
diff --git a/crates/test/data/sample_table/cow/v8_empty.sql
b/crates/test/data/sample_table/cow/v8_empty.sql
new file mode 100644
index 0000000..41dc38b
--- /dev/null
+++ b/crates/test/data/sample_table/cow/v8_empty.sql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+create table v8_empty (
+ id INT,
+ name STRING,
+ isActive BOOLEAN
+)
+ USING HUDI
+ TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ 'hoodie.metadata.enable' = 'false'
+);
diff --git a/crates/test/data/sample_table/cow/v8_empty.zip
b/crates/test/data/sample_table/cow/v8_empty.zip
new file mode 100644
index 0000000..ff208ac
Binary files /dev/null and b/crates/test/data/sample_table/cow/v8_empty.zip
differ
diff --git a/crates/test/data/sample_table/cow/v8_nonpartitioned.sql
b/crates/test/data/sample_table/cow/v8_nonpartitioned.sql
new file mode 100644
index 0000000..b2d4f83
--- /dev/null
+++ b/crates/test/data/sample_table/cow/v8_nonpartitioned.sql
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_nonpartitioned (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ byteField BYTE,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField ARRAY<STRUCT<arr_struct_f1:
STRING, arr_struct_f2: INT>>, -- Array of structs
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false'
+);
+
+INSERT INTO v8_nonpartitioned VALUES
+ (1, 'Alice', true, 1, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100), STRUCT('blue',
200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true), 'key2',
STRUCT(789.012, false)),
+ STRUCT('Alice', 30, STRUCT(123.456, true))
+ ),
+ (2, 'Bob', false, 0, 100, 25000, 9876543210,
2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE), CAST('2023-04-02
13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567, true), 'key4',
STRUCT(567.890, false)),
+ STRUCT('Bob', 40, STRUCT(789.012, false))
+ ),
+ (3, 'Carol', true, 1, 200, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600), STRUCT('white',
700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678, true), 'key6',
STRUCT(654.321, false)),
+ STRUCT('Carol', 25, STRUCT(456.789, true))
+ );
+
+INSERT INTO v8_nonpartitioned VALUES
+ (1, 'Alice', false, 1, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100), STRUCT('blue',
200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true), 'key2',
STRUCT(789.012, false)),
+ STRUCT('Alice', 30, STRUCT(123.456, true))
+ ),
+ (4, 'Diana', true, 1, 500, 45000, 987654321,
4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00'
AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900), STRUCT('gray',
1000)),
+ MAP('key7', STRUCT(456.789, true), 'key8',
STRUCT(123.456, false)),
+ STRUCT('Diana', 50, STRUCT(987.654, true))
+ );
diff --git a/crates/test/data/sample_table/cow/v8_nonpartitioned.zip
b/crates/test/data/sample_table/cow/v8_nonpartitioned.zip
new file mode 100644
index 0000000..36f74e3
Binary files /dev/null and
b/crates/test/data/sample_table/cow/v8_nonpartitioned.zip differ
diff --git
a/crates/test/data/sample_table/cow/v8_simplekeygen_hivestyle_no_metafields.sql
b/crates/test/data/sample_table/cow/v8_simplekeygen_hivestyle_no_metafields.sql
new file mode 100644
index 0000000..b224188
--- /dev/null
+++
b/crates/test/data/sample_table/cow/v8_simplekeygen_hivestyle_no_metafields.sql
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_simplekeygen_hivestyle_no_metafields (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, --
Array of structs
+ mapField MAP<STRING, STRUCT<map_field_value_struct_f1: DOUBLE,
map_field_value_struct_f2: BOOLEAN>>, -- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ byteField BYTE
+)
+USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.datasource.write.hive_style_partitioning' = 'true',
+ 'hoodie.datasource.write.drop.partition.columns' = 'false',
+ 'hoodie.populate.meta.fields' = 'false'
+)
+PARTITIONED BY (byteField);
+
+INSERT INTO v8_simplekeygen_hivestyle_no_metafields VALUES
+(1, 'Alice', false, 300, 15000, 1234567890, 1.0, 3.14159, 12345.67890,
CAST('2023-04-01' AS DATE), CAST('2023-04-01 12:01:00' AS TIMESTAMP),
CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100), STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456, true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30, STRUCT(123.456, true)),
+ 10
+),
+(2, 'Bob', false, 100, 25000, 9876543210, 2.0, 2.71828, 67890.12345,
CAST('2023-04-02' AS DATE), CAST('2023-04-02 13:02:00' AS TIMESTAMP),
CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400), STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567, true), 'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40, STRUCT(789.012, false)),
+ 20
+),
+(3, 'Carol', true, 200, 35000, 1928374650, 3.0, 1.41421, 11111.22222,
CAST('2023-04-03' AS DATE), CAST('2023-04-03 14:03:00' AS TIMESTAMP),
CAST('even more binary data' AS BINARY),
+ ARRAY(STRUCT('black', 600), STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678, true), 'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25, STRUCT(456.789, true)),
+ 10
+),
+(4, 'Diana', true, 500, 45000, 987654321, 4.0, 2.468, 65432.12345,
CAST('2023-04-04' AS DATE), CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new
binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900), STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789, true), 'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50, STRUCT(987.654, true)),
+ 30
+);
diff --git
a/crates/test/data/sample_table/cow/v8_simplekeygen_hivestyle_no_metafields.zip
b/crates/test/data/sample_table/cow/v8_simplekeygen_hivestyle_no_metafields.zip
new file mode 100644
index 0000000..586c4f6
Binary files /dev/null and
b/crates/test/data/sample_table/cow/v8_simplekeygen_hivestyle_no_metafields.zip
differ
diff --git a/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle.sql
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle.sql
new file mode 100644
index 0000000..e45e3f2
--- /dev/null
+++ b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle.sql
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_simplekeygen_nonhivestyle (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, -- Array of structs
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ byteField BYTE
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.datasource.write.hive_style_partitioning' = 'false',
+ 'hoodie.datasource.write.drop.partition.columns' = 'false'
+)
+PARTITIONED BY (byteField);
+
+INSERT INTO v8_simplekeygen_nonhivestyle VALUES
+ (1, 'Alice', true, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10
+ ),
+ (2, 'Bob', false, 100, 25000,
9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567,
true), 'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40,
STRUCT(789.012, false)),
+ 20
+ ),
+ (3, 'Carol', true, 200, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678,
true), 'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25,
STRUCT(456.789, true)),
+ 10
+ );
+
+INSERT INTO v8_simplekeygen_nonhivestyle VALUES
+ (1, 'Alice', false, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10
+ ),
+ (4, 'Diana', true, 500, 45000,
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789,
true), 'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50,
STRUCT(987.654, true)),
+ 30
+ );
diff --git a/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle.zip
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle.zip
new file mode 100644
index 0000000..b9809ee
Binary files /dev/null and
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle.zip differ
diff --git
a/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle_overwritetable.sql
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle_overwritetable.sql
new file mode 100644
index 0000000..387dff3
--- /dev/null
+++
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle_overwritetable.sql
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_simplekeygen_nonhivestyle_overwritetable (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, -- Array of structs
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ byteField BYTE
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow',
+ primaryKey = 'id',
+ preCombineField = 'longField',
+ 'hoodie.metadata.enable' = 'false',
+ 'hoodie.datasource.write.hive_style_partitioning' = 'false',
+ 'hoodie.datasource.write.drop.partition.columns' = 'false'
+)
+PARTITIONED BY (byteField);
+
+INSERT INTO v8_simplekeygen_nonhivestyle_overwritetable VALUES
+ (1, 'Alice', true, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10
+ ),
+ (2, 'Bob', false, 100, 25000,
9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567,
true), 'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40,
STRUCT(789.012, false)),
+ 20
+ ),
+ (3, 'Carol', true, 200, 35000,
1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678,
true), 'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25,
STRUCT(456.789, true)),
+ 10
+ );
+
+INSERT INTO v8_simplekeygen_nonhivestyle_overwritetable VALUES
+ (1, 'Alice', false, 300, 15000,
1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ 10
+ ),
+ (4, 'Diana', true, 500, 45000,
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789,
true), 'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50,
STRUCT(987.654, true)),
+ 30
+ );
+
+INSERT OVERWRITE TABLE v8_simplekeygen_nonhivestyle_overwritetable SELECT
+ 4, 'Diana', false, 500, 45000,
987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789,
true), 'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50,
STRUCT(987.654, true)),
+ 30
+ ;
diff --git
a/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle_overwritetable.zip
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle_overwritetable.zip
new file mode 100644
index 0000000..a5941f5
Binary files /dev/null and
b/crates/test/data/sample_table/cow/v8_simplekeygen_nonhivestyle_overwritetable.zip
differ
diff --git
a/crates/test/data/sample_table/cow/v8_timebasedkeygen_nonhivestyle.sql
b/crates/test/data/sample_table/cow/v8_timebasedkeygen_nonhivestyle.sql
new file mode 100644
index 0000000..3976e80
--- /dev/null
+++ b/crates/test/data/sample_table/cow/v8_timebasedkeygen_nonhivestyle.sql
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+CREATE TABLE v8_timebasedkeygen_nonhivestyle (
+ id INT,
+ name STRING,
+ isActive BOOLEAN,
+ byteField BYTE,
+ shortField SHORT,
+ intField INT,
+ longField LONG,
+ floatField FLOAT,
+ doubleField DOUBLE,
+ decimalField DECIMAL(10,5),
+ dateField DATE,
+ timestampField TIMESTAMP,
+ binaryField BINARY,
+ arrayField
ARRAY<STRUCT<arr_struct_f1: STRING, arr_struct_f2: INT>>, -- Array of structs
+ mapField MAP<STRING,
STRUCT<map_field_value_struct_f1: DOUBLE, map_field_value_struct_f2: BOOLEAN>>,
-- Map with struct values
+ structField STRUCT<
+ field1: STRING,
+ field2: INT,
+ child_struct: STRUCT<
+ child_field1: DOUBLE,
+ child_field2: BOOLEAN
+ >
+ >,
+ ts_str STRING -- partition column (see PARTITIONED BY below); rows supply strings like '2023-04-01T12:01:00.123Z'
+)
+ USING HUDI
+TBLPROPERTIES (
+ type = 'cow', -- copy-on-write table type
+ primaryKey = 'id', -- record key field
+ preCombineField = 'longField', -- ordering field Hudi uses to choose between records that share a key
+ 'hoodie.metadata.enable' = 'false', -- Hudi metadata table switched off for this fixture
+ 'hoodie.datasource.write.hive_style_partitioning' = 'false', -- partition paths use the bare value, not ts_str=<value>
+ 'hoodie.table.keygenerator.class' =
'org.apache.hudi.keygen.TimestampBasedKeyGenerator', -- partition path is derived from a timestamp-like field
+ 'hoodie.keygen.timebased.timestamp.type' = 'DATE_STRING', -- the partition field holds a formatted date string
+ 'hoodie.keygen.timebased.input.dateformat' = "yyyy-MM-dd'T'HH:mm:ss.SSSZ", -- pattern used to parse ts_str
+ 'hoodie.keygen.timebased.output.dateformat' = 'yyyy/MM/dd/HH' -- pattern used to render the partition path (year/month/day/hour)
+)
+PARTITIONED BY (ts_str);
+
+INSERT INTO v8_timebasedkeygen_nonhivestyle VALUES -- first write: ids 1, 2 and 3, each with a different ts_str day
+ (1, 'Alice', true, 10, 300,
15000, 1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ '2023-04-01T12:01:00.123Z' -- ts_str: source value for the partition path
+ ),
+ (2, 'Bob', false, 20, 100,
25000, 9876543210, 2.0, 2.71828, 67890.12345, CAST('2023-04-02' AS DATE),
CAST('2023-04-02 13:02:00' AS TIMESTAMP), CAST('more binary data' AS BINARY),
+ ARRAY(STRUCT('yellow', 400),
STRUCT('purple', 500)),
+ MAP('key3', STRUCT(234.567,
true), 'key4', STRUCT(567.890, false)),
+ STRUCT('Bob', 40,
STRUCT(789.012, false)),
+ '2023-04-02T12:01:00.123Z' -- ts_str
+ ),
+ (3, 'Carol', true, 10, 300,
35000, 1928374650, 3.0, 1.41421, 11111.22222, CAST('2023-04-03' AS DATE),
CAST('2023-04-03 14:03:00' AS TIMESTAMP), CAST('even more binary data' AS
BINARY),
+ ARRAY(STRUCT('black', 600),
STRUCT('white', 700), STRUCT('pink', 800)),
+ MAP('key5', STRUCT(345.678,
true), 'key6', STRUCT(654.321, false)),
+ STRUCT('Carol', 25,
STRUCT(456.789, true)),
+ '2023-04-03T12:01:00.123Z' -- ts_str
+ );
+
+INSERT INTO v8_timebasedkeygen_nonhivestyle VALUES -- second write: id 1 again with isActive = false (other values unchanged), plus new id 4
+ (1, 'Alice', false, 10, 300,
15000, 1234567890, 1.0, 3.14159, 12345.67890, CAST('2023-04-01' AS DATE),
CAST('2023-04-01 12:01:00' AS TIMESTAMP), CAST('binary data' AS BINARY),
+ ARRAY(STRUCT('red', 100),
STRUCT('blue', 200), STRUCT('green', 300)),
+ MAP('key1', STRUCT(123.456,
true), 'key2', STRUCT(789.012, false)),
+ STRUCT('Alice', 30,
STRUCT(123.456, true)),
+ '2023-04-01T12:01:00.123Z' -- same ts_str as id 1 in the first INSERT
+ ),
+ (4, 'Diana', true, 30, 100,
45000, 987654321, 4.0, 2.468, 65432.12345, CAST('2023-04-04' AS DATE),
CAST('2023-04-04 15:04:00' AS TIMESTAMP), CAST('new binary data' AS BINARY),
+ ARRAY(STRUCT('orange', 900),
STRUCT('gray', 1000)),
+ MAP('key7', STRUCT(456.789,
true), 'key8', STRUCT(123.456, false)),
+ STRUCT('Diana', 50,
STRUCT(987.654, true)),
+ '2023-04-04T13:01:00.123Z' -- ts_str; hour is 13 here, whereas every other row uses hour 12
+ );
diff --git
a/crates/test/data/sample_table/cow/v8_timebasedkeygen_nonhivestyle.zip
b/crates/test/data/sample_table/cow/v8_timebasedkeygen_nonhivestyle.zip
new file mode 100644
index 0000000..d962047
Binary files /dev/null and
b/crates/test/data/sample_table/cow/v8_timebasedkeygen_nonhivestyle.zip differ
diff --git a/crates/test/src/lib.rs b/crates/test/src/lib.rs
index 3a0cc5e..a69d004 100644
--- a/crates/test/src/lib.rs
+++ b/crates/test/src/lib.rs
@@ -41,6 +41,10 @@ pub enum QuickstartTripsTable {
V6Trips8I1U,
#[strum(serialize = "v6_trips_8i3d")]
V6Trips8I3D,
+ #[strum(serialize = "v8_trips_8i1u")]
+ V8Trips8I1U,
+ #[strum(serialize = "v8_trips_8i3d")]
+ V8Trips8I3D,
}
impl QuickstartTripsTable {
@@ -111,6 +115,13 @@ pub enum SampleTable {
V6SimplekeygenNonhivestyle,
V6SimplekeygenNonhivestyleOverwritetable,
V6TimebasedkeygenNonhivestyle,
+ V8ComplexkeygenHivestyle,
+ V8Empty,
+ V8Nonpartitioned,
+ V8SimplekeygenHivestyleNoMetafields,
+ V8SimplekeygenNonhivestyle,
+ V8SimplekeygenNonhivestyleOverwritetable,
+ V8TimebasedkeygenNonhivestyle,
}
impl SampleTable {
@@ -200,6 +211,14 @@ mod tests {
let path = t.zip_path("mor", Some("avro"));
assert!(path.exists());
}
+ QuickstartTripsTable::V8Trips8I1U => {
+ let path = t.zip_path("mor", Some("avro"));
+ assert!(path.exists());
+ }
+ QuickstartTripsTable::V8Trips8I3D => {
+ let path = t.zip_path("mor", Some("avro"));
+ assert!(path.exists());
+ }
}
}
}
@@ -215,6 +234,10 @@ mod tests {
let path = t.zip_path("mor", Some("parquet"));
assert!(path.exists());
}
+ ref table if table.as_ref().starts_with("v8") => {
+ let path = t.zip_path("cow", None);
+ assert!(path.exists());
+ }
_ => {
let path = t.zip_path("cow", None);
assert!(path.exists());