This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new a6a3fd77 test (datafusion): add test for table provider creation (#651)
a6a3fd77 is described below
commit a6a3fd779dfe895e02885803f4f82f6b39407732
Author: Alon Agmon <[email protected]>
AuthorDate: Sat Sep 28 10:10:08 2024 +0300
test (datafusion): add test for table provider creation (#651)
* add test for table provider creation
* fix formatting
* fixing yet another formatting issue
* testing schema using data fusion
---------
Co-authored-by: Alon Agmon <[email protected]>
---
crates/integrations/datafusion/src/table.rs | 54 +++++++++
.../tests/test_data/TableMetadataV2Valid.json | 122 +++++++++++++++++++++
2 files changed, 176 insertions(+)
diff --git a/crates/integrations/datafusion/src/table.rs b/crates/integrations/datafusion/src/table.rs
index f12d41ee..2797e12d 100644
--- a/crates/integrations/datafusion/src/table.rs
+++ b/crates/integrations/datafusion/src/table.rs
@@ -110,3 +110,57 @@ impl TableProvider for IcebergTableProvider {
Ok(filter_support)
}
}
+
+#[cfg(test)]
+mod tests {
+ use datafusion::common::Column;
+ use datafusion::prelude::SessionContext;
+ use iceberg::io::FileIO;
+ use iceberg::table::{StaticTable, Table};
+ use iceberg::TableIdent;
+
+ use super::*;
+
+ async fn get_test_table_from_metadata_file() -> Table {
+ let metadata_file_name = "TableMetadataV2Valid.json";
+ let metadata_file_path = format!(
+ "{}/tests/test_data/{}",
+ env!("CARGO_MANIFEST_DIR"),
+ metadata_file_name
+ );
+ let file_io = FileIO::from_path(&metadata_file_path)
+ .unwrap()
+ .build()
+ .unwrap();
+ let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
+ let static_table =
+ StaticTable::from_metadata_file(&metadata_file_path, static_identifier, file_io)
+ .await
+ .unwrap();
+ static_table.into_table()
+ }
+
+ #[tokio::test]
+ async fn test_try_new_from_table() {
+ let table = get_test_table_from_metadata_file().await;
+ let table_provider = IcebergTableProvider::try_new_from_table(table.clone())
+ .await
+ .unwrap();
+ let ctx = SessionContext::new();
+ ctx.register_table("mytable", Arc::new(table_provider))
+ .unwrap();
+ let df = ctx.sql("SELECT * FROM mytable").await.unwrap();
+ let df_schema = df.schema();
+ let df_columns = df_schema.fields();
+ assert_eq!(df_columns.len(), 3);
+ let x_column = df_columns.first().unwrap();
+ let column_data = format!(
+ "{:?}:{:?}",
+ x_column.name(),
+ x_column.data_type().to_string()
+ );
+ assert_eq!(column_data, "\"x\":\"Int64\"");
+ let has_column = df_schema.has_column(&Column::from_name("z"));
+ assert!(has_column);
+ }
+}
diff --git a/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json b/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json
new file mode 100644
index 00000000..0dc89de5
--- /dev/null
+++ b/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json
@@ -0,0 +1,122 @@
+{
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 1,
+ "schemas": [
+ {
+ "type": "struct",
+ "schema-id": 0,
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ },
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [
+ 1,
+ 2
+ ],
+ "fields": [
+ {
+ "id": 1,
+ "name": "x",
+ "required": true,
+ "type": "long"
+ },
+ {
+ "id": 2,
+ "name": "y",
+ "required": true,
+ "type": "long",
+ "doc": "comment"
+ },
+ {
+ "id": 3,
+ "name": "z",
+ "required": true,
+ "type": "long"
+ }
+ ]
+ }
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [
+ {
+ "spec-id": 0,
+ "fields": [
+ {
+ "name": "x",
+ "transform": "identity",
+ "source-id": 1,
+ "field-id": 1000
+ }
+ ]
+ }
+ ],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {
+ "transform": "identity",
+ "source-id": 2,
+ "direction": "asc",
+ "null-order": "nulls-first"
+ },
+ {
+ "transform": "bucket[4]",
+ "source-id": 3,
+ "direction": "desc",
+ "null-order": "nulls-last"
+ }
+ ]
+ }
+ ],
+ "properties": {},
+ "current-snapshot-id": 3055729675574597004,
+ "snapshots": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770,
+ "sequence-number": 0,
+ "summary": {
+ "operation": "append"
+ },
+ "manifest-list": "s3://a/b/1.avro"
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "parent-snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1555100955770,
+ "sequence-number": 1,
+ "summary": {
+ "operation": "append"
+ },
+ "manifest-list": "s3://a/b/2.avro",
+ "schema-id": 1
+ }
+ ],
+ "snapshot-log": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "timestamp-ms": 1555100955770
+ }
+ ],
+ "metadata-log": []
+}
\ No newline at end of file