This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new c0f9fdcd2 feat(datafusion): Add sqllogictest for DataFusion INSERT INTO (#1887)
c0f9fdcd2 is described below
commit c0f9fdcd283ec650c64df7f367cd1ae473c24e62
Author: Shawn Chang <[email protected]>
AuthorDate: Mon Dec 8 02:14:42 2025 -0800
feat(datafusion): Add sqllogictest for DataFusion INSERT INTO (#1887)
## Which issue does this PR close?
- Closes #1835
## What changes are included in this PR?
- Added a new schedule step that runs the `INSERT INTO` sqllogictest
## Are these changes tested?
Yes, this change is itself a test.
---
crates/sqllogictest/src/engine/datafusion.rs | 71 +++++++++++-
.../sqllogictest/testdata/schedules/df_test.toml | 6 +-
.../testdata/slts/df_test/insert_into.slt | 119 +++++++++++++++++++++
.../testdata/slts/df_test/show_tables.slt | 6 ++
4 files changed, 200 insertions(+), 2 deletions(-)
diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs
index b3e37d920..e3402dfa9 100644
--- a/crates/sqllogictest/src/engine/datafusion.rs
+++ b/crates/sqllogictest/src/engine/datafusion.rs
@@ -22,8 +22,9 @@ use std::sync::Arc;
use datafusion::catalog::CatalogProvider;
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_sqllogictest::DataFusion;
-use iceberg::CatalogBuilder;
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
+use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec};
+use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation};
use iceberg_datafusion::IcebergCatalogProvider;
use indicatif::ProgressBar;
use toml::Table as TomlTable;
@@ -84,8 +85,76 @@ impl DataFusionEngine {
)
.await?;
+ // Create a test namespace for INSERT INTO tests
+ let namespace = NamespaceIdent::new("default".to_string());
+ catalog.create_namespace(&namespace, HashMap::new()).await?;
+
+ // Create test tables
+ Self::create_unpartitioned_table(&catalog, &namespace).await?;
+ Self::create_partitioned_table(&catalog, &namespace).await?;
+
Ok(Arc::new(
IcebergCatalogProvider::try_new(Arc::new(catalog)).await?,
))
}
+
+ /// Create an unpartitioned test table with id and name columns
+ /// TODO: this can be removed when we support CREATE TABLE
+ async fn create_unpartitioned_table(
+ catalog: &impl Catalog,
+ namespace: &NamespaceIdent,
+ ) -> anyhow::Result<()> {
+ let schema = Schema::builder()
+ .with_fields(vec![
+ NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(),
+ ])
+ .build()?;
+
+ catalog
+ .create_table(
+ namespace,
+ TableCreation::builder()
+ .name("test_unpartitioned_table".to_string())
+ .schema(schema)
+ .build(),
+ )
+ .await?;
+
+ Ok(())
+ }
+
+ /// Create a partitioned test table with id, category, and value columns
+ /// Partitioned by category using identity transform
+ /// TODO: this can be removed when we support CREATE TABLE
+ async fn create_partitioned_table(
+ catalog: &impl Catalog,
+ namespace: &NamespaceIdent,
+ ) -> anyhow::Result<()> {
+ let schema = Schema::builder()
+ .with_fields(vec![
+ NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+ NestedField::required(2, "category", Type::Primitive(PrimitiveType::String)).into(),
+ NestedField::optional(3, "value", Type::Primitive(PrimitiveType::String)).into(),
+ ])
+ .build()?;
+
+ let partition_spec = UnboundPartitionSpec::builder()
+ .with_spec_id(0)
+ .add_partition_field(2, "category", Transform::Identity)?
+ .build();
+
+ catalog
+ .create_table(
+ namespace,
+ TableCreation::builder()
+ .name("test_partitioned_table".to_string())
+ .schema(schema)
+ .partition_spec(partition_spec)
+ .build(),
+ )
+ .await?;
+
+ Ok(())
+ }
}
diff --git a/crates/sqllogictest/testdata/schedules/df_test.toml b/crates/sqllogictest/testdata/schedules/df_test.toml
index 073374495..df5e638d5 100644
--- a/crates/sqllogictest/testdata/schedules/df_test.toml
+++ b/crates/sqllogictest/testdata/schedules/df_test.toml
@@ -20,4 +20,8 @@ df = { type = "datafusion" }
[[steps]]
engine = "df"
-slt = "df_test/show_tables.slt"
\ No newline at end of file
+slt = "df_test/show_tables.slt"
+
+[[steps]]
+engine = "df"
+slt = "df_test/insert_into.slt"
diff --git a/crates/sqllogictest/testdata/slts/df_test/insert_into.slt b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
new file mode 100644
index 000000000..2ba33afcd
--- /dev/null
+++ b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Verify the table is initially empty
+query IT rowsort
+SELECT * FROM default.default.test_unpartitioned_table
+----
+
+# Insert a single row and verify the count
+query I
+INSERT INTO default.default.test_unpartitioned_table VALUES (1, 'Alice')
+----
+1
+
+# Verify the inserted row
+query IT rowsort
+SELECT * FROM default.default.test_unpartitioned_table
+----
+1 Alice
+
+# Insert multiple rows and verify the count
+query I
+INSERT INTO default.default.test_unpartitioned_table VALUES (2, 'Bob'), (3, 'Charlie')
+----
+2
+
+# Verify all rows
+query IT rowsort
+SELECT * FROM default.default.test_unpartitioned_table
+----
+1 Alice
+2 Bob
+3 Charlie
+
+# Insert with NULL value and verify the count
+query I
+INSERT INTO default.default.test_unpartitioned_table VALUES (4, NULL)
+----
+1
+
+# Verify NULL handling
+query IT rowsort
+SELECT * FROM default.default.test_unpartitioned_table
+----
+1 Alice
+2 Bob
+3 Charlie
+4 NULL
+
+# Test partitioned table - verify initially empty
+query ITT rowsort
+SELECT * FROM default.default.test_partitioned_table
+----
+
+# Insert single row into partitioned table
+query I
+INSERT INTO default.default.test_partitioned_table VALUES (1, 'electronics', 'laptop')
+----
+1
+
+# Verify the inserted row in partitioned table
+query ITT rowsort
+SELECT * FROM default.default.test_partitioned_table
+----
+1 electronics laptop
+
+# Insert multiple rows with different partition values
+query I
+INSERT INTO default.default.test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt')
+----
+4
+
+# Verify all rows in partitioned table
+query ITT rowsort
+SELECT * FROM default.default.test_partitioned_table
+----
+1 electronics laptop
+2 electronics phone
+3 books novel
+4 books textbook
+5 clothing shirt
+
+# Insert with NULL value in optional column
+query I
+INSERT INTO default.default.test_partitioned_table VALUES (6, 'electronics', NULL)
+----
+1
+
+# Verify NULL handling in partitioned table
+query ITT rowsort
+SELECT * FROM default.default.test_partitioned_table
+----
+1 electronics laptop
+2 electronics phone
+3 books novel
+4 books textbook
+5 clothing shirt
+6 electronics NULL
+
+# Verify partition filtering works
+query ITT rowsort
+SELECT * FROM default.default.test_partitioned_table WHERE category = 'books'
+----
+3 books novel
+4 books textbook
diff --git a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt
index 34709d735..c5da5f627 100644
--- a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt
+++ b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt
@@ -25,6 +25,12 @@ datafusion information_schema routines VIEW
datafusion information_schema schemata VIEW
datafusion information_schema tables VIEW
datafusion information_schema views VIEW
+default default test_partitioned_table BASE TABLE
+default default test_partitioned_table$manifests BASE TABLE
+default default test_partitioned_table$snapshots BASE TABLE
+default default test_unpartitioned_table BASE TABLE
+default default test_unpartitioned_table$manifests BASE TABLE
+default default test_unpartitioned_table$snapshots BASE TABLE
default information_schema columns VIEW
default information_schema df_settings VIEW
default information_schema parameters VIEW