This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b97daf7fc8 [task #8203] Port tests in joins.rs to sqllogictest (#8996)
b97daf7fc8 is described below
commit b97daf7fc834920f4a312670da9d5560e2facbe7
Author: Reilly.tang <[email protected]>
AuthorDate: Fri Jan 26 03:48:31 2024 +0800
[task #8203] Port tests in joins.rs to sqllogictest (#8996)
Signed-off-by: tangruilin <[email protected]>
---
datafusion/core/tests/sql/joins.rs | 59 ----------------------
datafusion/core/tests/sql/mod.rs | 73 ----------------------------
datafusion/sqllogictest/test_files/joins.slt | 70 ++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 132 deletions(-)
diff --git a/datafusion/core/tests/sql/joins.rs
b/datafusion/core/tests/sql/joins.rs
index 0cc102002e..f7d5205db0 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -20,65 +20,6 @@ use
datafusion::test_util::register_unbounded_file_with_ordering;
use super::*;
-#[tokio::test]
-#[ignore]
-/// TODO: need to repair. Wrong Test: ambiguous column name: a
-async fn nestedjoin_with_alias() -> Result<()> {
- // repro case for https://github.com/apache/arrow-datafusion/issues/2867
- let sql = "select * from ((select 1 as a, 2 as b) c INNER JOIN (select 1
as a, 3 as d) e on c.a = e.a) f;";
- let expected = [
- "+---+---+---+---+",
- "| a | b | a | d |",
- "+---+---+---+---+",
- "| 1 | 2 | 1 | 3 |",
- "+---+---+---+---+",
- ];
- let ctx = SessionContext::new();
- let actual = execute_to_batches(&ctx, sql).await;
- assert_batches_eq!(expected, &actual);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn join_partitioned() -> Result<()> {
- // self join on partition id (workaround for duplicate column name)
- let results = execute_with_partition(
- "SELECT 1 FROM test JOIN (SELECT c1 AS id1 FROM test) AS a ON c1=id1",
- 4,
- )
- .await?;
-
- assert_eq!(
- results.iter().map(|b| b.num_rows()).sum::<usize>(),
- 4 * 10 * 10
- );
-
- Ok(())
-}
-
-#[tokio::test]
-#[ignore = "Test ignored, will be enabled after fixing the NAAJ bug"]
-// https://github.com/apache/arrow-datafusion/issues/4211
-async fn null_aware_left_anti_join() -> Result<()> {
- let test_repartition_joins = vec![true, false];
- for repartition_joins in test_repartition_joins {
- let ctx = create_left_semi_anti_join_context_with_null_ids(
- "t1_id",
- "t2_id",
- repartition_joins,
- )
- .unwrap();
-
- let sql = "SELECT t1_id, t1_name FROM t1 WHERE t1_id NOT IN (SELECT
t2_id FROM t2) ORDER BY t1_id";
- let actual = execute_to_batches(&ctx, sql).await;
- let expected = ["++", "++"];
- assert_batches_eq!(expected, &actual);
- }
-
- Ok(())
-}
-
#[tokio::test]
async fn join_change_in_planner() -> Result<()> {
let config = SessionConfig::new().with_target_partitions(8);
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 40ae75cd7f..8852854a8b 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -76,79 +76,6 @@ pub mod repartition;
pub mod select;
mod sql_api;
-fn create_left_semi_anti_join_context_with_null_ids(
- column_left: &str,
- column_right: &str,
- repartition_joins: bool,
-) -> Result<SessionContext> {
- let ctx = SessionContext::new_with_config(
- SessionConfig::new()
- .with_repartition_joins(repartition_joins)
- .with_target_partitions(2)
- .with_batch_size(4096),
- );
-
- let t1_schema = Arc::new(Schema::new(vec![
- Field::new(column_left, DataType::UInt32, true),
- Field::new("t1_name", DataType::Utf8, true),
- Field::new("t1_int", DataType::UInt32, true),
- ]));
- let t1_data = RecordBatch::try_new(
- t1_schema,
- vec![
- Arc::new(UInt32Array::from(vec![
- Some(11),
- Some(11),
- Some(22),
- Some(33),
- Some(44),
- None,
- ])),
- Arc::new(StringArray::from(vec![
- Some("a"),
- Some("a"),
- Some("b"),
- Some("c"),
- Some("d"),
- Some("e"),
- ])),
- Arc::new(UInt32Array::from(vec![1, 1, 2, 3, 4, 0])),
- ],
- )?;
- ctx.register_batch("t1", t1_data)?;
-
- let t2_schema = Arc::new(Schema::new(vec![
- Field::new(column_right, DataType::UInt32, true),
- Field::new("t2_name", DataType::Utf8, true),
- Field::new("t2_int", DataType::UInt32, true),
- ]));
- let t2_data = RecordBatch::try_new(
- t2_schema,
- vec![
- Arc::new(UInt32Array::from(vec![
- Some(11),
- Some(11),
- Some(22),
- Some(44),
- Some(55),
- None,
- ])),
- Arc::new(StringArray::from(vec![
- Some("z"),
- Some("z"),
- Some("y"),
- Some("x"),
- Some("w"),
- Some("v"),
- ])),
- Arc::new(UInt32Array::from(vec![3, 3, 1, 3, 3, 0])),
- ],
- )?;
- ctx.register_batch("t2", t2_data)?;
-
- Ok(ctx)
-}
-
async fn register_aggregate_csv_by_sql(ctx: &SessionContext) {
let testdata = datafusion::test_util::arrow_test_data();
diff --git a/datafusion/sqllogictest/test_files/joins.slt
b/datafusion/sqllogictest/test_files/joins.slt
index e605813b20..9619696679 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -3523,3 +3523,73 @@ set datafusion.optimizer.prefer_existing_sort = false;
statement ok
drop table annotated_data;
+
+####
+# nestedjoin_with_alias_test
+####
+
+query IIII
+select * from ((select 1 as a, 2 as b) c INNER JOIN (select 1 as c, 3 as d) e
on c.a = e.c) f;
+----
+1 2 1 3
+
+####
+# create_left_semi_anti_join_context_with_null_ids_table_test
+####
+
+statement ok
+CREATE TABLE join_test_left(t1_id INT UNSIGNED, t1_name VARCHAR, t1_int INT
UNSIGNED)
+AS VALUES
+(11, 'a', 1),
+(11, 'a', 1),
+(22, 'b', 2),
+(33, 'c', 3),
+(44, 'd', 4),
+(NULL, 'e', 0);
+
+statement ok
+CREATE TABLE join_test_right(t2_id INT UNSIGNED, t2_name VARCHAR, t2_int INT
UNSIGNED)
+AS VALUES
+(11, 'z', 3),
+(11, 'z', 3),
+(22, 'y', 1),
+(33, 'x', 3),
+(44, 'w', 3),
+(NULL, 'v', 0);
+
+query IT
+SELECT t1_id, t1_name FROM join_test_left WHERE t1_id NOT IN (SELECT t2_id
FROM join_test_right) ORDER BY t1_id;
+----
+NULL e
+
+####
+# join_partitioned_test
+####
+
+statement ok
+CREATE TABLE join_partitioned_table(c1 INT UNSIGNED, c2 INT UNSIGNED, c3
BOOLEAN)
+AS VALUES
+(4, 1, true),
+(4, 2, false),
+(4, 3, true),
+(4, 4, false);
+
+query I
+SELECT 1 FROM join_partitioned_table JOIN (SELECT c1 AS id1 FROM
join_partitioned_table) AS a ON c1=id1;
+----
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1