This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 560ad35361 infer column nullability on joins (#6060)
560ad35361 is described below
commit 560ad353619f4526352fd3f64d09a59babe87a3c
Author: comphead <[email protected]>
AuthorDate: Fri Apr 21 12:15:52 2023 -0700
infer column nullability on joins (#6060)
---
.../core/tests/sqllogictests/test_files/join.slt | 25 ++++++++++++++++-
datafusion/expr/src/logical_plan/builder.rs | 32 +++++++++++++++++-----
2 files changed, 49 insertions(+), 8 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/join.slt
b/datafusion/core/tests/sqllogictests/test_files/join.slt
index b940232a74..2f49c5b97c 100644
--- a/datafusion/core/tests/sqllogictests/test_files/join.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/join.slt
@@ -532,17 +532,40 @@ FROM t1
# test create table from query with LEFT join
statement ok
-create table temp as
+create table left_join_test as
with t1 as (select 1 as col1, 'asd' as col2),
t2 as (select 1 as col3, 'sdf' as col4)
select col2, col4 from t1 left join t2 on col1 = col3
+# test create table from query with RIGHT join
+statement ok
+create table right_join_test as
+with t1 as (select 1 as col1, 'asd' as col2),
+ t2 as (select 1 as col3, 'sdf' as col4)
+select col2, col4 from t1 right join t2 on col1 = col3
+
+# test create table from query with FULL OUTER join
+statement ok
+create table full_join_test as
+with t1 as (select 1 as col1, 'asd' as col2),
+ t2 as (select 1 as col3, 'sdf' as col4)
+select col2, col4 from t1 full outer join t2 on col1 = col3
+
statement ok
drop table IF EXISTS t1;
statement ok
drop table IF EXISTS t2;
+statement ok
+drop table IF EXISTS left_join_test;
+
+statement ok
+drop table IF EXISTS right_join_test;
+
+statement ok
+drop table IF EXISTS full_join_test;
+
# batch size
statement ok
set datafusion.execution.batch_size = 8192;
diff --git a/datafusion/expr/src/logical_plan/builder.rs
b/datafusion/expr/src/logical_plan/builder.rs
index ec2a27542c..d72f6b462c 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -1041,11 +1041,18 @@ pub fn build_join_schema(
right: &DFSchema,
join_type: &JoinType,
) -> Result<DFSchema> {
+ fn nullify_fields(fields: &[DFField]) -> Vec<DFField> {
+ fields
+ .iter()
+ .map(|f| f.clone().with_nullable(true))
+ .collect()
+ }
+
let right_fields = right.fields();
let left_fields = left.fields();
let fields: Vec<DFField> = match join_type {
- JoinType::Inner | JoinType::Full | JoinType::Right => {
+ JoinType::Inner => {
// left then right
left_fields
.iter()
@@ -1055,14 +1062,25 @@ pub fn build_join_schema(
}
JoinType::Left => {
// left then right, right set to nullable in case of not matched
scenario
- let right_fields_nullable: Vec<DFField> = right_fields
- .iter()
- .map(|f| f.clone().with_nullable(true))
- .collect();
-
left_fields
.iter()
- .chain(&right_fields_nullable)
+ .chain(&nullify_fields(right_fields))
+ .cloned()
+ .collect()
+ }
+ JoinType::Right => {
+ // left then right, left set to nullable in case of not matched
scenario
+ nullify_fields(left_fields)
+ .iter()
+ .chain(right_fields.iter())
+ .cloned()
+ .collect()
+ }
+ JoinType::Full => {
+ // left then right, all set to nullable in case of not matched
scenario
+ nullify_fields(left_fields)
+ .iter()
+ .chain(&nullify_fields(right_fields))
.cloned()
.collect()
}