This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new bc0ed23b9e fix: Timestamp with timezone not considered `join on` 
(#8150)
bc0ed23b9e is described below

commit bc0ed23b9ecc113cdfd9a5d68d581be9c4cbf914
Author: L_B__ <[email protected]>
AuthorDate: Sat Nov 18 03:34:47 2023 +0800

    fix: Timestamp with timezone not considered `join on` (#8150)
    
    * fix: Timestamp with timezone not considerd in
    
    * Add Test For Explain HashJoin On Timestamp with Tz
    
    ---------
    
    Co-authored-by: ackingliu <[email protected]>
---
 datafusion/expr/src/utils.rs                 |  2 +-
 datafusion/sqllogictest/test_files/joins.slt | 77 ++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 8f13bf5f61..ff95ff10e7 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -901,7 +901,7 @@ pub fn can_hash(data_type: &DataType) -> bool {
         DataType::UInt64 => true,
         DataType::Float32 => true,
         DataType::Float64 => true,
-        DataType::Timestamp(time_unit, None) => match time_unit {
+        DataType::Timestamp(time_unit, _) => match time_unit {
             TimeUnit::Second => true,
             TimeUnit::Millisecond => true,
             TimeUnit::Microsecond => true,
diff --git a/datafusion/sqllogictest/test_files/joins.slt 
b/datafusion/sqllogictest/test_files/joins.slt
index fa3a6cff8c..737b43b5a9 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -140,6 +140,17 @@ SELECT
 FROM
   test_timestamps_table_source;
 
+# create a table of timestamps with time zone
+statement ok
+CREATE TABLE test_timestamps_tz_table as
+SELECT
+  arrow_cast(ts::timestamp::bigint, 'Timestamp(Nanosecond, Some("UTC"))') as 
nanos,
+  arrow_cast(ts::timestamp::bigint / 1000, 'Timestamp(Microsecond, 
Some("UTC"))') as micros,
+  arrow_cast(ts::timestamp::bigint / 1000000, 'Timestamp(Millisecond, 
Some("UTC"))') as millis,
+  arrow_cast(ts::timestamp::bigint / 1000000000, 'Timestamp(Second, 
Some("UTC"))') as secs,
+  names
+FROM
+  test_timestamps_table_source;
 
 
 statement ok
@@ -2462,6 +2473,16 @@ test_timestamps_table
 NULL NULL NULL NULL Row 2
 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10 Row 3
 
+# show the contents of the timestamp with timezone table
+query PPPPT
+select * from
+test_timestamps_tz_table
+----
+2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0
+2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1
+NULL NULL NULL NULL Row 2
+2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10Z Row 3
+
 # test timestamp join on nanos datatype
 query PPPPTPPPPT rowsort
 SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM 
test_timestamps_table ) as t2 ON t1.nanos = t2.nanos;
@@ -2470,6 +2491,14 @@ SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * 
FROM test_timestamps_ta
 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 
2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 2018-11-13T17:11:10.011375885 
2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10 Row 3 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3
 
+# test timestamp with timezone join on nanos datatype
+query PPPPTPPPPT rowsort
+SELECT * FROM test_timestamps_tz_table as t1 JOIN (SELECT * FROM 
test_timestamps_tz_table ) as t2 ON t1.nanos = t2.nanos;
+----
+2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1
+2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0
+2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10Z Row 3 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3
+
 # test timestamp join on micros datatype
 query PPPPTPPPPT rowsort
 SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM 
test_timestamps_table ) as t2 ON t1.micros = t2.micros
@@ -2478,6 +2507,14 @@ SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * 
FROM test_timestamps_ta
 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 
2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 2018-11-13T17:11:10.011375885 
2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10 Row 3 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3
 
+# test timestamp with timezone join on micros datatype
+query PPPPTPPPPT rowsort
+SELECT * FROM test_timestamps_tz_table as t1 JOIN (SELECT * FROM 
test_timestamps_tz_table ) as t2 ON t1.micros = t2.micros
+----
+2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1
+2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0
+2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10Z Row 3 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3
+
 # test timestamp join on millis datatype
 query PPPPTPPPPT rowsort
 SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * FROM 
test_timestamps_table ) as t2 ON t1.millis = t2.millis
@@ -2486,6 +2523,46 @@ SELECT * FROM test_timestamps_table as t1 JOIN (SELECT * 
FROM test_timestamps_ta
 2018-11-13T17:11:10.011375885 2018-11-13T17:11:10.011375 
2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0 2018-11-13T17:11:10.011375885 
2018-11-13T17:11:10.011375 2018-11-13T17:11:10.011 2018-11-13T17:11:10 Row 0
 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10 Row 3 2021-01-01T05:11:10.432 2021-01-01T05:11:10.432 
2021-01-01T05:11:10.432 2021-01-01T05:11:10 Row 3
 
+# test timestamp with timezone join on millis datatype
+query PPPPTPPPPT rowsort
+SELECT * FROM test_timestamps_tz_table as t1 JOIN (SELECT * FROM 
test_timestamps_tz_table ) as t2 ON t1.millis = t2.millis
+----
+2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1 2011-12-13T11:13:10.123450Z 
2011-12-13T11:13:10.123450Z 2011-12-13T11:13:10.123Z 2011-12-13T11:13:10Z Row 1
+2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0 
2018-11-13T17:11:10.011375885Z 2018-11-13T17:11:10.011375Z 
2018-11-13T17:11:10.011Z 2018-11-13T17:11:10Z Row 0
+2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10Z Row 3 2021-01-01T05:11:10.432Z 2021-01-01T05:11:10.432Z 
2021-01-01T05:11:10.432Z 2021-01-01T05:11:10Z Row 3
+
+####
+# Config setup
+####
+
+statement ok
+set datafusion.explain.logical_plan_only = false;
+
+statement ok
+set datafusion.optimizer.prefer_hash_join = true;
+
+# explain hash join on timestamp with timezone type
+query TT
+EXPLAIN SELECT * FROM test_timestamps_tz_table as t1 JOIN 
test_timestamps_tz_table as t2 ON t1.millis = t2.millis
+----
+logical_plan
+Inner Join: t1.millis = t2.millis
+--SubqueryAlias: t1
+----TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, 
secs, names]
+--SubqueryAlias: t2
+----TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, 
secs, names]
+physical_plan
+CoalesceBatchesExec: target_batch_size=2
+--HashJoinExec: mode=Partitioned, join_type=Inner, on=[(millis@2, millis@2)]
+----CoalesceBatchesExec: target_batch_size=2
+------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2
+--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+----------MemoryExec: partitions=1, partition_sizes=[1]
+----CoalesceBatchesExec: target_batch_size=2
+------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2
+--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+----------MemoryExec: partitions=1, partition_sizes=[1]
+
 # left_join_using_2
 query II
 SELECT t1.c1, t2.c2 FROM test_partition_table t1 JOIN test_partition_table t2 
USING (c2) ORDER BY t2.c2;

Reply via email to