This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 6c5823ec25 Support `DictionaryArray` in `OVER` clause  (#13153)
6c5823ec25 is described below

commit 6c5823ec25a0fc14a49922b10a3d274e072c4bd6
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Fri Nov 1 10:23:23 2024 -0500

    Support `DictionaryArray` in `OVER` clause  (#13153)
    
    * implement target type selection for range queries on dictionary data types
    
    Fixes #13151
    
    * Update type_coercion.rs
    
    * Add test
    
    * query I?
---
 datafusion/optimizer/src/analyzer/type_coercion.rs | 28 ++++++++++++----------
 datafusion/sqllogictest/test_files/dictionary.slt  |  6 +++++
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 5d33b58a02..9793c4c549 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -688,6 +688,21 @@ fn coerce_frame_bound(
     }
 }
 
+fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> {
+    if col_type.is_numeric()
+        || is_utf8_or_large_utf8(col_type)
+        || matches!(col_type, DataType::Null)
+    {
+        Ok(col_type.clone())
+    } else if is_datetime(col_type) {
+        Ok(DataType::Interval(IntervalUnit::MonthDayNano))
+    } else if let DataType::Dictionary(_, value_type) = col_type {
+        extract_window_frame_target_type(value_type)
+    } else {
+        return internal_err!("Cannot run range queries on datatype: {col_type:?}");
+    }
+}
+
 // Coerces the given `window_frame` to use appropriate natural types.
 // For example, ROWS and GROUPS frames use `UInt64` during calculations.
 fn coerce_window_frame(
@@ -703,18 +718,7 @@ fn coerce_window_frame(
                 .map(|s| s.expr.get_type(schema))
                 .transpose()?;
             if let Some(col_type) = current_types {
-                if col_type.is_numeric()
-                    || is_utf8_or_large_utf8(&col_type)
-                    || matches!(col_type, DataType::Null)
-                {
-                    col_type
-                } else if is_datetime(&col_type) {
-                    DataType::Interval(IntervalUnit::MonthDayNano)
-                } else {
-                    return internal_err!(
-                        "Cannot run range queries on datatype: {col_type:?}"
-                    );
-                }
+                extract_window_frame_target_type(&col_type)?
             } else {
                 return internal_err!("ORDER BY column cannot be empty");
             }
diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt
index 176331f570..b6923fcc94 100644
--- a/datafusion/sqllogictest/test_files/dictionary.slt
+++ b/datafusion/sqllogictest/test_files/dictionary.slt
@@ -444,3 +444,9 @@ physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--FilterExec: column2@1 = 1
 03)----MemoryExec: partitions=1, partition_sizes=[1]
+
+# Window Functions
+query I
+select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)'));
+----
+1


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to