This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6c5823ec25 Support `DictionaryArray` in `OVER` clause (#13153)
6c5823ec25 is described below
commit 6c5823ec25a0fc14a49922b10a3d274e072c4bd6
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Fri Nov 1 10:23:23 2024 -0500
Support `DictionaryArray` in `OVER` clause (#13153)
* implement target type selection for range queries on dictionary data types
Fixes #13151
* Update type_coercion.rs
* Add test
* query I?
---
datafusion/optimizer/src/analyzer/type_coercion.rs | 28 ++++++++++++----------
datafusion/sqllogictest/test_files/dictionary.slt | 6 +++++
2 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 5d33b58a02..9793c4c549 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -688,6 +688,21 @@ fn coerce_frame_bound(
}
}
+fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> {
+ if col_type.is_numeric()
+ || is_utf8_or_large_utf8(col_type)
+ || matches!(col_type, DataType::Null)
+ {
+ Ok(col_type.clone())
+ } else if is_datetime(col_type) {
+ Ok(DataType::Interval(IntervalUnit::MonthDayNano))
+ } else if let DataType::Dictionary(_, value_type) = col_type {
+ extract_window_frame_target_type(value_type)
+ } else {
+ return internal_err!("Cannot run range queries on datatype: {col_type:?}");
+ }
+}
+
// Coerces the given `window_frame` to use appropriate natural types.
// For example, ROWS and GROUPS frames use `UInt64` during calculations.
fn coerce_window_frame(
@@ -703,18 +718,7 @@ fn coerce_window_frame(
.map(|s| s.expr.get_type(schema))
.transpose()?;
if let Some(col_type) = current_types {
- if col_type.is_numeric()
- || is_utf8_or_large_utf8(&col_type)
- || matches!(col_type, DataType::Null)
- {
- col_type
- } else if is_datetime(&col_type) {
- DataType::Interval(IntervalUnit::MonthDayNano)
- } else {
- return internal_err!(
- "Cannot run range queries on datatype: {col_type:?}"
- );
- }
+ extract_window_frame_target_type(&col_type)?
} else {
return internal_err!("ORDER BY column cannot be empty");
}
diff --git a/datafusion/sqllogictest/test_files/dictionary.slt b/datafusion/sqllogictest/test_files/dictionary.slt
index 176331f570..b6923fcc94 100644
--- a/datafusion/sqllogictest/test_files/dictionary.slt
+++ b/datafusion/sqllogictest/test_files/dictionary.slt
@@ -444,3 +444,9 @@ physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: column2@1 = 1
03)----MemoryExec: partitions=1, partition_sizes=[1]
+
+# Window Functions
+query I
+select dense_rank() over (order by arrow_cast('abc', 'Dictionary(UInt16, Utf8)'));
+----
+1
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]