This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7a5354fe59 fix(functions): support `Dictionary` for string and int
functions (#7262)
7a5354fe59 is described below
commit 7a5354fe5908b8ac7db163d6c484dbf1d85a142e
Author: Chunchun Ye <[email protected]>
AuthorDate: Fri Aug 11 15:21:48 2023 -0500
fix(functions): support `Dictionary` for string and int functions (#7262)
* fix(functions): support `Dictionary` type for string functions and int
functions
* chore: add tests
chore: add more test
---
.../tests/sqllogictests/test_files/functions.slt | 201 +++++++++++++++++++++
datafusion/expr/src/built_in_function.rs | 14 ++
2 files changed, 215 insertions(+)
diff --git a/datafusion/core/tests/sqllogictests/test_files/functions.slt b/datafusion/core/tests/sqllogictests/test_files/functions.slt
index 301d73befb..f8dbf8a00d 100644
--- a/datafusion/core/tests/sqllogictests/test_files/functions.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/functions.slt
@@ -63,6 +63,11 @@ SELECT left('abcde', -2)
----
abc
+query T
+SELECT left(arrow_cast('abcde', 'Dictionary(Int32, Utf8)'), -2)
+----
+abc
+
query T
SELECT left('abcde', -200)
----
@@ -103,6 +108,11 @@ SELECT length('')
----
0
+query I
+SELECT length(arrow_cast('', 'Dictionary(Int32, Utf8)'))
+----
+0
+
query I
SELECT length('chars')
----
@@ -113,6 +123,11 @@ SELECT length('josé')
----
4
+query I
+SELECT length(arrow_cast('josé', 'Dictionary(Int32, Utf8)'))
+----
+4
+
query ?
SELECT length(NULL)
----
@@ -158,6 +173,11 @@ SELECT lpad('hi', 5)
----
hi
+query T
+SELECT lpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5)
+----
+ hi
+
query T
SELECT lpad('hi', CAST(NULL AS INT), 'xy')
----
@@ -188,6 +208,11 @@ SELECT reverse('abcde')
----
edcba
+query T
+SELECT reverse(arrow_cast('abcde', 'Dictionary(Int32, Utf8)'))
+----
+edcba
+
query T
SELECT reverse('loẅks')
----
@@ -203,6 +228,11 @@ SELECT right('abcde', -2)
----
cde
+query T
+SELECT right(arrow_cast('abcde', 'Dictionary(Int32, Utf8)'), 1)
+----
+e
+
query T
SELECT right('abcde', -200)
----
@@ -268,6 +298,11 @@ SELECT rpad('hi', 5, 'xy')
----
hixyx
+query T
+SELECT rpad(arrow_cast('hi', 'Dictionary(Int32, Utf8)'), 5, 'xy')
+----
+hixyx
+
query T
SELECT rpad('hi', 5, NULL)
----
@@ -383,6 +418,11 @@ SELECT translate('12345', '143', 'ax')
----
a2x5
+query T
+SELECT translate(arrow_cast('12345', 'Dictionary(Int32, Utf8)'), '143', 'ax')
+----
+a2x5
+
query ?
SELECT translate(NULL, '143', 'ax')
----
@@ -565,3 +605,164 @@ SELECT
sqrt(column1),sqrt(column2),sqrt(column3),sqrt(column4),sqrt(column5),sqr
statement ok
drop table t
+
+
+query T
+SELECT upper('foo')
+----
+FOO
+
+query T
+select upper(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+FOO
+
+query T
+SELECT btrim(' foo ')
+----
+foo
+
+query T
+SELECT btrim(arrow_cast(' foo ', 'Dictionary(Int32, Utf8)'))
+----
+foo
+
+query T
+SELECT initcap('foo')
+----
+Foo
+
+query T
+SELECT initcap(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+Foo
+
+query T
+SELECT lower('FOObar')
+----
+foobar
+
+query T
+SELECT lower(arrow_cast('FOObar', 'Dictionary(Int32, Utf8)'))
+----
+foobar
+
+query T
+SELECT ltrim(' foo')
+----
+foo
+
+query T
+SELECT ltrim(arrow_cast(' foo', 'Dictionary(Int32, Utf8)'))
+----
+foo
+
+query T
+SELECT md5('foo')
+----
+acbd18db4cc2f85cedef654fccc4a4d8
+
+query T
+SELECT md5(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+acbd18db4cc2f85cedef654fccc4a4d8
+
+query T
+SELECT regexp_replace('foobar', 'bar', 'xx', 'gi')
+----
+fooxx
+
+query T
+SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'xx', 'gi')
+----
+fooxx
+
+query T
+SELECT repeat('foo', 3)
+----
+foofoofoo
+
+query T
+SELECT repeat(arrow_cast('foo', 'Dictionary(Int32, Utf8)'), 3)
+----
+foofoofoo
+
+query T
+SELECT replace('foobar', 'bar', 'hello')
+----
+foohello
+
+query T
+SELECT replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'hello')
+----
+foohello
+
+query T
+SELECT rtrim(' foo ')
+----
+ foo
+
+query T
+SELECT rtrim(arrow_cast(' foo ', 'Dictionary(Int32, Utf8)'))
+----
+ foo
+
+query T
+SELECT split_part('foo_bar', '_', 2)
+----
+bar
+
+query T
+SELECT split_part(arrow_cast('foo_bar', 'Dictionary(Int32, Utf8)'), '_', 2)
+----
+bar
+
+query T
+SELECT trim(' foo ')
+----
+foo
+
+query T
+SELECT trim(arrow_cast(' foo ', 'Dictionary(Int32, Utf8)'))
+----
+foo
+
+query I
+SELECT bit_length('foo')
+----
+24
+
+query I
+SELECT bit_length(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+24
+
+query I
+SELECT character_length('foo')
+----
+3
+
+query I
+SELECT character_length(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+3
+
+query I
+SELECT octet_length('foo')
+----
+3
+
+query I
+SELECT octet_length(arrow_cast('foo', 'Dictionary(Int32, Utf8)'))
+----
+3
+
+query I
+SELECT strpos('helloworld', 'world')
+----
+6
+
+query I
+SELECT strpos(arrow_cast('helloworld', 'Dictionary(Int32, Utf8)'), 'world')
+----
+6
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index cf609135ae..2ad06b873b 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -1397,6 +1397,20 @@ macro_rules! make_utf8_to_return_type {
DataType::LargeUtf8 => $largeUtf8Type,
DataType::Utf8 => $utf8Type,
DataType::Null => DataType::Null,
+ DataType::Dictionary(_, value_type) => {
+ match **value_type {
+ DataType::LargeUtf8 => $largeUtf8Type,
+ DataType::Utf8 => $utf8Type,
+ DataType::Null => DataType::Null,
+ _ => {
+ // this error is internal as `data_types` should have captured this.
+ return Err(DataFusionError::Internal(format!(
+ "The {:?} function can only accept strings.",
+ name
+ )));
+ }
+ }
+ }
_ => {
// this error is internal as `data_types` should have captured this.
return Err(DataFusionError::Internal(format!(