This is an automated email from the ASF dual-hosted git repository.
goldmedal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1557fce891 Support `Utf8View` for string function `bit_length()` (#13221)
1557fce891 is described below
commit 1557fce891c5610c21e310a458e8a2a118044c1a
Author: Austin Liu <[email protected]>
AuthorDate: Sun Nov 10 16:38:41 2024 +0800
Support `Utf8View` for string function `bit_length()` (#13221)
* Support `Utf8View` for string function `bit_length()`
Signed-off-by: Austin Liu <[email protected]>
* Add scalar test case
Signed-off-by: Austin Liu <[email protected]>
* Refine tests
Signed-off-by: Austin Liu <[email protected]>
* Fix wrong format
Signed-off-by: Austin Liu <[email protected]>
---------
Signed-off-by: Austin Liu <[email protected]>
---
datafusion/functions/src/string/bit_length.rs | 3 ++
.../test_files/string/string_literal.slt | 41 ++++++++++++++++++++++
.../sqllogictest/test_files/string/string_view.slt | 1 +
3 files changed, 45 insertions(+)
diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs
index d02c2b6a65..cb815df15e 100644
--- a/datafusion/functions/src/string/bit_length.rs
+++ b/datafusion/functions/src/string/bit_length.rs
@@ -79,6 +79,9 @@ impl ScalarUDFImpl for BitLengthFunc {
ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as
i64)),
)),
+ ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
+ ScalarValue::Int32(v.as_ref().map(|x| (x.len() * 8) as
i32)),
+ )),
_ => unreachable!("bit length"),
},
}
diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt
index 57261470f6..493da64063 100644
--- a/datafusion/sqllogictest/test_files/string/string_literal.slt
+++ b/datafusion/sqllogictest/test_files/string/string_literal.slt
@@ -1623,3 +1623,44 @@ a\_c \%abc false
\%abc a\_c false
\%abc %abc true
\%abc \%abc false
+
+# test utf8, largeutf8, utf8view, DictionaryString for bit_length
+query IIII
+SELECT
+ bit_length('Andrew'),
+ bit_length('datafusion数据融合'),
+ bit_length('💖'),
+ bit_length('josé')
+;
+----
+48 176 32 40
+
+query IIII
+SELECT
+ bit_length(arrow_cast('Andrew', 'LargeUtf8')),
+ bit_length(arrow_cast('datafusion数据融合', 'LargeUtf8')),
+ bit_length(arrow_cast('💖', 'LargeUtf8')),
+ bit_length(arrow_cast('josé', 'LargeUtf8'))
+;
+----
+48 176 32 40
+
+query IIII
+SELECT
+ bit_length(arrow_cast('Andrew', 'Utf8View')),
+ bit_length(arrow_cast('datafusion数据融合', 'Utf8View')),
+ bit_length(arrow_cast('💖', 'Utf8View')),
+ bit_length(arrow_cast('josé', 'Utf8View'))
+;
+----
+48 176 32 40
+
+query IIII
+SELECT
+ bit_length(arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')),
+ bit_length(arrow_cast('datafusion数据融合', 'Dictionary(Int32, Utf8)')),
+ bit_length(arrow_cast('💖', 'Dictionary(Int32, Utf8)')),
+ bit_length(arrow_cast('josé', 'Dictionary(Int32, Utf8)'))
+;
+----
+48 176 32 40
diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt
index dec5488d73..ce8a295373 100644
--- a/datafusion/sqllogictest/test_files/string/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string/string_view.slt
@@ -93,6 +93,7 @@ select octet_length(column1_utf8view) from test;
0
NULL
+# TODO: Revisit this issue after upgrading to the arrow-rs version that includes apache/arrow-rs#6671.
query error DataFusion error: Arrow error: Compute error: bit_length not
supported for Utf8View
select bit_length(column1_utf8view) from test;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]