alamb commented on code in PR #11796:
URL: https://github.com/apache/datafusion/pull/11796#discussion_r1704612803
##########
datafusion/sqllogictest/test_files/string_view.slt:
##########
@@ -379,3 +379,56 @@ select t.dt from dates t where arrow_cast('2024-01-01',
'Utf8View') < t.dt;
statement ok
drop table dates;
+
+statement ok
+create table temp as values
+('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')),
+('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool',
'Utf8View'));
+
+query T
+select column2||' is fast' from temp;
+----
+rust is fast
+datafusion is fast
+
+
+query T
+select column2 || ' is ' || column3 from temp;
+----
+rust is fast
+datafusion is cool
+
+query TT
+explain select column2 || 'is' || column3 from temp;
+----
+logical_plan
+01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3
AS Utf8)
+02)--TableScan: temp projection=[column2, column3]
+
+
+query TT
+explain select column2||' is fast' from temp;
+----
+logical_plan
+01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast")
Review Comment:
Likewise, here it would be better to use `Utf8View`.
##########
datafusion/expr/src/type_coercion/binary.rs:
##########
@@ -890,15 +890,22 @@ fn dictionary_coercion(
/// 2. Data type of the other side should be able to cast to string type
fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
- string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
- (Utf8, from_type) | (from_type, Utf8) => {
- string_concat_internal_coercion(from_type, &Utf8)
- }
- (LargeUtf8, from_type) | (from_type, LargeUtf8) => {
- string_concat_internal_coercion(from_type, &LargeUtf8)
+ match (lhs_type, rhs_type) {
+ // If Utf8View is in any side, we coerce to Utf8.
Review Comment:
I think it would be better to coerce to `Utf8View`, as that coercion will
often be faster (it is faster to cast Utf8 -> Utf8View than the other way
around).
Is that possible?
##########
datafusion/sqllogictest/test_files/string_view.slt:
##########
@@ -379,3 +379,56 @@ select t.dt from dates t where arrow_cast('2024-01-01',
'Utf8View') < t.dt;
statement ok
drop table dates;
+
+statement ok
+create table temp as values
+('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')),
+('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool',
'Utf8View'));
+
+query T
+select column2||' is fast' from temp;
+----
+rust is fast
+datafusion is fast
+
+
+query T
+select column2 || ' is ' || column3 from temp;
+----
+rust is fast
+datafusion is cool
+
+query TT
+explain select column2 || 'is' || column3 from temp;
+----
+logical_plan
+01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3
AS Utf8)
Review Comment:
This would be a better plan (likely faster) if the casting was to `Utf8View`
rather than `Utf8`
##########
datafusion/sqllogictest/test_files/string_view.slt:
##########
@@ -379,3 +379,56 @@ select t.dt from dates t where arrow_cast('2024-01-01',
'Utf8View') < t.dt;
statement ok
drop table dates;
+
+statement ok
+create table temp as values
+('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')),
+('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool',
'Utf8View'));
+
+query T
+select column2||' is fast' from temp;
+----
+rust is fast
+datafusion is fast
Review Comment:
❤️
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]