This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch string-view
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/string-view by this push:
     new 5b4c365e11 Add more StringView comparison test coverage (#10997)
5b4c365e11 is described below

commit 5b4c365e11f23fdd6f3aaee9c336bdfd8baa30f8
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Jun 19 10:28:59 2024 -0400

    Add more StringView comparison test coverage (#10997)
    
    * Add more StringView comparison test coverage
    
    * add reference
    
    * Add another test showing casting on columns works correctly
---
 datafusion/sqllogictest/test_files/string_view.slt | 270 ++++++++++++++++-----
 1 file changed, 211 insertions(+), 59 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/string_view.slt 
b/datafusion/sqllogictest/test_files/string_view.slt
index 3be3c94770..f8824b23d1 100644
--- a/datafusion/sqllogictest/test_files/string_view.slt
+++ b/datafusion/sqllogictest/test_files/string_view.slt
@@ -15,99 +15,251 @@
 # specific language governing permissions and limitations
 # under the License.
 
+########
+## Test setup
+########
 
-# test StringViewArray with Utf8View columns
 statement ok
-create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 
'Utf8View')),
-                           (arrow_cast('Xiangpeng', 'Utf8View'), 
arrow_cast('Xiangpeng', 'Utf8View')),
-                           (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 
'Utf8View')),
-                           (arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 
'Utf8View'));
+create table test_source as values
+    ('Andrew', 'X'),
+    ('Xiangpeng', 'Xiangpeng'),
+    ('Raphael', 'R'),
+    (NULL, 'R')
+;
 
-query B
-select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
-----
-false
+# Table with the different combination of column types
+statement ok
+create table test as
+SELECT
+  arrow_cast(column1, 'Utf8') as column1_utf8,
+  arrow_cast(column2, 'Utf8') as column2_utf8,
+  arrow_cast(column1, 'Utf8View') as column1_utf8view,
+  arrow_cast(column2, 'Utf8View') as column2_utf8view,
+  arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
+  arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
+FROM test_source;
 
-query B
-select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
-----
-true
+statement ok
+drop table test_source
 
-query B
-select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
-----
-true
+########
+## StringView to StringView
+########
 
-query B
-select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
+# StringView scalar to StringView scalar
+
+query BBBB
+select
+  arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
 ----
-true
+false true true true
+
+
+# StringView column to StringView column comparison as filters
 
-query ??
-select * from test where column1 = column2;
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view = 
column2_utf8view;
 ----
 Xiangpeng Xiangpeng
 
-query ??
-select * from test where column1 <> column2;
+query TT
+select column1_utf8, column2_utf8 from test where column1_utf8view <> 
column2_utf8view;
 ----
 Andrew X
 Raphael R
 
-query ??
-select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+# StringView column to StringView column
+query TTBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view = column2_utf8view,
+  column1_utf8view <> column2_utf8view
+from test;
 ----
-Andrew X
+Andrew X false true
+Xiangpeng Xiangpeng true false
+Raphael R false true
+NULL R NULL NULL
 
-query ??
-select * from test where column1 = 'Andrew';
+# StringView column to StringView scalar comparison
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
+from test;
 ----
-Andrew X
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
 
-query ??
-select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+########
+## StringView to String
+########
+
+# test StringViewArray with Utf8 columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_utf8,
+  column2_utf8      = column1_utf8view,
+  column1_utf8view <> column2_utf8,
+  column2_utf8     <> column1_utf8view
+from test;
 ----
-Xiangpeng Xiangpeng
-Raphael R
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
 
-query ??
-select * from test where column1 <> 'Andrew';
+# StringView column to String scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Utf8'),
+  arrow_cast('Andrew', 'Utf8')     = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Utf8'),
+  arrow_cast('Andrew', 'Utf8')     <> column1_utf8view
+from test;
 ----
-Xiangpeng Xiangpeng
-Raphael R
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
 
-statement ok
-drop table test;
+# String column to StringView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8                     = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_utf8,
+  column1_utf8                    <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_utf8
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+########
+## StringView to Dictionary
+########
+
+# test StringViewArray with Dictionary columns
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view  = column2_dict,
+  column2_dict      = column1_utf8view,
+  column1_utf8view <> column2_dict,
+  column2_dict     <> column1_utf8view
+from test;
+----
+Andrew X false false true true
+Xiangpeng Xiangpeng true true false false
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# StringView column to Dict scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_utf8view                 = arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
+  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     = column1_utf8view,
+  column1_utf8view                 <> arrow_cast('Andrew', 'Dictionary(Int32, 
Utf8)'),
+  arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')     <> column1_utf8view
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+# Dict column to StringView scalar
+query TTBBBB
+select
+  column1_utf8, column2_utf8,
+  column1_dict                     = arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') = column1_dict,
+  column1_dict                    <> arrow_cast('Andrew', 'Utf8View'),
+  arrow_cast('Andrew', 'Utf8View') <> column1_dict
+from test;
+----
+Andrew X true true false false
+Xiangpeng Xiangpeng false false true true
+Raphael R false false true true
+NULL R NULL NULL NULL NULL
+
+
+########
+## Coercion Rules
+########
 
 
-# test StringViewArray with Utf8 and Utf8View columns
 statement ok
-create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
-                            ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
-                            ('Raphael', arrow_cast('R', 'Utf8View')),
-                            (NULL, arrow_cast('R', 'Utf8View'));
+set datafusion.explain.logical_plan_only = true;
 
-query T?
-select * from test where column1 = column2;
+
+# Filter should have a StringView literal and no column cast
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
 ----
-Xiangpeng Xiangpeng
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
 
-query T?
-select * from test where column1 <> column2;
+# reverse order should be the same
+query TT
+explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
 ----
-Andrew X
-Raphael R
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
 
-query T?
-select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
+# should not be casting the column: 
https://github.com/apache/datafusion/issues/10998
+query TT
+explain SELECT column1_utf8 from test where column1_utf8 = 
arrow_cast('Andrew', 'Utf8View');
 ----
-Andrew X
+logical_plan
+01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew")
+02)--TableScan: test projection=[column1_utf8]
 
-query T?
-select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = 
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
 ----
-Xiangpeng Xiangpeng
-Raphael R
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = Utf8View("Andrew")
+03)----TableScan: test projection=[column1_utf8, column1_utf8view]
+
+# compare string / stringview
+# Should cast string -> stringview (which is cheap), not stringview -> string 
(which is not)
+query TT
+explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
+
+query TT
+explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
+----
+logical_plan
+01)Projection: test.column1_utf8
+02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
+03)----TableScan: test projection=[column1_utf8, column2_utf8, 
column1_utf8view]
+
 
 statement ok
 drop table test;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to