(arrow-datafusion) branch main updated: Add tests for filtering, grouping, aggregation of ARRAYs (#9695)

jayzhan Fri, 29 Mar 2024 05:04:16 -0700

This is an automated email from the ASF dual-hosted git repository.

jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 230a6b4768 Add tests for filtering, grouping, aggregation of ARRAYs 
(#9695)
230a6b4768 is described below

commit 230a6b476804c0a8964d559cc16e41328a43efc5
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Mar 29 08:03:21 2024 -0400

    Add tests for filtering, grouping, aggregation of ARRAYs (#9695)
    
    * Add tests for filtering, grouping, aggregation of ARRAYs
    
    * Update output to correct results
---
 datafusion/sqllogictest/test_files/array_query.slt | 160 +++++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/datafusion/sqllogictest/test_files/array_query.slt 
b/datafusion/sqllogictest/test_files/array_query.slt
new file mode 100644
index 0000000000..24c99fc849
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/array_query.slt
@@ -0,0 +1,160 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+## Tests for basic array queries
+
+# Make a table with multiple input partitions
+statement ok
+CREATE TABLE data AS
+  SELECT * FROM (VALUES
+      ([1,2,3], [4,5], 1)
+   )
+    UNION ALL
+  SELECT * FROM (VALUES
+     ([2,3],   [2,3], 1),
+     ([1,2,3], NULL,  1)
+  )
+;
+
+query ??I rowsort
+SELECT * FROM data;
+----
+[1, 2, 3] NULL 1
+[1, 2, 3] [4, 5] 1
+[2, 3] [2, 3] 1
+
+###########
+# Filtering
+###########
+
+query error DataFusion error: Arrow error: Invalid argument error: Invalid 
comparison operation: List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) == List\(Field \{ 
name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: \{\} \}\)
+SELECT * FROM data WHERE column1 = [1,2,3];
+
+query error DataFusion error: Arrow error: Invalid argument error: Invalid 
comparison operation: List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) == List\(Field \{ 
name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: \{\} \}\)
+SELECT * FROM data WHERE column1 = column2
+
+query error DataFusion error: Arrow error: Invalid argument error: Invalid 
comparison operation: List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) != List\(Field \{ 
name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: \{\} \}\)
+SELECT * FROM data WHERE column1 != [1,2,3];
+
+query error DataFusion error: Arrow error: Invalid argument error: Invalid 
comparison operation: List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) != List\(Field \{ 
name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: \{\} \}\)
+SELECT * FROM data WHERE column1 != column2
+
+###########
+# Aggregates
+###########
+
+query error Internal error: Min/Max accumulator not implemented for type List
+SELECT min(column1) FROM data;
+
+query error Internal error: Min/Max accumulator not implemented for type List
+SELECT max(column1) FROM data;
+
+query I
+SELECT count(column1) FROM data;
+----
+3
+
+# note single count distincts are rewritten to use a group by
+query I
+SELECT count(distinct column1) FROM data;
+----
+2
+
+query I
+SELECT count(distinct column2) FROM data;
+----
+2
+
+
+# note multiple count distincts are not rewritten
+query II
+SELECT count(distinct column1), count(distinct column2) FROM data;
+----
+2 2
+
+
+###########
+# GROUP BY
+###########
+
+
+query I
+SELECT count(column1) FROM data GROUP BY column3;
+----
+3
+
+# note single count distincts are rewritten to use a group by
+query I
+SELECT count(distinct column1) FROM data GROUP BY column3;
+----
+2
+
+query I
+SELECT count(distinct column2) FROM data GROUP BY column3;
+----
+2
+
+# note multiple count distincts are not rewritten
+query II
+SELECT count(distinct column1), count(distinct column2) FROM data GROUP BY 
column3;
+----
+2 2
+
+
+###########
+# ORDER BY
+###########
+
+query ??I
+SELECT * FROM data ORDER BY column2;
+----
+[2, 3] [2, 3] 1
+[1, 2, 3] [4, 5] 1
+[1, 2, 3] NULL 1
+
+query ??I
+SELECT * FROM data ORDER BY column2 DESC;
+----
+[1, 2, 3] NULL 1
+[1, 2, 3] [4, 5] 1
+[2, 3] [2, 3] 1
+
+query ??I
+SELECT * FROM data ORDER BY column2 DESC NULLS LAST;
+----
+[1, 2, 3] [4, 5] 1
+[2, 3] [2, 3] 1
+[1, 2, 3] NULL 1
+
+# multi column
+query ??I
+SELECT * FROM data ORDER BY column1, column2;
+----
+[1, 2, 3] [4, 5] 1
+[1, 2, 3] NULL 1
+[2, 3] [2, 3] 1
+
+query ??I
+SELECT * FROM data ORDER BY column1, column3, column2;
+----
+[1, 2, 3] [4, 5] 1
+[1, 2, 3] NULL 1
+[2, 3] [2, 3] 1
+
+
+statement ok
+drop table data

(arrow-datafusion) branch main updated: Add tests for filtering, grouping, aggregation of ARRAYs (#9695)

Reply via email to