This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 8353a2ca2f Port tests in `group_by.rs` to sqllogictest (#8834)
8353a2ca2f is described below
commit 8353a2ca2fd1f0ed5fc764b7463dfbcaa033ceef
Author: Trevor Hilton <[email protected]>
AuthorDate: Fri Jan 12 17:25:51 2024 -0500
Port tests in `group_by.rs` to sqllogictest (#8834)
* Add group_by_date_trunc and group_by_limit tests to groupby.slt
* Remove the group_by_date_trunc and group_by_limit tests from the .rs tests
* Port group_by_dictionary to groupby.slt
* Remove group_by.rs
* Rename groupby.slt to group_by.slt
* Fix typo in comment
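A minimal way to exercise just the ported cases locally, assuming the
standard sqllogictest harness used in this repository, is to filter the
sqllogictests test binary by file name:
    # run only the group_by.slt cases from the repository root
    cargo test --test sqllogictests -- group_by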
---
datafusion/core/tests/sql/group_by.rs | 253 ------------
datafusion/core/tests/sql/mod.rs | 1 -
.../test_files/{groupby.slt => group_by.slt} | 430 +++++++++++++++++++++
3 files changed, 430 insertions(+), 254 deletions(-)
diff --git a/datafusion/core/tests/sql/group_by.rs b/datafusion/core/tests/sql/group_by.rs
deleted file mode 100644
index 58f0ac21d9..0000000000
--- a/datafusion/core/tests/sql/group_by.rs
+++ /dev/null
@@ -1,253 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use super::*;
-use arrow::util::pretty::pretty_format_batches;
-use arrow_schema::{DataType, TimeUnit};
-
-#[tokio::test]
-async fn group_by_date_trunc() -> Result<()> {
- let tmp_dir = TempDir::new()?;
- let ctx = SessionContext::new();
- let schema = Arc::new(Schema::new(vec![
- Field::new("c2", DataType::UInt64, false),
- Field::new(
- "t1",
- DataType::Timestamp(TimeUnit::Microsecond, None),
- false,
- ),
- ]));
-
- // generate a partitioned file
- for partition in 0..4 {
- let filename = format!("partition-{}.{}", partition, "csv");
- let file_path = tmp_dir.path().join(filename);
- let mut file = File::create(file_path)?;
-
- // generate some data
- for i in 0..10 {
- let data = format!("{},2020-12-{}T00:00:00.000Z\n", i, i + 10);
- file.write_all(data.as_bytes())?;
- }
- }
-
- ctx.register_csv(
- "test",
- tmp_dir.path().to_str().unwrap(),
- CsvReadOptions::new().schema(&schema).has_header(false),
- )
- .await?;
-
- let results = plan_and_collect(
- &ctx,
- "SELECT date_trunc('week', t1) as week, SUM(c2) FROM test GROUP BY
date_trunc('week', t1)",
- ).await?;
-
- let expected = [
- "+---------------------+--------------+",
- "| week | SUM(test.c2) |",
- "+---------------------+--------------+",
- "| 2020-12-07T00:00:00 | 24 |",
- "| 2020-12-14T00:00:00 | 156 |",
- "+---------------------+--------------+",
- ];
- assert_batches_sorted_eq!(expected, &results);
-
- Ok(())
-}
-
-#[tokio::test]
-async fn group_by_limit() -> Result<()> {
- let tmp_dir = TempDir::new()?;
- let ctx = create_groupby_context(&tmp_dir).await?;
-
- let sql = "SELECT trace_id, MAX(ts) from traces group by trace_id order by
MAX(ts) desc limit 4";
- let dataframe = ctx.sql(sql).await?;
-
- // ensure we see `lim=[4]`
- let physical_plan = dataframe.create_physical_plan().await?;
- let mut expected_physical_plan = r#"
-GlobalLimitExec: skip=0, fetch=4
- SortExec: TopK(fetch=4), expr=[MAX(traces.ts)@1 DESC]
- AggregateExec: mode=Single, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.ts)], lim=[4]
- "#.trim().to_string();
- let actual_phys_plan =
- format_plan(physical_plan.clone(), &mut expected_physical_plan);
- assert_eq!(actual_phys_plan, expected_physical_plan);
-
- let batches = collect(physical_plan, ctx.task_ctx()).await?;
- let expected = r#"
-+----------+----------------------+
-| trace_id | MAX(traces.ts) |
-+----------+----------------------+
-| 9 | 2020-12-01T00:00:18Z |
-| 8 | 2020-12-01T00:00:17Z |
-| 7 | 2020-12-01T00:00:16Z |
-| 6 | 2020-12-01T00:00:15Z |
-+----------+----------------------+
-"#
- .trim();
- let actual = format!("{}", pretty_format_batches(&batches)?);
- assert_eq!(actual, expected);
-
- Ok(())
-}
-
-fn format_plan(
- physical_plan: Arc<dyn ExecutionPlan>,
- expected_phys_plan: &mut String,
-) -> String {
- let actual_phys_plan = displayable(physical_plan.as_ref()).indent(true).to_string();
- let last_line = actual_phys_plan
- .as_str()
- .lines()
- .last()
- .expect("Plan should not be empty");
-
- expected_phys_plan.push('\n');
- expected_phys_plan.push_str(last_line);
- expected_phys_plan.push('\n');
- actual_phys_plan
-}
-
-async fn create_groupby_context(tmp_dir: &TempDir) -> Result<SessionContext> {
- let schema = Arc::new(Schema::new(vec![
- Field::new("trace_id", DataType::Utf8, false),
- Field::new(
- "ts",
- DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
- false,
- ),
- ]));
-
- // generate a file
- let filename = "traces.csv";
- let file_path = tmp_dir.path().join(filename);
- let mut file = File::create(file_path)?;
-
- // generate some data
- for trace_id in 0..10 {
- for ts in 0..10 {
- let ts = trace_id + ts;
- let data = format!("\"{trace_id}\",2020-12-01T00:00:{ts:02}.000Z\n");
- file.write_all(data.as_bytes())?;
- }
- }
-
- let cfg = SessionConfig::new().with_target_partitions(1);
- let ctx = SessionContext::new_with_config(cfg);
- ctx.register_csv(
- "traces",
- tmp_dir.path().to_str().unwrap(),
- CsvReadOptions::new().schema(&schema).has_header(false),
- )
- .await?;
- Ok(ctx)
-}
-
-#[tokio::test]
-async fn group_by_dictionary() {
- async fn run_test_case<K: ArrowDictionaryKeyType>() {
- let ctx = SessionContext::new();
-
- // input data looks like:
- // A, 1
- // B, 2
- // A, 2
- // A, 4
- // C, 1
- // A, 1
-
- let dict_array: DictionaryArray<K> =
- vec!["A", "B", "A", "A", "C", "A"].into_iter().collect();
- let dict_array = Arc::new(dict_array);
-
- let val_array: Int64Array = vec![1, 2, 2, 4, 1, 1].into();
- let val_array = Arc::new(val_array);
-
- let schema = Arc::new(Schema::new(vec![
- Field::new("dict", dict_array.data_type().clone(), false),
- Field::new("val", val_array.data_type().clone(), false),
- ]));
-
- let batch =
- RecordBatch::try_new(schema.clone(), vec![dict_array, val_array]).unwrap();
-
- ctx.register_batch("t", batch).unwrap();
-
- let results =
- plan_and_collect(&ctx, "SELECT dict, count(val) FROM t GROUP BY
dict")
- .await
- .expect("ran plan correctly");
-
- let expected = [
- "+------+--------------+",
- "| dict | COUNT(t.val) |",
- "+------+--------------+",
- "| A | 4 |",
- "| B | 1 |",
- "| C | 1 |",
- "+------+--------------+",
- ];
- assert_batches_sorted_eq!(expected, &results);
-
- // Now, use dict as an aggregate
- let results =
- plan_and_collect(&ctx, "SELECT val, count(dict) FROM t GROUP BY
val")
- .await
- .expect("ran plan correctly");
-
- let expected = [
- "+-----+---------------+",
- "| val | COUNT(t.dict) |",
- "+-----+---------------+",
- "| 1 | 3 |",
- "| 2 | 2 |",
- "| 4 | 1 |",
- "+-----+---------------+",
- ];
- assert_batches_sorted_eq!(expected, &results);
-
- // Now, use dict as an aggregate
- let results = plan_and_collect(
- &ctx,
- "SELECT val, count(distinct dict) FROM t GROUP BY val",
- )
- .await
- .expect("ran plan correctly");
-
- let expected = [
- "+-----+------------------------+",
- "| val | COUNT(DISTINCT t.dict) |",
- "+-----+------------------------+",
- "| 1 | 2 |",
- "| 2 | 2 |",
- "| 4 | 1 |",
- "+-----+------------------------+",
- ];
- assert_batches_sorted_eq!(expected, &results);
- }
-
- run_test_case::<Int8Type>().await;
- run_test_case::<Int16Type>().await;
- run_test_case::<Int32Type>().await;
- run_test_case::<Int64Type>().await;
- run_test_case::<UInt8Type>().await;
- run_test_case::<UInt16Type>().await;
- run_test_case::<UInt32Type>().await;
- run_test_case::<UInt64Type>().await;
-}
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 849d85dec6..3f52d2aae8 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -76,7 +76,6 @@ pub mod create_drop;
pub mod csv_files;
pub mod explain_analyze;
pub mod expr;
-pub mod group_by;
pub mod joins;
pub mod order;
pub mod partitioned_csv;
diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/group_by.slt
similarity index 92%
rename from datafusion/sqllogictest/test_files/groupby.slt
rename to datafusion/sqllogictest/test_files/group_by.slt
index b09ff79e88..7c5803d385 100644
--- a/datafusion/sqllogictest/test_files/groupby.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -4284,3 +4284,433 @@ LIMIT 5
1 FRA 3 2022-01-02T12:00:00 EUR 200
1 TUR 2 2022-01-01T11:30:00 TRY 75
1 TUR 4 2022-01-03T10:00:00 TRY 100
+
+# Create a table with timestamp data
+statement ok
+CREATE TABLE src_table (
+ t1 TIMESTAMP,
+ c2 INT,
+) AS VALUES
+('2020-12-10T00:00:00.00Z', 0),
+('2020-12-11T00:00:00.00Z', 1),
+('2020-12-12T00:00:00.00Z', 2),
+('2020-12-13T00:00:00.00Z', 3),
+('2020-12-14T00:00:00.00Z', 4),
+('2020-12-15T00:00:00.00Z', 5),
+('2020-12-16T00:00:00.00Z', 6),
+('2020-12-17T00:00:00.00Z', 7),
+('2020-12-18T00:00:00.00Z', 8),
+('2020-12-19T00:00:00.00Z', 9);
+
+# Use src_table to create a partitioned file
+query PI
+COPY (SELECT * FROM src_table)
+TO 'test_files/scratch/group_by/timestamp_table/0.csv'
+(FORMAT CSV, SINGLE_FILE_OUTPUT true);
+----
+10
+
+query PI
+COPY (SELECT * FROM src_table)
+TO 'test_files/scratch/group_by/timestamp_table/1.csv'
+(FORMAT CSV, SINGLE_FILE_OUTPUT true);
+----
+10
+
+query PI
+COPY (SELECT * FROM src_table)
+TO 'test_files/scratch/group_by/timestamp_table/2.csv'
+(FORMAT CSV, SINGLE_FILE_OUTPUT true);
+----
+10
+
+query PI
+COPY (SELECT * FROM src_table)
+TO 'test_files/scratch/group_by/timestamp_table/3.csv'
+(FORMAT CSV, SINGLE_FILE_OUTPUT true);
+----
+10
+
+# Create a table from the generated CSV files:
+statement ok
+CREATE EXTERNAL TABLE timestamp_table (
+ t1 TIMESTAMP,
+ c2 INT,
+)
+STORED AS CSV
+WITH HEADER ROW
+LOCATION 'test_files/scratch/group_by/timestamp_table';
+
+# Group By using date_trunc
+query PI rowsort
+SELECT date_trunc('week', t1) as week, sum(c2)
+FROM timestamp_table
+GROUP BY date_trunc('week', t1)
+----
+2020-12-07T00:00:00 24
+2020-12-14T00:00:00 156
+
+# GROUP BY using LIMIT
+query IP
+SELECT c2, MAX(t1)
+FROM timestamp_table
+GROUP BY c2
+ORDER BY MAX(t1) DESC
+LIMIT 4;
+----
+9 2020-12-19T00:00:00
+8 2020-12-18T00:00:00
+7 2020-12-17T00:00:00
+6 2020-12-16T00:00:00
+
+# Explain the GROUP BY with LIMIT to ensure the plan contains `lim=[4]`
+query TT
+EXPLAIN
+SELECT c2, MAX(t1)
+FROM timestamp_table
+GROUP BY c2
+ORDER BY MAX(t1) DESC
+LIMIT 4;
+----
+logical_plan
+Limit: skip=0, fetch=4
+--Sort: MAX(timestamp_table.t1) DESC NULLS FIRST, fetch=4
+----Aggregate: groupBy=[[timestamp_table.c2]], aggr=[[MAX(timestamp_table.t1)]]
+------TableScan: timestamp_table projection=[t1, c2]
+physical_plan
+GlobalLimitExec: skip=0, fetch=4
+--SortPreservingMergeExec: [MAX(timestamp_table.t1)@1 DESC], fetch=4
+----SortExec: TopK(fetch=4), expr=[MAX(timestamp_table.t1)@1 DESC]
+------AggregateExec: mode=FinalPartitioned, gby=[c2@0 as c2], aggr=[MAX(timestamp_table.t1)], lim=[4]
+--------CoalesceBatchesExec: target_batch_size=2
+----------RepartitionExec: partitioning=Hash([c2@0], 8), input_partitions=8
+------------AggregateExec: mode=Partial, gby=[c2@1 as c2], aggr=[MAX(timestamp_table.t1)], lim=[4]
+--------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=4
+----------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/0.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/2.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/group_by/timestamp_table/3.csv]]}, projection=[t1, c2], has_header=true
+
+# Clean up
+statement ok
+DROP TABLE src_table;
+
+statement ok
+DROP TABLE timestamp_table;
+
+### BEGIN Group By with Dictionary Variants ###
+#
+# The following tests use GROUP BY on tables with dictionary columns.
+# The same test is repeated using dictionaries with the key types:
+#
+# - Int8
+# - Int16
+# - Int32
+# - Int64
+# - UInt8
+# - UInt16
+# - UInt32
+# - UInt64
+
+# Table with an int column and Dict<Int8> column:
+statement ok
+CREATE TABLE int8_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(Int8, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(Int8, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(Int8, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(Int8, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(Int8, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(Int8, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM int8_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM int8_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM int8_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE int8_dict;
+
+# Table with an int column and Dict<Int16> column:
+statement ok
+CREATE TABLE int16_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(Int16, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(Int16, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(Int16, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(Int16, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(Int16, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(Int16, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM int16_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM int16_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM int16_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE int16_dict;
+
+# Table with an int column and Dict<Int32> column:
+statement ok
+CREATE TABLE int32_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(Int32, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(Int32, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(Int32, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(Int32, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(Int32, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(Int32, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM int32_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM int32_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM int32_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE int32_dict;
+
+# Table with an int column and Dict<Int64> column:
+statement ok
+CREATE TABLE int64_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(Int64, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(Int64, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(Int64, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(Int64, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(Int64, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(Int64, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM int64_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM int64_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM int64_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE int64_dict;
+
+# Table with an int column and Dict<UInt8> column:
+statement ok
+CREATE TABLE uint8_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(UInt8, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(UInt8, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(UInt8, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(UInt8, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(UInt8, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(UInt8, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM uint8_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM uint8_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM uint8_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE uint8_dict;
+
+# Table with an int column and Dict<UInt16> column:
+statement ok
+CREATE TABLE uint16_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(UInt16, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(UInt16, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(UInt16, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(UInt16, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(UInt16, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(UInt16, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM uint16_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM uint16_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM uint16_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE uint16_dict;
+
+# Table with an int column and Dict<UInt32> column:
+statement ok
+CREATE TABLE uint32_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(UInt32, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(UInt32, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(UInt32, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(UInt32, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(UInt32, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(UInt32, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM uint32_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM uint32_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM uint32_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE uint32_dict;
+
+# Table with an int column and Dict<UInt64> column:
+statement ok
+CREATE TABLE uint64_dict AS VALUES
+(1, arrow_cast('A', 'Dictionary(UInt64, Utf8)')),
+(2, arrow_cast('B', 'Dictionary(UInt64, Utf8)')),
+(2, arrow_cast('A', 'Dictionary(UInt64, Utf8)')),
+(4, arrow_cast('A', 'Dictionary(UInt64, Utf8)')),
+(1, arrow_cast('C', 'Dictionary(UInt64, Utf8)')),
+(1, arrow_cast('A', 'Dictionary(UInt64, Utf8)'));
+
+# Group by the non-dict column
+query ?I rowsort
+SELECT column2, count(column1) FROM uint64_dict GROUP BY column2;
+----
+A 4
+B 1
+C 1
+
+# Group by the value column with dict as aggregate
+query II rowsort
+SELECT column1, count(column2) FROM uint64_dict GROUP BY column1;
+----
+1 3
+2 2
+4 1
+
+# Group by with dict as aggregate using distinct
+query II rowsort
+SELECT column1, count(distinct column2) FROM uint64_dict GROUP BY column1;
+----
+1 2
+2 2
+4 1
+
+# Clean up
+statement ok
+DROP TABLE uint64_dict;
+
+### END Group By with Dictionary Variants ###