alamb commented on issue #821: URL: https://github.com/apache/arrow-datafusion/issues/821#issuecomment-892795370
Here is a reproducer: ```diff diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 379cad623..4b8f75191 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -3219,9 +3219,17 @@ async fn query_on_string_dictionary() -> Result<()> { let expected = vec![vec!["NULL", "1"], vec!["one", "1"], vec!["three", "1"]]; assert_eq!(expected, actual); + // window functions + let sql = "SELECT d1, row_number() OVER (partition by d1) FROM test"; + let mut actual = execute(&mut ctx, sql).await; + actual.sort(); + let expected = vec![vec!["NULL", "1"], vec!["one", "1"], vec!["three", "1"]]; + assert_eq!(expected, actual); + Ok(()) } + #[tokio::test] async fn query_without_from() -> Result<()> { // Test for SELECT <expression> without FROM. ``` Fails with: ``` thread 'query_on_string_dictionary' panicked at 'Executing physical plan for 'SELECT d1, row_number() OVER (partition by d1) FROM test': ProjectionExec { expr: [(Column { name: "d1", index: 1 }, "d1"), (Column { name: "ROW_NUMBER() PARTITION BY [#test.d1]", index: 0 }, "ROW_NUMBER()")], schema: Schema { fields: [Field { name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }], metadata: {} }, input: WindowAggExec { input: RepartitionExec { input: SortExec { input: CoalescePartitionsExec { input: CoalesceBatchesExec { input: RepartitionExec { input: RepartitionExec { input: partitions: [...]schema: Schema { fields: [Field { name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }], metadata: {} }projection: Some([0]), partitioning: RoundRobinBat ch(16), channels: Mutex { data: {} }, metrics: RepartitionMetrics { fetch_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, repart_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, send_nanos: SQLMetric { 
value: 0, metric_type: TimeNanos } } }, partitioning: Hash([Column { name: "d1", index: 0 }], 16), channels: Mutex { data: {} }, metrics: RepartitionMetrics { fetch_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, repart_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, send_nanos: SQLMetric { value: 0, metric_type: TimeNanos } } }, target_batch_size: 4096 } }, expr: [PhysicalSortExpr { expr: Column { name: "d1", index: 0 }, options: SortOptions { descending: false, nulls_first: true } }], output_rows: SQLMetric { value: 0, metric_type: Counter }, sort_time_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, preserve_partitioning: false }, partitioning: RoundRobinBatch(16), channels: Mutex { data: {} }, metrics: RepartitionMetrics { fetch_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, repart_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, send_nanos: SQLMetric { value: 0, metric_type: TimeNanos } } }, window_expr: [BuiltInWindowExpr { fun: RowNumber, expr: RowNumber { name: "ROW_NUMBER()" }, partition_by: [Column { name: "d1", index: 0 }], order_by: [], window_frame: None }], schema: Schema { fields: [Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }], metadata: {} }, input_schema: Schema { fields: [Field { name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }], metadata: {} } } }: ArrowError(ExternalError(ArrowError(ExternalError(ArrowError(ExternalError(Execution("Internal error: Unsupported data type in hasher. 
This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker")))))))', datafusion/tests/sql.rs:2642:39 ``` I confirmed that the test passes with the changes in https://github.com/apache/arrow-datafusion/pull/812. I will merge #812 as is and then add a specific test in a follow-on PR. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org