alamb commented on issue #821:
URL: 
https://github.com/apache/arrow-datafusion/issues/821#issuecomment-892795370


   Here is a reproducer:
   ```diff
   diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
   index 379cad623..4b8f75191 100644
   --- a/datafusion/tests/sql.rs
   +++ b/datafusion/tests/sql.rs
   @@ -3219,9 +3219,17 @@ async fn query_on_string_dictionary() -> Result<()> {
        let expected = vec![vec!["NULL", "1"], vec!["one", "1"], vec!["three", "1"]];
        assert_eq!(expected, actual);
    
   +    // window functions
   +    let sql = "SELECT d1, row_number() OVER (partition by d1) FROM test";
   +    let mut actual = execute(&mut ctx, sql).await;
   +    actual.sort();
   +    let expected = vec![vec!["NULL", "1"], vec!["one", "1"], vec!["three", "1"]];
   +    assert_eq!(expected, actual);
   +
        Ok(())
    }
    
   +
    #[tokio::test]
    async fn query_without_from() -> Result<()> {
        // Test for SELECT <expression> without FROM.
   ```
   
   Fails with:
   ```
   thread 'query_on_string_dictionary' panicked at 'Executing physical plan for 
'SELECT d1, row_number() OVER (partition by d1) FROM test': ProjectionExec { 
expr: [(Column { name: "d1", index: 1 }, "d1"), (Column { name: "ROW_NUMBER() 
PARTITION BY [#test.d1]", index: 0 }, "ROW_NUMBER()")], schema: Schema { 
fields: [Field { name: "d1", data_type: Dictionary(Int32, Utf8), nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: 
"ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} }, input: WindowAggExec 
{ input: RepartitionExec { input: SortExec { input: CoalescePartitionsExec { 
input: CoalesceBatchesExec { input: RepartitionExec { input: RepartitionExec { 
input: partitions: [...]schema: Schema { fields: [Field { name: "d1", 
data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} }projection: Some([0]), 
partitioning: RoundRobinBatch(16), channels: Mutex { data: {} }, metrics: 
RepartitionMetrics { 
fetch_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, repart_nanos: 
SQLMetric { value: 0, metric_type: TimeNanos }, send_nanos: SQLMetric { value: 
0, metric_type: TimeNanos } } }, partitioning: Hash([Column { name: "d1", 
index: 0 }], 16), channels: Mutex { data: {} }, metrics: RepartitionMetrics { 
fetch_nanos: SQLMetric { value: 0, metric_type: TimeNanos }, repart_nanos: 
SQLMetric { value: 0, metric_type: TimeNanos }, send_nanos: SQLMetric { value: 
0, metric_type: TimeNanos } } }, target_batch_size: 4096 } }, expr: 
[PhysicalSortExpr { expr: Column { name: "d1", index: 0 }, options: SortOptions 
{ descending: false, nulls_first: true } }], output_rows: SQLMetric { value: 0, 
metric_type: Counter }, sort_time_nanos: SQLMetric { value: 0, metric_type: 
TimeNanos }, preserve_partitioning: false }, partitioning: RoundRobinBatch(16), 
channels: Mutex { data: {} }, metrics: RepartitionMetrics { fetch_nanos: 
SQLMetric { value: 0, metric_type: TimeNanos }, repart_nanos: SQLMetric { 
value: 0, 
metric_type: TimeNanos }, send_nanos: SQLMetric { value: 0, metric_type: 
TimeNanos } } }, window_expr: [BuiltInWindowExpr { fun: RowNumber, expr: 
RowNumber { name: "ROW_NUMBER()" }, partition_by: [Column { name: "d1", index: 
0 }], order_by: [], window_frame: None }], schema: Schema { fields: [Field { 
name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: None }, Field { name: "d1", data_type: 
Dictionary(Int32, Utf8), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: None }], metadata: {} }, input_schema: Schema { fields: [Field { 
name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: None }], metadata: {} } } }: 
ArrowError(ExternalError(ArrowError(ExternalError(ArrowError(ExternalError(Execution("Internal 
error: Unsupported data type in hasher. This was likely caused by a bug in 
DataFusion's code and we would welcome that you file an bug report in our issue 
tracker")))))))', 
datafusion/tests/sql.rs:2642:39
   ```
   
   I confirmed that the test passes with the changes in 
https://github.com/apache/arrow-datafusion/pull/812. I will merge #812 as-is 
and then add a specific test in a follow-on PR.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to