alamb opened a new issue #249:
URL: https://github.com/apache/arrow-datafusion/issues/249


   As in https://github.com/apache/arrow-datafusion/issues/199
   
   **Describe the bug**
   Distinct aggregates such as `select count(distinct col) ...` fail with an error when `col` is a dictionary-encoded column (a `DictionaryArray`).
   
   **To Reproduce**
   ```
               let mut ctx = ExecutionContext::new();
   
               // input data looks like:
               // A, 1
               // B, 2
               // A, 2
               // A, 4
               // C, 1
               // A, 1
   
               let dict_array: DictionaryArray<K> =
                   vec!["A", "B", "A", "A", "C", "A"].into_iter().collect();
               let dict_array = Arc::new(dict_array);
   
               let val_array: Int64Array = vec![1, 2, 2, 4, 1, 1].into();
               let val_array = Arc::new(val_array);
   
               let schema = Arc::new(Schema::new(vec![
                   Field::new("dict", dict_array.data_type().clone(), false),
                   Field::new("val", val_array.data_type().clone(), false),
               ]));
   
               let batch = RecordBatch::try_new(schema.clone(), vec![dict_array, val_array])
                   .unwrap();
   
               let provider = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap();
               ctx.register_table("t", Arc::new(provider)).unwrap();
   
   
               // Now, use dict as an aggregate
               let results =
                   plan_and_collect(&mut ctx, "SELECT val, count(distinct dict) FROM t GROUP BY val")
                       .await
                       .expect("ran plan correctly");
   
               let expected = vec![
                   "+-----+-------------+",
                   "| val | COUNT(dict) |",
                   "+-----+-------------+",
                   "| 1   | 2           |",
                   "| 2   | 2           |",
                   "| 4   | 1           |",
                   "+-----+-------------+",
               ];
               assert_batches_sorted_eq!(expected, &results);
   ```
   
   This fails with an error such as:
   ```
   thread 'execution::context::tests::group_by_dictionary' panicked at 'ran plan correctly: ArrowError(ExternalError(NotImplemented("Can\'t create a scalar of array of type \"Dictionary(Int8, Utf8)\"")))', datafusion/src/execution/context.rs:1774:22
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   ```
   
   
   
   
   **Expected behavior**
   The test passes: the query runs without error and returns the expected distinct counts shown above.
   
   **Additional context**
   Add any other context about the problem here.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to