alamb opened a new issue #1605:
URL: https://github.com/apache/arrow-datafusion/issues/1605
**Describe the bug**
Two dictionary array columns cannot be compared with each other
(comparisons to a constant do work, however).
**To Reproduce**
```rust
#[tokio::test]
async fn query_on_string_dictionary() -> Result<()> {
// Test to ensure DataFusion can operate on dictionary types
// Use StringDictionary (32 bit indexes = keys)
let d1: DictionaryArray<Int32Type> = vec![Some("one"), None,
Some("three")]
.into_iter()
.collect();
let d2: DictionaryArray<Int32Type> = vec![Some("blarg"), None,
Some("three")]
.into_iter()
.collect();
let d3: StringArray = vec![Some("XYZ"), None, Some("three")]
.into_iter()
.collect();
let batch =
RecordBatch::try_from_iter(vec![
("d1", Arc::new(d1) as ArrayRef),
("d2", Arc::new(d2) as ArrayRef),
("d3", Arc::new(d3) as ArrayRef),
]).unwrap();
let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
let mut ctx = ExecutionContext::new();
ctx.register_table("test", Arc::new(table))?;
// comparison with another dictionary column
let sql = "SELECT d1 FROM test WHERE d1 = d2";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
"+-------+",
"| d1 |",
"+-------+",
"| three |",
"+-------+",
];
assert_batches_eq!(expected, &actual);
// comparison with a non dictionary column
let sql = "SELECT d1 FROM test WHERE d1 = d3";
let actual = execute_to_batches(&mut ctx, sql).await;
let expected = vec![
"+-------+",
"| d1 |",
"+-------+",
"| three |",
"+-------+",
];
assert_batches_eq!(expected, &actual);
```
The test fails with:
```
thread 'sql::select::query_on_string_dictionary' panicked at 'Executing
physical plan for 'SELECT d1 FROM test WHERE d1 = d2': ProjectionExec { expr:
[(Column { name: "d1", index: 0 }, "d1")], schema: Schema { fields: [Field {
name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0,
dict_is_ordered: false, metadata: None }], metadata: {} }, input:
CoalesceBatchesExec { input: FilterExec { predicate: BinaryExpr { left: Column
{ name: "d1", index: 0 }, op: Eq, right: Column { name: "d2", index: 1 } },
input: RepartitionExec { input: partitions: [...]schema: Schema { fields:
[Field { name: "d1", data_type: Dictionary(Int32, Utf8), nullable: true,
dict_id: 0, dict_is_ordered: false, metadata: None }, Field { name: "d2",
data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 0,
dict_is_ordered: false, metadata: None }], metadata: {} }projection: Some([0,
1]), partitioning: RoundRobinBatch(16), state: Mutex { data:
RepartitionExecState { channels: {}, abort_helper:
AbortOnDropMany([]) } }, metrics: ExecutionPlanMetricsSet { inner: Mutex {
data: MetricsSet { metrics: [] }, poisoned: false, .. } } }, metrics:
ExecutionPlanMetricsSet { inner: Mutex { data: MetricsSet { metrics: [] },
poisoned: false, .. } } }, target_batch_size: 4096, metrics:
ExecutionPlanMetricsSet { inner: Mutex { data: MetricsSet { metrics: [] },
poisoned: false, .. } } }, metrics: ExecutionPlanMetricsSet { inner: Mutex {
data: MetricsSet { metrics: [] }, poisoned: false, .. } } }:
ArrowError(ExternalError(Internal("Data type Dictionary(Int32, Utf8) not
supported for binary operation 'eq' on dyn arrays")))',
datafusion/tests/sql/mod.rs:489:48
```
**Expected behavior**
Expected result: test passes
**Additional context**
Add any other context about the problem here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]