alamb opened a new issue #815:
URL: https://github.com/apache/arrow-datafusion/issues/815


   
   **Describe the bug**
   I expect to be able to run `LIKE` predicates on columns that are encoded as 
dictionaries. 
   
   For example if `host` is a `DictionaryArray` column:
   
   ```sql
   SELECT cpu from cpu where host LIKE 'Andrews-MBP%';
   ```
   
   DataFusion will error thusly:
   ```
   Error during planning:'Dictionary(Int32, Utf8) LIKE Utf8' can't be evaluated 
because there isn't a common type to coerce the types to".
   ```
   
   
   **To Reproduce**
   
   ```diff
   diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
   index 3a83f205a..c84161b98 100644
   --- a/datafusion/tests/sql.rs
   +++ b/datafusion/tests/sql.rs
   @@ -2867,6 +2867,64 @@ async fn like() -> Result<()> {
   Ok(())
   }
   
   +#[tokio::test]
   +async fn like_on_strings() -> Result<()> {
   +    let input = vec![Some("foo"), Some("bar"), None, Some("fazz")]
   +        .into_iter().collect::<StringArray>();
   +
   +    let batch = RecordBatch::try_from_iter(vec![
   +        ("c1", Arc::new(input) as _)
   +    ]).unwrap();
   +
   +    let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
   +    let mut ctx = ExecutionContext::new();
   +    ctx.register_table("test", Arc::new(table))?;
   +
   +    let sql = "SELECT * FROM test WHERE c1 LIKE '%a%'";
   +    let actual = execute_to_batches(&mut ctx, sql).await;
   +    let expected = vec![
   +        "+------+",
   +        "| c1   |",
   +        "+------+",
   +        "| bar  |",
   +        "| fazz |",
   +        "+------+",
   +    ];
   +
   +    assert_batches_eq!(expected, &actual);
   +    Ok(())
   +}
   +
   +#[tokio::test]
   +async fn like_on_string_dictionaries() -> Result<()> {
   +    let input = vec![Some("foo"), Some("bar"), None, Some("fazz")]
   +        .into_iter().collect::<DictionaryArray<Int32Type>>();
   +
   +    let batch = RecordBatch::try_from_iter(vec![
   +        ("c1", Arc::new(input) as _)
   +    ]).unwrap();
   +
   +    let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
   +    let mut ctx = ExecutionContext::new();
   +    ctx.register_table("test", Arc::new(table))?;
   +
   +    let sql = "SELECT * FROM test WHERE c1 LIKE '%a%'";
   +    let actual = execute_to_batches(&mut ctx, sql).await;
   +    let expected = vec![
   +        "+------+",
   +        "| c1   |",
   +        "+------+",
   +        "| bar  |",
   +        "| fazz |",
   +        "+------+",
   +    ];
   +
   +    assert_batches_eq!(expected, &actual);
   +    Ok(())
   +}
   +
   +
   +
   fn make_timestamp_table<A>() -> Result<Arc<MemTable>>
   where
   A: ArrowTimestampType,
   ```
   
   Results in this error:
   ```
   running 2 tests
   test like_on_string_dictionaries ... FAILED
   test like_on_strings ... ok
   
   failures:
   
   ---- like_on_string_dictionaries stdout ----
   thread 'like_on_string_dictionaries' panicked at 'Creating physical plan for 
'SELECT * FROM test WHERE c1 LIKE '%a%'': Projection: #test.c1
   Filter: #test.c1 Like Utf8("%a%")
   TableScan: test projection=Some([0]): Plan("'Dictionary(Int32, Utf8) LIKE 
Utf8' can't be evaluated because there isn't a common type to coerce the types 
to")', datafusion/tests/sql.rs:2639:48
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   ```
   
   **Expected behavior**
   I expect both tests to pass successfully.
   
   **Additional context**
   This was found by @atingchen on 
https://github.com/influxdata/influxdb_iox/issues/2137#issuecomment-890539908
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to