This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new a35a58d  support like on DictionaryArray (#876)
a35a58d is described below

commit a35a58d449f49858e064fd737d86a6527b446a70
Author: baishen <[email protected]>
AuthorDate: Tue Aug 17 16:10:53 2021 -0500

    support like on DictionaryArray (#876)
    
    * support like on DictionaryArray
    
    * fmt
---
 datafusion/src/physical_plan/expressions/binary.rs |  4 +-
 .../src/physical_plan/expressions/coercion.rs      |  7 +++
 datafusion/tests/sql.rs                            | 54 ++++++++++++++++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs
index 3394113..c1b974d 100644
--- a/datafusion/src/physical_plan/expressions/binary.rs
+++ b/datafusion/src/physical_plan/expressions/binary.rs
@@ -44,7 +44,7 @@ use crate::physical_plan::expressions::try_cast;
 use crate::physical_plan::{ColumnarValue, PhysicalExpr};
 use crate::scalar::ScalarValue;
 
-use super::coercion::{eq_coercion, numerical_coercion, order_coercion, string_coercion};
+use super::coercion::{eq_coercion, like_coercion, numerical_coercion, order_coercion};
 
 /// Binary expression
 #[derive(Debug)]
@@ -356,7 +356,7 @@ fn common_binary_type(
         // logical equality operators have their own rules, and always return a boolean
         Operator::Eq | Operator::NotEq => eq_coercion(lhs_type, rhs_type),
         // "like" operators operate on strings and always return a boolean
-        Operator::Like | Operator::NotLike => string_coercion(lhs_type, rhs_type),
+        Operator::Like | Operator::NotLike => like_coercion(lhs_type, rhs_type),
         // order-comparison operators have their own rules
         Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
             order_coercion(lhs_type, rhs_type)
diff --git a/datafusion/src/physical_plan/expressions/coercion.rs b/datafusion/src/physical_plan/expressions/coercion.rs
index e9949f5..fe073df 100644
--- a/datafusion/src/physical_plan/expressions/coercion.rs
+++ b/datafusion/src/physical_plan/expressions/coercion.rs
@@ -89,6 +89,13 @@ pub fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataT
     }
 }
 
+/// coercion rules for like operations.
+/// This is a union of string coercion rules and dictionary coercion rules
+pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+    string_coercion(lhs_type, rhs_type)
+        .or_else(|| dictionary_coercion(lhs_type, rhs_type))
+}
+
 /// Coercion rules for Temporal columns: the type that both lhs and rhs can be
 /// casted to for the purpose of a date computation
 pub fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 2a062f6..0f38568 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -4283,3 +4283,57 @@ async fn test_partial_qualified_name() -> Result<()> {
     assert_eq!(expected, actual);
     Ok(())
 }
+
+#[tokio::test]
+async fn like_on_strings() -> Result<()> {
+    let input = vec![Some("foo"), Some("bar"), None, Some("fazzz")]
+        .into_iter()
+        .collect::<StringArray>();
+
+    let batch = RecordBatch::try_from_iter(vec![("c1", Arc::new(input) as _)]).unwrap();
+
+    let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
+    let mut ctx = ExecutionContext::new();
+    ctx.register_table("test", Arc::new(table))?;
+
+    let sql = "SELECT * FROM test WHERE c1 LIKE '%a%'";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+-------+",
+        "| c1    |",
+        "+-------+",
+        "| bar   |",
+        "| fazzz |",
+        "+-------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn like_on_string_dictionaries() -> Result<()> {
+    let input = vec![Some("foo"), Some("bar"), None, Some("fazzz")]
+        .into_iter()
+        .collect::<DictionaryArray<Int32Type>>();
+
+    let batch = RecordBatch::try_from_iter(vec![("c1", Arc::new(input) as _)]).unwrap();
+
+    let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
+    let mut ctx = ExecutionContext::new();
+    ctx.register_table("test", Arc::new(table))?;
+
+    let sql = "SELECT * FROM test WHERE c1 LIKE '%a%'";
+    let actual = execute_to_batches(&mut ctx, sql).await;
+    let expected = vec![
+        "+-------+",
+        "| c1    |",
+        "+-------+",
+        "| bar   |",
+        "| fazzz |",
+        "+-------+",
+    ];
+
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}

Reply via email to