This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new a35a58d support like on DictionaryArray (#876)
a35a58d is described below
commit a35a58d449f49858e064fd737d86a6527b446a70
Author: baishen <[email protected]>
AuthorDate: Tue Aug 17 16:10:53 2021 -0500
support like on DictionaryArray (#876)
* support like on DictionaryArray
* fmt
---
datafusion/src/physical_plan/expressions/binary.rs | 4 +-
.../src/physical_plan/expressions/coercion.rs | 7 +++
datafusion/tests/sql.rs | 54 ++++++++++++++++++++++
3 files changed, 63 insertions(+), 2 deletions(-)
diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs
index 3394113..c1b974d 100644
--- a/datafusion/src/physical_plan/expressions/binary.rs
+++ b/datafusion/src/physical_plan/expressions/binary.rs
@@ -44,7 +44,7 @@ use crate::physical_plan::expressions::try_cast;
use crate::physical_plan::{ColumnarValue, PhysicalExpr};
use crate::scalar::ScalarValue;
-use super::coercion::{eq_coercion, numerical_coercion, order_coercion, string_coercion};
+use super::coercion::{eq_coercion, like_coercion, numerical_coercion, order_coercion};
/// Binary expression
#[derive(Debug)]
@@ -356,7 +356,7 @@ fn common_binary_type(
// logical equality operators have their own rules, and always return a boolean
Operator::Eq | Operator::NotEq => eq_coercion(lhs_type, rhs_type),
// "like" operators operate on strings and always return a boolean
- Operator::Like | Operator::NotLike => string_coercion(lhs_type, rhs_type),
+ Operator::Like | Operator::NotLike => like_coercion(lhs_type, rhs_type),
// order-comparison operators have their own rules
Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
order_coercion(lhs_type, rhs_type)
diff --git a/datafusion/src/physical_plan/expressions/coercion.rs b/datafusion/src/physical_plan/expressions/coercion.rs
index e9949f5..fe073df 100644
--- a/datafusion/src/physical_plan/expressions/coercion.rs
+++ b/datafusion/src/physical_plan/expressions/coercion.rs
@@ -89,6 +89,13 @@ pub fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataT
}
}
+/// coercion rules for like operations.
+/// This is a union of string coercion rules and dictionary coercion rules
+pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+ string_coercion(lhs_type, rhs_type)
+ .or_else(|| dictionary_coercion(lhs_type, rhs_type))
+}
+
/// Coercion rules for Temporal columns: the type that both lhs and rhs can be
/// casted to for the purpose of a date computation
pub fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 2a062f6..0f38568 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -4283,3 +4283,57 @@ async fn test_partial_qualified_name() -> Result<()> {
assert_eq!(expected, actual);
Ok(())
}
+
+#[tokio::test]
+async fn like_on_strings() -> Result<()> {
+ let input = vec![Some("foo"), Some("bar"), None, Some("fazzz")]
+ .into_iter()
+ .collect::<StringArray>();
+
+ let batch = RecordBatch::try_from_iter(vec![("c1", Arc::new(input) as _)]).unwrap();
+
+ let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
+ let mut ctx = ExecutionContext::new();
+ ctx.register_table("test", Arc::new(table))?;
+
+ let sql = "SELECT * FROM test WHERE c1 LIKE '%a%'";
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+-------+",
+ "| c1 |",
+ "+-------+",
+ "| bar |",
+ "| fazzz |",
+ "+-------+",
+ ];
+
+ assert_batches_eq!(expected, &actual);
+ Ok(())
+}
+
+#[tokio::test]
+async fn like_on_string_dictionaries() -> Result<()> {
+ let input = vec![Some("foo"), Some("bar"), None, Some("fazzz")]
+ .into_iter()
+ .collect::<DictionaryArray<Int32Type>>();
+
+ let batch = RecordBatch::try_from_iter(vec![("c1", Arc::new(input) as _)]).unwrap();
+
+ let table = MemTable::try_new(batch.schema(), vec![vec![batch]])?;
+ let mut ctx = ExecutionContext::new();
+ ctx.register_table("test", Arc::new(table))?;
+
+ let sql = "SELECT * FROM test WHERE c1 LIKE '%a%'";
+ let actual = execute_to_batches(&mut ctx, sql).await;
+ let expected = vec![
+ "+-------+",
+ "| c1 |",
+ "+-------+",
+ "| bar |",
+ "| fazzz |",
+ "+-------+",
+ ];
+
+ assert_batches_eq!(expected, &actual);
+ Ok(())
+}