This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f4fc2639f1 [task #8201] Port tests in expr.rs to sqllogictest, finish
the left c… (#9014)
f4fc2639f1 is described below
commit f4fc2639f1d9d1f4dbc73d39990a83f6bf7a725f
Author: Reilly.tang <[email protected]>
AuthorDate: Sun Jan 28 22:45:45 2024 +0800
[task #8201] Port tests in expr.rs to sqllogictest, finish the left c…
(#9014)
* [task #8201] Port tests in expr.rs to sqllogictest, finish the left cases
Signed-off-by: tangruilin <[email protected]>
* Apply suggestions from code review
* Add a few more comments
---------
Signed-off-by: tangruilin <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/core/tests/sql/expr.rs | 413 ---------------
datafusion/core/tests/sql/mod.rs | 11 -
datafusion/sqllogictest/test_files/expr.slt | 751 ++++++++++++++++++++++++++++
3 files changed, 751 insertions(+), 424 deletions(-)
diff --git a/datafusion/core/tests/sql/expr.rs
b/datafusion/core/tests/sql/expr.rs
deleted file mode 100644
index e8a3d27c08..0000000000
--- a/datafusion/core/tests/sql/expr.rs
+++ /dev/null
@@ -1,413 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use datafusion::datasource::empty::EmptyTable;
-
-use super::*;
-
-#[tokio::test]
-#[cfg_attr(not(feature = "crypto_expressions"), ignore)]
-async fn test_encoding_expressions() -> Result<()> {
- // Input Utf8
- test_expression!("encode('tom','base64')", "dG9t");
- test_expression!("arrow_cast(decode('dG9t','base64'), 'Utf8')", "tom");
- test_expression!("encode('tom','hex')", "746f6d");
- test_expression!("arrow_cast(decode('746f6d','hex'), 'Utf8')", "tom");
-
- // Input LargeUtf8
- test_expression!("encode(arrow_cast('tom', 'LargeUtf8'),'base64')",
"dG9t");
- test_expression!(
- "arrow_cast(decode(arrow_cast('dG9t', 'LargeUtf8'),'base64'), 'Utf8')",
- "tom"
- );
- test_expression!("encode(arrow_cast('tom', 'LargeUtf8'),'hex')", "746f6d");
- test_expression!(
- "arrow_cast(decode(arrow_cast('746f6d', 'LargeUtf8'),'hex'), 'Utf8')",
- "tom"
- );
-
- // Input Binary
- test_expression!("encode(arrow_cast('tom', 'Binary'),'base64')", "dG9t");
- test_expression!(
- "arrow_cast(decode(arrow_cast('dG9t', 'Binary'),'base64'), 'Utf8')",
- "tom"
- );
- test_expression!("encode(arrow_cast('tom', 'Binary'),'hex')", "746f6d");
- test_expression!(
- "arrow_cast(decode(arrow_cast('746f6d', 'Binary'),'hex'), 'Utf8')",
- "tom"
- );
-
- // Input LargeBinary
- test_expression!("encode(arrow_cast('tom', 'LargeBinary'),'base64')",
"dG9t");
- test_expression!(
- "arrow_cast(decode(arrow_cast('dG9t', 'LargeBinary'),'base64'),
'Utf8')",
- "tom"
- );
- test_expression!("encode(arrow_cast('tom', 'LargeBinary'),'hex')",
"746f6d");
- test_expression!(
- "arrow_cast(decode(arrow_cast('746f6d', 'LargeBinary'),'hex'),
'Utf8')",
- "tom"
- );
-
- // NULL
- test_expression!("encode(NULL,'base64')", "NULL");
- test_expression!("decode(NULL,'base64')", "NULL");
- test_expression!("encode(NULL,'hex')", "NULL");
- test_expression!("decode(NULL,'hex')", "NULL");
-
- // Empty string
- test_expression!("encode('','base64')", "");
- test_expression!("decode('','base64')", "");
- test_expression!("encode('','hex')", "");
- test_expression!("decode('','hex')", "");
-
- Ok(())
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "crypto_expressions"), ignore)]
-async fn test_crypto_expressions() -> Result<()> {
- test_expression!("md5('tom')", "34b7da764b21d298ef307d04d8152dc5");
- test_expression!("digest('tom','md5')",
"34b7da764b21d298ef307d04d8152dc5");
- test_expression!("md5('')", "d41d8cd98f00b204e9800998ecf8427e");
- test_expression!("digest('','md5')", "d41d8cd98f00b204e9800998ecf8427e");
- test_expression!("md5(NULL)", "NULL");
- test_expression!("digest(NULL,'md5')", "NULL");
- test_expression!(
- "sha224('tom')",
- "0bf6cb62649c42a9ae3876ab6f6d92ad36cb5414e495f8873292be4d"
- );
- test_expression!(
- "digest('tom','sha224')",
- "0bf6cb62649c42a9ae3876ab6f6d92ad36cb5414e495f8873292be4d"
- );
- test_expression!(
- "sha224('')",
- "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"
- );
- test_expression!(
- "digest('','sha224')",
- "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"
- );
- test_expression!("sha224(NULL)", "NULL");
- test_expression!("digest(NULL,'sha224')", "NULL");
- test_expression!(
- "sha256('tom')",
- "e1608f75c5d7813f3d4031cb30bfb786507d98137538ff8e128a6ff74e84e643"
- );
- test_expression!(
- "digest('tom','sha256')",
- "e1608f75c5d7813f3d4031cb30bfb786507d98137538ff8e128a6ff74e84e643"
- );
- test_expression!(
- "sha256('')",
- "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
- );
- test_expression!(
- "digest('','sha256')",
- "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
- );
- test_expression!("sha256(NULL)", "NULL");
- test_expression!("digest(NULL,'sha256')", "NULL");
- test_expression!("sha384('tom')",
"096f5b68aa77848e4fdf5c1c0b350de2dbfad60ffd7c25d9ea07c6c19b8a4d55a9187eb117c557883f58c16dfac3e343");
- test_expression!("digest('tom','sha384')",
"096f5b68aa77848e4fdf5c1c0b350de2dbfad60ffd7c25d9ea07c6c19b8a4d55a9187eb117c557883f58c16dfac3e343");
- test_expression!("sha384('')",
"38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b");
- test_expression!("digest('','sha384')",
"38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b");
- test_expression!("sha384(NULL)", "NULL");
- test_expression!("digest(NULL,'sha384')", "NULL");
- test_expression!("sha512('tom')",
"6e1b9b3fe840680e37051f7ad5e959d6f39ad0f8885d855166f55c659469d3c8b78118c44a2a49c72ddb481cd6d8731034e11cc030070ba843a90b3495cb8d3e");
- test_expression!("digest('tom','sha512')",
"6e1b9b3fe840680e37051f7ad5e959d6f39ad0f8885d855166f55c659469d3c8b78118c44a2a49c72ddb481cd6d8731034e11cc030070ba843a90b3495cb8d3e");
- test_expression!("sha512('')",
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e");
- test_expression!("digest('','sha512')",
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e");
- test_expression!("sha512(NULL)", "NULL");
- test_expression!("digest(NULL,'sha512')", "NULL");
- test_expression!("digest(NULL,'blake2s')", "NULL");
- test_expression!("digest(NULL,'blake2b')", "NULL");
- test_expression!("digest('','blake2b')",
"786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce");
- test_expression!("digest('tom','blake2b')",
"482499a18da10a18d8d35ab5eb4c635551ec5b8d3ff37c3e87a632caf6680fe31566417834b4732e26e0203d1cad4f5366cb7ab57d89694e4c1fda3e26af2c23");
- test_expression!(
- "digest('','blake2s')",
- "69217a3079908094e11121d042354a7c1f55b6482ca1a51e1b250dfd1ed0eef9"
- );
- test_expression!(
- "digest('tom','blake2s')",
- "5fc3f2b3a07cade5023c3df566e4d697d3823ba1b72bfb3e84cf7e768b2e7529"
- );
- test_expression!(
- "digest('','blake3')",
- "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
- );
- Ok(())
-}
-
-#[cfg(feature = "unicode_expressions")]
-#[tokio::test]
-async fn test_substring_expr() -> Result<()> {
- test_expression!("substring('alphabet' from 2 for 1)", "l");
- test_expression!("substring('alphabet' from 8)", "t");
- test_expression!("substring('alphabet' for 1)", "a");
-
- Ok(())
-}
-
-#[tokio::test]
-#[cfg_attr(not(feature = "regex_expressions"), ignore)]
-async fn test_regex_expressions() -> Result<()> {
- test_expression!("regexp_replace('ABCabcABC', '(abc)', 'X', 'gi')", "XXX");
- test_expression!("regexp_replace('ABCabcABC', '(abc)', 'X', 'i')",
"XabcABC");
- test_expression!("regexp_replace('foobarbaz', 'b..', 'X', 'g')", "fooXX");
- test_expression!("regexp_replace('foobarbaz', 'b..', 'X')", "fooXbaz");
- test_expression!(
- "regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g')",
- "fooXarYXazY"
- );
- test_expression!(
- "regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', NULL)",
- "NULL"
- );
- test_expression!("regexp_replace('foobarbaz', 'b(..)', NULL, 'g')",
"NULL");
- test_expression!("regexp_replace('foobarbaz', NULL, 'X\\1Y', 'g')",
"NULL");
- test_expression!("regexp_replace('Thomas', '.[mN]a.', 'M')", "ThM");
- test_expression!("regexp_replace(NULL, 'b(..)', 'X\\1Y', 'g')", "NULL");
- test_expression!("regexp_match('foobarbequebaz', '')", "[]");
- test_expression!(
- "regexp_match('foobarbequebaz', '(bar)(beque)')",
- "[bar, beque]"
- );
- test_expression!("regexp_match('foobarbequebaz', '(ba3r)(bequ34e)')",
"NULL");
- test_expression!("regexp_match('aaa-0', '.*-(\\d)')", "[0]");
- test_expression!("regexp_match('bb-1', '.*-(\\d)')", "[1]");
- test_expression!("regexp_match('aa', '.*-(\\d)')", "NULL");
- test_expression!("regexp_match(NULL, '.*-(\\d)')", "NULL");
- test_expression!("regexp_match('aaa-0', NULL)", "NULL");
- Ok(())
-}
-
-#[tokio::test]
-async fn csv_query_nullif_divide_by_0() -> Result<()> {
- let ctx = SessionContext::new();
- register_aggregate_csv(&ctx).await?;
- let sql = "SELECT c8/nullif(c7, 0) FROM aggregate_test_100";
- let actual = execute(&ctx, sql).await;
- let actual = &actual[80..90]; // We just want to compare rows 80-89
- let expected = vec![
- vec!["258"],
- vec!["664"],
- vec!["NULL"],
- vec!["22"],
- vec!["164"],
- vec!["448"],
- vec!["365"],
- vec!["1640"],
- vec!["671"],
- vec!["203"],
- ];
- assert_eq!(expected, actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn nested_subquery() -> Result<()> {
- let ctx = SessionContext::new();
- let schema = Schema::new(vec![
- Field::new("id", DataType::Int16, false),
- Field::new("a", DataType::Int16, false),
- ]);
- let empty_table = Arc::new(EmptyTable::new(Arc::new(schema)));
- ctx.register_table("t1", empty_table.clone())?;
- ctx.register_table("t2", empty_table)?;
- let sql = "SELECT COUNT(*) as cnt \
- FROM (\
- (SELECT id FROM t1) EXCEPT \
- (SELECT id FROM t2)\
- ) foo";
- let actual = execute_to_batches(&ctx, sql).await;
- // the purpose of this test is just to make sure the query produces a
valid plan
- #[rustfmt::skip]
- let expected = ["+-----+",
- "| cnt |",
- "+-----+",
- "| 0 |",
- "+-----+"];
- assert_batches_eq!(expected, &actual);
- Ok(())
-}
-
-#[tokio::test]
-async fn comparisons_with_null_lt() {
- let ctx = SessionContext::new();
-
- // we expect all the following queries to yield a two null values
- let cases = vec![
- // 1. Numeric comparison with NULL
- "select column1 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select column1 <= NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select column1 > NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select column1 >= NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select column1 = NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select column1 != NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- // 1.1 Float value comparison with NULL
- "select column3 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- // String comparison with NULL
- "select column2 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- // Boolean comparison with NULL
- "select column1 < NULL from (VALUES (true), (false)) as t",
- // ----
- // ---- same queries, reversed argument order (as they go through
- // ---- a different evaluation path)
- // ----
-
- // 1. Numeric comparison with NULL
- "select NULL < column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select NULL <= column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select NULL > column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select NULL >= column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select NULL = column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- "select NULL != column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- // 1.1 Float value comparison with NULL
- "select NULL < column3 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- // String comparison with NULL
- "select NULL < column2 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4))
as t",
- // Boolean comparison with NULL
- "select NULL < column1 from (VALUES (true), (false)) as t",
- ];
-
- for sql in cases {
- println!("Computing: {sql}");
-
- let mut actual = execute_to_batches(&ctx, sql).await;
- assert_eq!(actual.len(), 1);
-
- let batch = actual.pop().unwrap();
- assert_eq!(batch.num_rows(), 2);
- assert_eq!(batch.num_columns(), 1);
- assert!(batch.columns()[0].is_null(0));
- assert!(batch.columns()[0].is_null(1));
- }
-}
-
-#[tokio::test]
-async fn binary_mathematical_operator_with_null_lt() {
- let ctx = SessionContext::new();
-
- let cases = vec![
- // 1. Integer and NULL
- "select column1 + NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column1 - NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column1 * NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column1 / NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column1 % NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- // 2. Float and NULL
- "select column2 + NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column2 - NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column2 * NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column2 / NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select column2 % NULL from (VALUES (1, 2.3), (2, 5.4)) as t",
- // ----
- // ---- same queries, reversed argument order
- // ----
- // 3. NULL and Integer
- "select NULL + column1 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL - column1 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL * column1 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL / column1 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL % column1 from (VALUES (1, 2.3), (2, 5.4)) as t",
- // 4. NULL and Float
- "select NULL + column2 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL - column2 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL * column2 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL / column2 from (VALUES (1, 2.3), (2, 5.4)) as t",
- "select NULL % column2 from (VALUES (1, 2.3), (2, 5.4)) as t",
- ];
-
- for sql in cases {
- println!("Computing: {sql}");
-
- let mut actual = execute_to_batches(&ctx, sql).await;
- assert_eq!(actual.len(), 1);
-
- let batch = actual.pop().unwrap();
- assert_eq!(batch.num_rows(), 2);
- assert_eq!(batch.num_columns(), 1);
- assert!(batch.columns()[0].is_null(0));
- assert!(batch.columns()[0].is_null(1));
- }
-}
-
-#[tokio::test]
-async fn query_binary_eq() -> Result<()> {
- let schema = Arc::new(Schema::new(vec![
- Field::new("c1", DataType::Binary, true),
- Field::new("c2", DataType::LargeBinary, true),
- Field::new("c3", DataType::Binary, true),
- Field::new("c4", DataType::LargeBinary, true),
- ]));
-
- let c1 = BinaryArray::from_opt_vec(vec![
- Some(b"one"),
- Some(b"two"),
- None,
- Some(b""),
- Some(b"three"),
- ]);
- let c2 = LargeBinaryArray::from_opt_vec(vec![
- Some(b"one"),
- Some(b"two"),
- None,
- Some(b""),
- Some(b"three"),
- ]);
- let c3 = BinaryArray::from_opt_vec(vec![
- Some(b"one"),
- Some(b""),
- None,
- Some(b""),
- Some(b"three"),
- ]);
- let c4 = LargeBinaryArray::from_opt_vec(vec![
- Some(b"one"),
- Some(b"two"),
- None,
- Some(b""),
- Some(b""),
- ]);
-
- let data = RecordBatch::try_new(
- schema.clone(),
- vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
- )?;
-
- let ctx = SessionContext::new();
-
- ctx.register_batch("test", data)?;
-
- let sql = "
- SELECT sha256(c1)=digest('one', 'sha256'), sha256(c2)=sha256('two'),
digest(c1, 'blake2b')=digest(c3, 'blake2b'), c2=c4
- FROM test
- ";
- let actual = execute(&ctx, sql).await;
- let expected = vec![
- vec!["true", "false", "true", "true"],
- vec!["false", "true", "false", "true"],
- vec!["NULL", "NULL", "NULL", "NULL"],
- vec!["false", "false", "true", "true"],
- vec!["false", "false", "true", "false"],
- ];
- assert_eq!(expected, actual);
- Ok(())
-}
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 246191e48a..9b7828a777 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -56,20 +56,9 @@ macro_rules! assert_metrics {
};
}
-macro_rules! test_expression {
- ($SQL:expr, $EXPECTED:expr) => {
- println!("Input:\n {}\nExpected:\n {}\n", $SQL, $EXPECTED);
- let ctx = SessionContext::new();
- let sql = format!("SELECT {}", $SQL);
- let actual = execute(&ctx, sql.as_str()).await;
- assert_eq!(actual[0][0], $EXPECTED);
- };
-}
-
pub mod aggregates;
pub mod create_drop;
pub mod explain_analyze;
-pub mod expr;
pub mod joins;
pub mod select;
mod sql_api;
diff --git a/datafusion/sqllogictest/test_files/expr.slt
b/datafusion/sqllogictest/test_files/expr.slt
index b5b50eca81..1a2dcf3a91 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -1249,3 +1249,754 @@ query B
SELECT '2' NOT IN ('a','b',NULL,1)
----
NULL
+
+query T
+SELECT encode('tom','base64');
+----
+dG9t
+
+query T
+SELECT arrow_cast(decode('dG9t','base64'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode('tom','hex');
+----
+746f6d
+
+query T
+SELECT arrow_cast(decode('746f6d','hex'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode(arrow_cast('tom', 'LargeUtf8'),'base64');
+----
+dG9t
+
+query T
+SELECT arrow_cast(decode(arrow_cast('dG9t', 'LargeUtf8'),'base64'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode(arrow_cast('tom', 'LargeUtf8'),'hex');
+----
+746f6d
+
+query T
+SELECT arrow_cast(decode(arrow_cast('746f6d', 'LargeUtf8'),'hex'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode(arrow_cast('tom', 'Binary'),'base64');
+----
+dG9t
+
+query T
+SELECT arrow_cast(decode(arrow_cast('dG9t', 'Binary'),'base64'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode(arrow_cast('tom', 'Binary'),'hex');
+----
+746f6d
+
+query T
+SELECT arrow_cast(decode(arrow_cast('746f6d', 'Binary'),'hex'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode(arrow_cast('tom', 'LargeBinary'),'base64');
+----
+dG9t
+
+query T
+SELECT arrow_cast(decode(arrow_cast('dG9t', 'LargeBinary'),'base64'), 'Utf8');
+----
+tom
+
+query T
+SELECT encode(arrow_cast('tom', 'LargeBinary'),'hex');
+----
+746f6d
+
+query T
+SELECT arrow_cast(decode(arrow_cast('746f6d', 'LargeBinary'),'hex'), 'Utf8');
+----
+tom
+
+query ?
+SELECT encode(NULL,'base64');
+----
+NULL
+
+query ?
+SELECT decode(NULL,'base64');
+----
+NULL
+
+query ?
+SELECT encode(NULL,'hex');
+----
+NULL
+
+query ?
+SELECT decode(NULL,'hex');
+----
+NULL
+
+query T
+SELECT encode('','base64');
+----
+(empty)
+
+query ?
+SELECT decode('','base64');
+----
+
+
+query T
+SELECT encode('','hex');
+----
+(empty)
+
+query ?
+SELECT decode('','hex');
+----
+
+
+query T
+SELECT md5('tom');
+----
+34b7da764b21d298ef307d04d8152dc5
+
+query ?
+SELECT digest('tom','md5');
+----
+34b7da764b21d298ef307d04d8152dc5
+
+query T
+SELECT md5('');
+----
+d41d8cd98f00b204e9800998ecf8427e
+
+query ?
+SELECT md5(NULL);
+----
+NULL
+
+query ?
+SELECT digest('','md5');
+----
+d41d8cd98f00b204e9800998ecf8427e
+
+query ?
+SELECT digest(NULL,'md5');
+----
+NULL
+
+query ?
+SELECT sha224('tom');
+----
+0bf6cb62649c42a9ae3876ab6f6d92ad36cb5414e495f8873292be4d
+
+query ?
+SELECT digest('tom','sha224')
+----
+0bf6cb62649c42a9ae3876ab6f6d92ad36cb5414e495f8873292be4d
+
+query ?
+SELECT sha224('');
+----
+d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f
+
+query ?
+SELECT digest('','sha224');
+----
+d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f
+
+query ?
+SELECT sha224(NULL);
+----
+NULL
+
+query ?
+SELECT digest(NULL,'sha224');
+----
+NULL
+
+query ?
+SELECT sha256('tom');
+----
+e1608f75c5d7813f3d4031cb30bfb786507d98137538ff8e128a6ff74e84e643
+
+query ?
+SELECT digest('tom','sha256');
+----
+e1608f75c5d7813f3d4031cb30bfb786507d98137538ff8e128a6ff74e84e643
+
+query ?
+SELECT sha256('');
+----
+e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+
+query ?
+SELECT digest('','sha256');
+----
+e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+
+query ?
+SELECT sha256(NULL);
+----
+NULL
+
+query ?
+SELECT digest(NULL,'sha256');
+----
+NULL
+
+query ?
+SELECT sha384('tom');
+----
+096f5b68aa77848e4fdf5c1c0b350de2dbfad60ffd7c25d9ea07c6c19b8a4d55a9187eb117c557883f58c16dfac3e343
+
+query ?
+SELECT digest('tom','sha384');
+----
+096f5b68aa77848e4fdf5c1c0b350de2dbfad60ffd7c25d9ea07c6c19b8a4d55a9187eb117c557883f58c16dfac3e343
+
+query ?
+SELECT sha384('');
+----
+38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b
+
+query ?
+SELECT digest('','sha384');
+----
+38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b
+
+query ?
+SELECT sha384(NULL);
+----
+NULL
+
+query ?
+SELECT digest(NULL,'sha384');
+----
+NULL
+
+query ?
+SELECT sha512('tom');
+----
+6e1b9b3fe840680e37051f7ad5e959d6f39ad0f8885d855166f55c659469d3c8b78118c44a2a49c72ddb481cd6d8731034e11cc030070ba843a90b3495cb8d3e
+
+query ?
+SELECT digest('tom','sha512');
+----
+6e1b9b3fe840680e37051f7ad5e959d6f39ad0f8885d855166f55c659469d3c8b78118c44a2a49c72ddb481cd6d8731034e11cc030070ba843a90b3495cb8d3e
+
+query ?
+SELECT sha512('');
+----
+cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e
+
+query ?
+SELECT digest('','sha512');
+----
+cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e
+
+query ?
+SELECT sha512(NULL);
+----
+NULL
+
+query ?
+SELECT digest(NULL,'sha512');
+----
+NULL
+
+query ?
+SELECT digest(NULL,'blake2s');
+----
+NULL
+
+query ?
+SELECT digest(NULL,'blake2b');
+----
+NULL
+
+query ?
+SELECT digest('','blake2b');
+----
+786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce
+
+query ?
+SELECT digest('tom','blake2b');
+----
+482499a18da10a18d8d35ab5eb4c635551ec5b8d3ff37c3e87a632caf6680fe31566417834b4732e26e0203d1cad4f5366cb7ab57d89694e4c1fda3e26af2c23
+
+query ?
+SELECT digest('','blake2s');
+----
+69217a3079908094e11121d042354a7c1f55b6482ca1a51e1b250dfd1ed0eef9
+
+query ?
+SELECT digest('tom','blake2s');
+----
+5fc3f2b3a07cade5023c3df566e4d697d3823ba1b72bfb3e84cf7e768b2e7529
+
+query ?
+SELECT digest('','blake3');
+----
+af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262
+
+query T
+SELECT substring('alphabet' from 2 for 1);
+----
+l
+
+query T
+SELECT substring('alphabet' from 8);
+----
+t
+
+query T
+SELECT substring('alphabet' for 1);
+----
+a
+
+query T
+SELECT regexp_replace('ABCabcABC', '(abc)', 'X', 'gi');
+----
+XXX
+
+query T
+SELECT regexp_replace('ABCabcABC', '(abc)', 'X', 'i');
+----
+XabcABC
+
+query T
+SELECT regexp_replace('foobarbaz', 'b..', 'X', 'g');
+----
+fooXX
+
+query T
+SELECT regexp_replace('foobarbaz', 'b..', 'X');
+----
+fooXbaz
+
+query T
+SELECT regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g');
+----
+fooXarYXazY
+
+query T
+SELECT regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', NULL);
+----
+NULL
+
+query T
+SELECT regexp_replace('foobarbaz', 'b(..)', NULL, 'g');
+----
+NULL
+
+query T
+SELECT regexp_replace('foobarbaz', NULL, 'X\\1Y', 'g');
+----
+NULL
+
+query T
+SELECT regexp_replace('Thomas', '.[mN]a.', 'M');
+----
+ThM
+
+query ?
+SELECT regexp_replace(NULL, 'b(..)', 'X\\1Y', 'g');
+----
+NULL
+
+query ?
+SELECT regexp_match('foobarbequebaz', '');
+----
+[]
+
+query ?
+SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
+----
+[bar, beque]
+
+query ?
+SELECT regexp_match('foobarbequebaz', '(ba3r)(bequ34e)');
+----
+NULL
+
+query ?
+SELECT regexp_match('aaa-0', '.*-(\d)');
+----
+[0]
+
+query ?
+SELECT regexp_match('bb-1', '.*-(\d)');
+----
+[1]
+
+query ?
+SELECT regexp_match('aa', '.*-(\d)');
+----
+NULL
+
+query ?
+SELECT regexp_match(NULL, '.*-(\d)');
+----
+NULL
+
+query ?
+SELECT regexp_match('aaa-0', NULL);
+----
+NULL
+
+##### csv_query_nullif_divide_by_0
+
+
+statement ok
+CREATE EXTERNAL TABLE aggregate_test_100_by_sql (
+ c1 VARCHAR NOT NULL,
+ c2 TINYINT NOT NULL,
+ c3 SMALLINT NOT NULL,
+ c4 SMALLINT,
+ c5 INT,
+ c6 BIGINT NOT NULL,
+ c7 SMALLINT NOT NULL,
+ c8 INT NOT NULL,
+ c9 BIGINT UNSIGNED NOT NULL,
+ c10 VARCHAR NOT NULL,
+ c11 FLOAT NOT NULL,
+ c12 DOUBLE NOT NULL,
+ c13 VARCHAR NOT NULL
+)
+STORED AS CSV
+WITH HEADER ROW
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+
+
+
+query I
+SELECT c8/nullif(c7, 0) FROM aggregate_test_100_by_sql LIMIT 9 OFFSET 81;
+----
+664
+NULL
+22
+164
+448
+365
+1640
+671
+203
+
+#### nested_subquery()
+statement ok
+CREATE TABLE test_t1 (
+ id INT,
+ a INT
+)
+
+statement ok
+CREATE TABLE test_t2 (
+ id INT,
+ a INT
+)
+
+# the purpose of this test is just to make sure the query produces a valid plan
+query I
+SELECT COUNT(*) as cnt FROM ((SELECT id FROM test_t1) EXCEPT (SELECT id FROM
test_t2)) foo;
+----
+0
+
+
+#### comparisons_with_null_lt
+
+# 1. Numeric comparison with NULL
+query B
+select column1 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select column1 <= NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select column1 > NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select column1 >= NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select column1 = NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select column1 != NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+# 1.1 Float value comparison with NULL
+query B
+select column3 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+# 1.1 String comparison with NULL
+query B
+select column2 < NULL from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+#####
+# same queries, reversed argument order (as they go through
+# a different evaluation path)
+#####
+
+# Boolean comparison with NULL
+query B
+select column1 < NULL from (VALUES (true), (false)) as t;
+----
+NULL
+NULL
+
+query B
+select NULL < column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select NULL <= column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select NULL > column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select NULL >= column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select NULL = column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+query B
+select NULL != column1 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+# 1.1 Float value comparison with NULL
+query B
+select NULL < column3 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+# String comparison with NULL
+query B
+select NULL < column2 from (VALUES (1, 'foo' ,2.3), (2, 'bar', 5.4)) as t;
+----
+NULL
+NULL
+
+# Boolean comparison with NULL
+query B
+select NULL < column1 from (VALUES (true), (false)) as t;
+----
+NULL
+NULL
+
+#### binary_mathematical_operator_with_null_lt
+
+# 1. Integer and NULL
+query I
+select column1 + NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select column1 - NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select column1 / NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select column1 % NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+# 2. Float and NULL
+query R
+select column2 + NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select column2 - NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select column2 * NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select column2 / NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select column2 % NULL from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+#####
+# same queries, reversed argument order (as they go through
+# a different evaluation path)
+#####
+
+# 3. NULL and Integer
+query I
+select NULL + column1 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select NULL - column1 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select NULL * column1 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select NULL / column1 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query I
+select NULL % column1 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+# 4. NULL and Float
+
+query R
+select NULL + column2 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select NULL - column2 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select NULL * column2 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select NULL / column2 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+query R
+select NULL % column2 from (VALUES (1, 2.3), (2, 5.4)) as t;
+----
+NULL
+NULL
+
+
+# query_binary_eq
+
+statement ok
+CREATE TABLE t_source(
+ column1 String,
+ column2 String,
+ column3 String,
+ column4 String,
+) AS VALUES
+('one', 'one', 'one', 'one'),
+('two', 'two', '', 'two'),
+(NULL, NULL, NULL, NULL),
+('', '', '', ''),
+('three', 'three', 'three', '');
+
+
+statement ok
+CREATE TABLE test as
+select
+ arrow_cast(column1, 'Binary') as c1,
+ arrow_cast(column2, 'LargeBinary') as c2,
+ arrow_cast(column3, 'Binary') as c3,
+ arrow_cast(column4, 'LargeBinary') as c4
+from t_source;
+
+query BBBB
+SELECT sha256(c1)=digest('one', 'sha256'), sha256(c2)=sha256('two'),
digest(c1, 'blake2b')=digest(c3, 'blake2b'), c2=c4 FROM test;
+----
+true false true true
+false true false true
+NULL NULL NULL NULL
+false false true true
+false false true false