This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new f172b7c9a test: Add SQL file tests for left and right expressions (#3463)
f172b7c9a is described below
commit f172b7c9ae532369671d10146f1baf7a2402e7f1
Author: Andy Grove <[email protected]>
AuthorDate: Wed Feb 11 10:05:23 2026 -0700
test: Add SQL file tests for left and right expressions (#3463)
---
.../sql-tests/expressions/string/left.sql | 27 +++++
.../sql-tests/expressions/string/right.sql | 96 +++++++++++++++++
.../org/apache/comet/CometExpressionSuite.scala | 117 ---------------------
3 files changed, 123 insertions(+), 117 deletions(-)
diff --git a/spark/src/test/resources/sql-tests/expressions/string/left.sql b/spark/src/test/resources/sql-tests/expressions/string/left.sql
index 4605622e8..31372f0a4 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/left.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/left.sql
@@ -30,6 +30,17 @@ SELECT left(s, n) FROM test_str_left
query
SELECT left(s, 3) FROM test_str_left
+-- column + literal: edge cases
+query
+SELECT left(s, 0) FROM test_str_left
+
+query
+SELECT left(s, -1) FROM test_str_left
+
+query
+-- n exceeds length of 'hello' (5 chars)
+SELECT left(s, 10) FROM test_str_left
+
-- literal + column
query expect_fallback(Substring pos and len must be literals)
SELECT left('hello', n) FROM test_str_left
@@ -37,3 +48,19 @@ SELECT left('hello', n) FROM test_str_left
-- literal + literal
query ignore(https://github.com/apache/datafusion-comet/issues/3337)
SELECT left('hello', 3), left('hello', 0), left('hello', -1), left('', 3), left(NULL, 3)
+
+-- unicode
+statement
+CREATE TABLE test_str_left_unicode(s string) USING parquet
+
+statement
+INSERT INTO test_str_left_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)
+
+query
+SELECT s, left(s, 2) FROM test_str_left_unicode
+
+query
+SELECT s, left(s, 4) FROM test_str_left_unicode
+
+query
+SELECT s, left(s, 0) FROM test_str_left_unicode
diff --git a/spark/src/test/resources/sql-tests/expressions/string/right.sql b/spark/src/test/resources/sql-tests/expressions/string/right.sql
new file mode 100644
index 000000000..4fb9763bc
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/string/right.sql
@@ -0,0 +1,96 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- Note: Right is a RuntimeReplaceable expression. Spark replaces it with
+-- If(IsNull(str), null, If(len <= 0, "", Substring(str, -len, len)))
+-- before Comet sees it. CometRight handles the serde, but the optimizer
+-- may replace it first. We use spark_answer_only to verify correctness.
+
+-- ConfigMatrix: parquet.enable.dictionary=false,true
+
+statement
+CREATE TABLE test_str_right(s string, n int) USING parquet
+
+statement
+INSERT INTO test_str_right VALUES ('hello', 3), ('hello', 0), ('hello', -1), ('hello', 10), ('', 3), (NULL, 3), ('hello', NULL)
+
+-- both columns: len must be literal, falls back
+query spark_answer_only
+SELECT right(s, n) FROM test_str_right
+
+-- column + literal: basic
+query spark_answer_only
+SELECT right(s, 3) FROM test_str_right
+
+-- column + literal: edge cases
+query spark_answer_only
+SELECT right(s, 0) FROM test_str_right
+
+query spark_answer_only
+SELECT right(s, -1) FROM test_str_right
+
+query spark_answer_only
+-- n exceeds length of 'hello' (5 chars)
+SELECT right(s, 10) FROM test_str_right
+
+-- literal + column: falls back
+query spark_answer_only
+SELECT right('hello', n) FROM test_str_right
+
+-- literal + literal
+query spark_answer_only
+SELECT right('hello', 3), right('hello', 0), right('hello', -1), right('', 3), right(NULL, 3)
+
+-- null propagation with len <= 0 (critical: NULL str with non-positive len must return NULL, not empty string)
+query spark_answer_only
+SELECT right(CAST(NULL AS STRING), 0), right(CAST(NULL AS STRING), -1), right(CAST(NULL AS STRING), 2)
+
+-- mixed null and non-null values with len <= 0
+statement
+CREATE TABLE test_str_right_nulls(s string) USING parquet
+
+statement
+INSERT INTO test_str_right_nulls VALUES ('hello'), (NULL), (''), ('world')
+
+query spark_answer_only
+SELECT s, right(s, 0) FROM test_str_right_nulls
+
+query spark_answer_only
+SELECT s, right(s, -1) FROM test_str_right_nulls
+
+query spark_answer_only
+SELECT s, right(s, 2) FROM test_str_right_nulls
+
+-- equivalence with substring
+query spark_answer_only
+SELECT s, right(s, 3), substring(s, -3, 3) FROM test_str_right_nulls
+
+-- unicode
+statement
+CREATE TABLE test_str_right_unicode(s string) USING parquet
+
+statement
+INSERT INTO test_str_right_unicode VALUES ('café'), ('hello世界'), ('😀emoji'), ('తెలుగు'), (NULL)
+
+query spark_answer_only
+SELECT s, right(s, 2) FROM test_str_right_unicode
+
+query spark_answer_only
+SELECT s, right(s, 4) FROM test_str_right_unicode
+
+query spark_answer_only
+SELECT s, right(s, 0) FROM test_str_right_unicode
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 112630045..1ab8d54fd 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -523,123 +523,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
}
- test("LEFT function") {
- withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 4) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, -1) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 100) FROM tbl")
- checkSparkAnswerAndOperator("SELECT LEFT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
- }
- }
-
- test("LEFT function with unicode") {
- val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
- withParquetTable(data.zipWithIndex, "unicode_tbl") {
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 2) FROM unicode_tbl")
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM unicode_tbl")
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 0) FROM unicode_tbl")
- }
- }
-
- test("LEFT function equivalence with SUBSTRING") {
- withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
- val df = spark.sql("""
- SELECT _1,
- LEFT(_1, 3) as left_result,
- SUBSTRING(_1, 1, 3) as substring_result
- FROM equiv_tbl
- """)
- checkAnswer(
- df.filter(
- "left_result != substring_result OR " +
- "(left_result IS NULL AND substring_result IS NOT NULL) OR " +
- "(left_result IS NOT NULL AND substring_result IS NULL)"),
- Seq.empty)
- }
- }
-
- test("LEFT function with dictionary") {
- val data = (0 until 1000)
- .map(_ % 5)
- .map(i => s"value$i")
- withParquetTable(data.zipWithIndex, "dict_tbl") {
- checkSparkAnswerAndOperator("SELECT _1, LEFT(_1, 3) FROM dict_tbl")
- }
- }
-
- test("RIGHT function") {
- withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") {
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 4) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 100) FROM tbl")
- checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1")
- }
- }
-
- test("RIGHT function with unicode") {
- val data = Seq("café", "hello世界", "😀emoji", "తెలుగు")
- withParquetTable(data.zipWithIndex, "unicode_tbl") {
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM unicode_tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM unicode_tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM unicode_tbl")
- }
- }
-
- test("RIGHT function equivalence with SUBSTRING negative pos") {
- withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") {
- val df = spark.sql("""
- SELECT _1,
- RIGHT(_1, 3) as right_result,
- SUBSTRING(_1, -3, 3) as substring_result
- FROM equiv_tbl
- """)
- checkAnswer(
- df.filter(
- "right_result != substring_result OR " +
- "(right_result IS NULL AND substring_result IS NOT NULL) OR " +
- "(right_result IS NOT NULL AND substring_result IS NULL)"),
- Seq.empty)
- }
- }
-
- test("RIGHT function with dictionary") {
- val data = (0 until 1000)
- .map(_ % 5)
- .map(i => s"value$i")
- withParquetTable(data.zipWithIndex, "dict_tbl") {
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM dict_tbl")
- }
- }
-
- test("RIGHT function NULL handling") {
- // Test NULL propagation with len = 0 (critical edge case)
- withParquetTable((0 until 5).map(i => (s"test$i", i)), "null_tbl") {
- checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 0) FROM null_tbl LIMIT 1")
- checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -1) FROM null_tbl LIMIT 1")
- checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -5) FROM null_tbl LIMIT 1")
- }
-
- // Test non-NULL strings with len <= 0 (should return empty string)
- withParquetTable((0 until 5).map(i => (s"test$i", i)), "edge_tbl") {
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM edge_tbl")
- checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM edge_tbl")
- }
-
- // Test mixed NULL and non-NULL values with a table
- val table = "right_null_edge"
- withTable(table) {
- sql(s"create table $table(str string) using parquet")
- sql(s"insert into $table values('hello'), (NULL), (''), ('world')")
- checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 0) FROM $table")
- checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, -1) FROM $table")
- checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 2) FROM $table")
- }
- }
-
test("hour, minute, second") {
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]