This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 7ba56935 fix: substring with negative indices should produce correct result (#470)
7ba56935 is described below
commit 7ba569357cadade64b49b6bdf5f0946f81f95301
Author: Son <[email protected]>
AuthorDate: Mon May 27 03:13:19 2024 +0700
fix: substring with negative indices should produce correct result (#470)
---
core/src/execution/datafusion/planner.rs | 3 ++-
spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala | 6 ++++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs
index 01d89238..7a37e3aa 100644
--- a/core/src/execution/datafusion/planner.rs
+++ b/core/src/execution/datafusion/planner.rs
@@ -395,7 +395,8 @@ impl PhysicalPlanner {
let child = self.create_expr(expr.child.as_ref().unwrap(),
input_schema)?;
// Spark Substring's start is 1-based when start > 0
let start = expr.start - i32::from(expr.start > 0);
- let len = expr.len;
+ // substring negative len is treated as 0 in Spark
+ let len = std::cmp::max(expr.len, 0);
Ok(Arc::new(SubstringExec::new(
child,
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 99261508..1afdd78e 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -156,6 +156,12 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
test("string type and substring") {
withParquetTable((0 until 5).map(i => (i.toString, (i + 100).toString)),
"tbl") {
checkSparkAnswerAndOperator("SELECT _1, substring(_2, 2, 2) FROM tbl")
+ checkSparkAnswerAndOperator("SELECT _1, substring(_2, 2, -2) FROM tbl")
+ checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, 2) FROM tbl")
+ checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, -2) FROM tbl")
+ checkSparkAnswerAndOperator("SELECT _1, substring(_2, -2, 10) FROM tbl")
+ checkSparkAnswerAndOperator("SELECT _1, substring(_2, 0, 0) FROM tbl")
+ checkSparkAnswerAndOperator("SELECT _1, substring(_2, 1, 0) FROM tbl")
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]