This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 9696cbc5ee [CALCITE-6433] SUBSTRING function can return incorrect
empty result
9696cbc5ee is described below
commit 9696cbc5ee11c324ffb4cb3dd86f84820d41e261
Author: zstan <[email protected]>
AuthorDate: Tue Aug 6 18:16:44 2024 +0300
[CALCITE-6433] SUBSTRING function can return incorrect empty result
---
.../org/apache/calcite/runtime/SqlFunctions.java | 54 +++++++++++++++------
.../org/apache/calcite/test/SqlFunctionsTest.java | 56 ++++++++++++++++++++++
site/_docs/reference.md | 4 +-
.../org/apache/calcite/test/SqlOperatorTest.java | 17 +++++++
4 files changed, 114 insertions(+), 17 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 456977d1ad..7da9705c1c 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -1026,18 +1026,6 @@ public class SqlFunctions {
return s;
}
- /** SQL SUBSTRING(string FROM ...) function. */
- public static String substring(String c, int s) {
- if (s <= 1) {
- return c;
- }
- if (s > c.length()) {
- return "";
- }
- final int s0 = s - 1;
- return c.substring(s0);
- }
-
// Clamp very large long values to integer values.
// Used by the substring functions.
// Java strings do not support long indexes anyway,
@@ -1054,6 +1042,18 @@ public class SqlFunctions {
return (int) s;
}
+ /** SQL SUBSTRING(string FROM ...) function. */
+ public static String substring(String c, int s) {
+ if (s <= 1) {
+ return c;
+ }
+ if (s > c.length()) {
+ return "";
+ }
+ final int s0 = s - 1;
+ return c.substring(s0);
+ }
+
public static String substring(String c, long s) {
return substring(c, clamp(s));
}
@@ -1076,15 +1076,39 @@ public class SqlFunctions {
}
public static String substring(String c, int s, long l) {
- return substring(c, s, clamp(l));
+ if (s < 0) {
+ if (l > 0 && l + s > 0) {
+ l += s;
+ return substring(c, 0, l);
+ }
+ }
+ int l0 = clamp(l);
+ return SqlFunctions.substring(c, s, l0);
}
public static String substring(String c, long s, int l) {
- return substring(c, clamp(s), l);
+ if (s < 0) {
+ s += l;
+ if (s > 0) {
+ return substring(c, 0, s);
+ } else {
+ return "";
+ }
+ }
+ int s0 = clamp(s);
+ return SqlFunctions.substring(c, s0, l);
}
public static String substring(String c, long s, long l) {
- return substring(c, clamp(s), clamp(l));
+ if (s < 0) {
+ if (l > 0) {
+ l += s;
+ return substring(c, 0, clamp(l));
+ }
+ }
+ int s0 = clamp(s);
+ int l0 = clamp(l);
+ return SqlFunctions.substring(c, s0, l0);
}
/** SQL SUBSTRING(binary FROM ...) function for binary. */
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index c96fe61ad9..7e37cd1557 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -65,6 +65,7 @@ import static org.apache.calcite.runtime.SqlFunctions.rtrim;
import static org.apache.calcite.runtime.SqlFunctions.sha1;
import static org.apache.calcite.runtime.SqlFunctions.sha256;
import static org.apache.calcite.runtime.SqlFunctions.sha512;
+import static org.apache.calcite.runtime.SqlFunctions.substring;
import static org.apache.calcite.runtime.SqlFunctions.toBase64;
import static org.apache.calcite.runtime.SqlFunctions.toInt;
import static org.apache.calcite.runtime.SqlFunctions.toIntOptional;
@@ -172,6 +173,61 @@ class SqlFunctionsTest {
assertThat(concat(null, "b"), is("nullb"));
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6433">[CALCITE-6433]
+ * SUBSTRING can return incorrect empty result for some parameters</a>. */
+ @Test void testSubString() {
+ // str vs single param
+ assertThat(substring("string", -1), is("string"));
+ assertThat(substring("string", -1L), is("string"));
+ assertThat(substring("string", 2), is("tring"));
+ assertThat(substring("string", 2L), is("tring"));
+ assertThat(substring("string", Integer.MIN_VALUE), is("string"));
+ assertThat(substring("string", Long.MIN_VALUE), is("string"));
+ assertThat(substring("string", Integer.MIN_VALUE + 10), is("string"));
+ assertThat(substring("string", Integer.MAX_VALUE), is(""));
+ assertThat(substring("string", Long.MAX_VALUE), is(""));
+ assertThat(substring("string", Integer.MAX_VALUE - 10), is(""));
+ assertThat(substring("string", Integer.MIN_VALUE - 10L), is("string"));
+ assertThat(substring("string", Integer.MAX_VALUE + 10L), is(""));
+
+ // str vs multi params
+ assertThat(substring("string", -1, 1), is(""));
+ assertThat(substring("string", -1, 1L), is(""));
+ assertThat(substring("string", -1L, 1), is(""));
+ assertThat(substring("string", -1L, 1L), is(""));
+
+ assertThat(substring("string", 1, 2), is("st"));
+ assertThat(substring("string", 1, 2L), is("st"));
+ assertThat(substring("string", 1L, 2), is("st"));
+ assertThat(substring("string", 1L, 2L), is("st"));
+
+ assertThat(substring("string", -1, 2), is(""));
+ assertThat(substring("string", -1L, 2), is(""));
+ assertThat(substring("string", -1, 2L), is(""));
+ assertThat(substring("string", -1L, 2L), is(""));
+
+ assertThat(substring("string", -1, 3), is("s"));
+ assertThat(substring("string", -1L, 3), is("s"));
+ assertThat(substring("string", -1, 3L), is("s"));
+ assertThat(substring("string", -1L, 3L), is("s"));
+
+ assertThat(substring("string", -10, 12), is("s"));
+ assertThat(substring("string", -10L, 12), is("s"));
+ assertThat(substring("string", -10, 12L), is("s"));
+ assertThat(substring("string", -10L, 12L), is("s"));
+
+ assertThat(substring("string", -1, Integer.MAX_VALUE), is("string"));
+ assertThat(substring("string", -1L, Integer.MAX_VALUE), is("string"));
+ assertThat(substring("string", -1, Long.MAX_VALUE), is("string"));
+
+ assertThat(substring("string", Integer.MIN_VALUE, Integer.MAX_VALUE),
is(""));
+ assertThat(substring("string", Integer.MIN_VALUE, Integer.MAX_VALUE +
10L), is("string"));
+ assertThat(substring("string", Long.MIN_VALUE, Integer.MAX_VALUE), is(""));
+ assertThat(substring("string", Integer.MIN_VALUE, Long.MAX_VALUE),
is("string"));
+ assertThat(substring("string", Integer.MIN_VALUE - 10L, Long.MAX_VALUE),
is("string"));
+ }
+
@Test void testConcatWithNull() {
assertThat(concatWithNull("a b", "cd"), is("a bcd"));
// Null value could be passed in. If we pass one null value,
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 02b81864ed..2ceabcc97a 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -1380,8 +1380,8 @@ comp:
| POSITION(substring IN string FROM integer) | Returns the position of the
first occurrence of *substring* in *string* starting at a given point (not
standard SQL)
| TRIM( { BOTH | LEADING | TRAILING } string1 FROM string2) |
Removes the longest string containing only the characters in *string1* from the
start/end/both ends of *string2*
| OVERLAY(string1 PLACING string2 FROM integer [ FOR integer2 ]) | Replaces a
substring of *string1* with *string2*, starting at the specified position
*integer* in *string1* and optionally for a specified length *integer2*
-| SUBSTRING(string FROM integer) | Returns a substring of a character string
starting at a given point
-| SUBSTRING(string FROM integer FOR integer) | Returns a substring of a
character string starting at a given point with a given length
+| SUBSTRING(string FROM integer) | Returns a substring of a character string
starting at a given point. If starting point is less than 1, the returned
expression will begin at the first character that is specified in expression
+| SUBSTRING(string FROM integer FOR integer) | Returns a substring of a
character string starting at a given point with a given length. If start point
is less than 1 in this case, the number of characters that are returned is the
largest value of either the start + length - 1 or 0
| INITCAP(string) | Returns *string* with the first letter of each
word converter to upper case and the rest to lower case. Words are sequences of
alphanumeric characters separated by non-alphanumeric characters.
Not implemented:
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 44e5978670..ce80c28372 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -10783,6 +10783,10 @@ public class SqlOperatorTest {
String.format(Locale.ROOT, "{fn SUBSTRING('abcdef', %d)}",
Integer.MIN_VALUE),
"abcdef", "VARCHAR(6) NOT NULL");
+ f.checkScalar(
+ String.format(Locale.ROOT, "{fn SUBSTRING('abcdef', %d, %d)}",
Integer.MIN_VALUE,
+ Integer.MAX_VALUE + 10L), "abcdef", "VARCHAR(6) NOT NULL");
+
f.checkScalar(
String.format(Locale.ROOT, "{fn SUBSTRING('abcdef', CAST(%d AS
BIGINT))}",
Integer.MIN_VALUE), "abcdef", "VARCHAR(6) NOT NULL");
@@ -10796,6 +10800,9 @@ public class SqlOperatorTest {
"aabb", "VARBINARY(3) NOT NULL");
f.checkString("substring('abc' from 2 for 2147483646)",
"bc", "VARCHAR(3) NOT NULL");
+ f.checkString(
+ String.format(Locale.ROOT, "substring('string', CAST(%d AS TINYINT),
%d)",
+ Byte.MIN_VALUE, Byte.MAX_VALUE + 10), "string", "VARCHAR(6) NOT NULL");
switch (f.conformance().semantics()) {
case BIG_QUERY:
@@ -10805,6 +10812,16 @@ public class SqlOperatorTest {
"VARBINARY(3) NOT NULL");
break;
default:
+ f.checkFails(
+ String.format(Locale.ROOT, "^substring('string', CAST(%d AS DOUBLE),
"
+ + "CAST(%d AS DOUBLE))^", Byte.MIN_VALUE, Byte.MAX_VALUE + 10),
+ "Cannot apply 'SUBSTRING' to arguments of type "
+ + ".*\\n.*\\n.*\\n.*\\n.*\\n.*\\n.*\\n.*", false);
+ f.checkFails(
+ String.format(Locale.ROOT, "^substring('string', CAST(%d AS
DECIMAL), "
+ + "CAST(%d AS DECIMAL))^", Byte.MIN_VALUE, Byte.MAX_VALUE + 10),
+ "Cannot apply 'SUBSTRING' to arguments of type
.*\\n.*\\n.*\\n.*\\n.*\\n.*\\n.*\\n.*",
+ false);
f.checkFails("substring('abc' from 1 for -1)",
"Substring error: negative substring length not allowed",
true);