This is an automated email from the ASF dual-hosted git repository.
vpyatkov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 0c1debe59c5 IGNITE-27817 Add REGEXP_SUBSTR/REGEXP_EXTRACT function for
Calcite en… (#12726)
0c1debe59c5 is described below
commit 0c1debe59c5059dd4388f8d716e4e5904f960514
Author: Vladislav Pyatkov <[email protected]>
AuthorDate: Tue Feb 17 22:38:23 2026 +0300
IGNITE-27817 Add REGEXP_SUBSTR/REGEXP_EXTRACT function for Calcite en…
(#12726)
…gine
---
docs/_docs/SQL/sql-calcite.adoc | 2 +-
docs/_docs/net-specific/net-linq.adoc | 1 +
docs/_docs/sql-reference/string-functions.adoc | 16 ++
.../query/calcite/exec/exp/RexImpTable.java | 5 +
.../calcite/prepare/IgniteConvertletTable.java | 4 +
.../calcite/sql/fun/IgniteStdSqlOperatorTable.java | 1 +
.../calcite/integration/StdSqlOperatorsTest.java | 3 +
.../sql/function/string/test_regexp_substr.test | 215 +++++++++++++++++++++
8 files changed, 246 insertions(+), 1 deletion(-)
diff --git a/docs/_docs/SQL/sql-calcite.adoc b/docs/_docs/SQL/sql-calcite.adoc
index 558ffda4047..22c54986541 100644
--- a/docs/_docs/SQL/sql-calcite.adoc
+++ b/docs/_docs/SQL/sql-calcite.adoc
@@ -145,7 +145,7 @@ The Calcite-based SQL engine currently supports:
|`COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `ANY_VALUE`, `LISTAGG`, `GROUP_CONCAT`,
`STRING_AGG`, `ARRAY_AGG`, `ARRAY_CONCAT_AGG`, `EVERY`, `SOME`, `BIT_AND`,
`BIT_OR`, `BIT_XOR`
|String functions
-|`UPPER`, `LOWER`, `INITCAP`, `TO_BASE64`, `FROM_BASE64`, `MD5`, `SHA1`,
`SUBSTRING`, `LEFT`, `RIGHT`, `REPLACE`, `TRANSLATE`, `CHR`, `CHAR_LENGTH`,
`CHARACTER_LENGTH`, `LENGTH`, `CONCAT`, `OVERLAY`, `POSITION`, `ASCII`,
`REPEAT`, `SPACE`, `STRCMP`, `SOUNDEX`, `DIFFERENCE`, `REVERSE`, `TRIM`,
`LTRIM`, `RTRIM`, `REGEXP_REPLACE`
+|`UPPER`, `LOWER`, `INITCAP`, `TO_BASE64`, `FROM_BASE64`, `MD5`, `SHA1`,
`SUBSTRING`, `LEFT`, `RIGHT`, `REPLACE`, `TRANSLATE`, `CHR`, `CHAR_LENGTH`,
`CHARACTER_LENGTH`, `LENGTH`, `CONCAT`, `OVERLAY`, `POSITION`, `ASCII`,
`REPEAT`, `SPACE`, `STRCMP`, `SOUNDEX`, `DIFFERENCE`, `REVERSE`, `TRIM`,
`LTRIM`, `RTRIM`, `REGEXP_REPLACE`, `REGEXP_SUBSTR`
|Math functions
|`MOD`, `EXP`, `POWER`, `LN`, `LOG10`, `ABS`, `RAND`, `RAND_INTEGER`, `ACOS`,
`ACOSH`, `ASIN`, `ASINH`, `ATAN`, `ATANH`, `ATAN2`, `SQRT`, `CBRT`, `COS`,
`COSH`, `COT`, `COTH`, `DEGREES`, `RADIANS`, `ROUND`, `SIGN`, `SIN`, `SINH`,
`TAN`, `TANH`, `SEC`, `SECH`, `CSC`, `CSCH`, `TRUNCATE`, `PI`, `BITAND`,
`BITOR`, `BITXOR`
diff --git a/docs/_docs/net-specific/net-linq.adoc
b/docs/_docs/net-specific/net-linq.adoc
index 006535a7aab..b46d54af2f7 100644
--- a/docs/_docs/net-specific/net-linq.adoc
+++ b/docs/_docs/net-specific/net-linq.adoc
@@ -223,6 +223,7 @@ Below is a list of .NET functions and their SQL equivalents
that are supported b
|`String.PadLeft`| `LPAD`
|`String.PadRight`| `RPAD`
|`Regex.Replace`| `REGEXP_REPLACE`
+|`Regex.Substr`| `REGEXP_SUBSTR`
|`Regex.IsMatch`| `REGEXP_LIKE`
|`Math.Abs`| `ABS`
|`Math.Acos`| `ACOS`
diff --git a/docs/_docs/sql-reference/string-functions.adoc
b/docs/_docs/sql-reference/string-functions.adoc
index 187acd2337f..62ed722a9f5 100644
--- a/docs/_docs/sql-reference/string-functions.adoc
+++ b/docs/_docs/sql-reference/string-functions.adoc
@@ -572,6 +572,22 @@ SELECT REGEXP_REPLACE(name, 'w+', 'W', 'i') FROM Players;
----
+== REGEXP_SUBSTR
+
+Returns the substring that matches a regular expression. For details, see the
Java `Matcher.find()` method.
+
+[source,sql]
+----
+REGEXP_SUBSTR(inputString, regexString[, startInt[, occurrenceInt]])
+----
+
+
+Example:
+----
+SELECT REGEXP_SUBSTR(name, '[A-Za-z]+', 1, 2) FROM Players;
+----
+
+
== REGEXP_LIKE
Matches string to a regular expression. For details, see the Java
`Matcher.find()` method. If any parameter is null (except the optional
`flagsString` parameter), the result is null.
diff --git
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
index 8cf84e1fcc5..f1b5802b702 100644
---
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
+++
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
@@ -121,6 +121,7 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.LOG;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.MD5;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.MONTHNAME;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_SUBSTR;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.REVERSE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.RIGHT;
@@ -456,6 +457,10 @@ public class RexImpTable {
BuiltInMethod.REGEXP_REPLACE4.method,
BuiltInMethod.REGEXP_REPLACE5_OCCURRENCE.method,
BuiltInMethod.REGEXP_REPLACE6.method);
+ defineReflective(REGEXP_SUBSTR,
+ BuiltInMethod.REGEXP_EXTRACT2.method,
+ BuiltInMethod.REGEXP_EXTRACT3.method,
+ BuiltInMethod.REGEXP_EXTRACT4.method);
// Multisets & arrays
defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
diff --git
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
index 08d51500337..b6e27621b54 100644
---
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
+++
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
@@ -27,6 +27,7 @@ import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlIntervalQualifier;
+import org.apache.calcite.sql.fun.SqlLibraryOperators;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
@@ -36,6 +37,7 @@ import org.apache.calcite.sql2rel.SqlRexContext;
import org.apache.calcite.sql2rel.SqlRexConvertlet;
import org.apache.calcite.sql2rel.StandardConvertletTable;
import
org.apache.ignite.internal.processors.query.calcite.sql.fun.IgniteOwnSqlOperatorTable;
+import org.apache.ignite.internal.processors.query.calcite.util.Commons;
import org.checkerframework.checker.nullness.qual.Nullable;
/**
@@ -49,6 +51,8 @@ public class IgniteConvertletTable extends
ReflectiveConvertletTable {
protected IgniteConvertletTable() {
// Replace Calcite's convertlet with our own.
registerOp(SqlStdOperatorTable.TIMESTAMP_DIFF, new
TimestampDiffConvertlet());
+ registerOp(SqlLibraryOperators.REGEXP_SUBSTR, (cx, call) ->
cx.getRexBuilder().makeCall(
+ SqlLibraryOperators.REGEXP_SUBSTR,
Commons.transform(call.getOperandList(), cx::convertExpression)));
addAlias(IgniteOwnSqlOperatorTable.LENGTH,
SqlStdOperatorTable.CHAR_LENGTH);
}
diff --git
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
index d8e3d047a84..a49e4a161f0 100644
---
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
+++
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
@@ -230,6 +230,7 @@ public class IgniteStdSqlOperatorTable extends
ReflectiveSqlOperatorTable {
register(SqlLibraryOperators.REGEXP_REPLACE_4);
register(SqlLibraryOperators.REGEXP_REPLACE_5_ORACLE);
register(SqlLibraryOperators.REGEXP_REPLACE_6);
+ register(SqlLibraryOperators.REGEXP_SUBSTR);
// Collections.
register(SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR);
diff --git
a/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
b/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
index 6115d6c8415..72d6feaa335 100644
---
a/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
+++
b/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
@@ -275,6 +275,9 @@ public class StdSqlOperatorsTest extends
AbstractBasicIntegrationTest {
assertExpression("REGEXP_REPLACE('aAaA', '[Aa]+', 'X',
2)").returns("aX").check();
assertExpression("REGEXP_REPLACE('aAbaAcaA', '[Aa]+', 'X', 2,
2)").returns("aAbXcaA").check();
assertExpression("REGEXP_REPLACE('aAbaAcaA', 'a', 'X', 2, 0,
'i')").returns("aXbXXcXX").check();
+ assertExpression("REGEXP_SUBSTR('hello world',
'l.o')").returns("llo").check();
+ assertExpression("REGEXP_SUBSTR('abc123def456', '[0-9]+',
7)").returns("456").check();
+ assertExpression("REGEXP_SUBSTR('123-456-789', '[0-9]+', 1,
3)").returns("789").check();
}
/** */
diff --git
a/modules/calcite/src/test/sql/function/string/test_regexp_substr.test
b/modules/calcite/src/test/sql/function/string/test_regexp_substr.test
new file mode 100644
index 00000000000..785095ef88e
--- /dev/null
+++ b/modules/calcite/src/test/sql/function/string/test_regexp_substr.test
@@ -0,0 +1,215 @@
+# name: test/sql/function/string/test_regexp_substr.test
+# description: REGEXP_SUBSTR test
+# group: [string]
+
+# Basic tests with one argument
+query T
+SELECT REGEXP_SUBSTR('abc123def', '[0-9]+')
+----
+123
+
+query T
+SELECT REGEXP_SUBSTR('hello world', '[a-z]+')
+----
+hello
+
+query T
+SELECT REGEXP_SUBSTR('HELLO', '[a-z]+')
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('', '[0-9]+')
+----
+NULL
+
+# Tests with start position
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 4)
+----
+123
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 7)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 10)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('hello world', '[a-z]+', 7)
+----
+world
+
+# Tests with occurrence
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 1)
+----
+123
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 2)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 3)
+----
+789
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 4)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 1)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 2)
+----
+789
+
+# Tests with complex regex patterns
+query T
+SELECT REGEXP_SUBSTR('[email protected]',
'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
+----
[email protected]
+
+query T
+SELECT REGEXP_SUBSTR('Contact: [email protected] or [email protected]',
'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 1, 2)
+----
[email protected]
+
+query T
+SELECT REGEXP_SUBSTR('price: $19.99, discount: $5.99', '\$\d+\.\d{2}')
+----
+$19.99
+
+query T
+SELECT REGEXP_SUBSTR('The date is 2023-12-25', '\d{4}-\d{2}-\d{2}')
+----
+2023-12-25
+
+query T
+SELECT REGEXP_SUBSTR('file123.txt, file456.doc', '[a-z]+\d+\.[a-z]+', 1, 2)
+----
+file456.doc
+
+# Tests with special characters
+query T
+SELECT REGEXP_SUBSTR('a+b*c?d', '[+*?]')
+----
++
+
+query T
+SELECT REGEXP_SUBSTR('(test) [text] {data}', '[\(\)\[\]\{\}]', 1, 3)
+----
+[
+
+# Edge cases
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 1)
+----
+aaaa
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 2)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 4)
+----
+a
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 5)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('123-456-789', '\d+', 1, 3)
+----
+789
+
+# Tests with NULL values
+query T
+SELECT REGEXP_SUBSTR(NULL, '[0-9]+')
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123', NULL)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123', '[0-9]+', NULL)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123', '[0-9]+', 1, NULL)
+----
+NULL
+
+# Tests with overlapping matches
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 1)
+----
+aa
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 2)
+----
+aa
+
+# Tests with word boundaries
+query T
+SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 1)
+----
+cat
+
+query T
+SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 2)
+----
+NULL
+
+# Tests with case sensitivity
+query T
+SELECT REGEXP_SUBSTR('ABCabc', '[a-z]+')
+----
+abc
+
+query T
+SELECT REGEXP_SUBSTR('ABCabc', '[A-Z]+')
+----
+ABC
+
+# Error cases
+statement error
+SELECT REGEXP_SUBSTR('abc')
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[0-9]+', 'invalid')
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 'invalid')
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 1, 1)
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[a-z', 1)
+
+# Performance test with large string
+query T
+SELECT REGEXP_SUBSTR(repeat('abc123', 1000), '[0-9]+', 1, 500)
+----
+123