This is an automated email from the ASF dual-hosted git repository.

vpyatkov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c1debe59c5 IGNITE-27817 Add REGEXP_SUBSTR/REGEXP_EXTRACT function for 
Calcite en… (#12726)
0c1debe59c5 is described below

commit 0c1debe59c5059dd4388f8d716e4e5904f960514
Author: Vladislav Pyatkov <[email protected]>
AuthorDate: Tue Feb 17 22:38:23 2026 +0300

    IGNITE-27817 Add REGEXP_SUBSTR/REGEXP_EXTRACT function for Calcite en… 
(#12726)
    
    …gine
---
 docs/_docs/SQL/sql-calcite.adoc                    |   2 +-
 docs/_docs/net-specific/net-linq.adoc              |   1 +
 docs/_docs/sql-reference/string-functions.adoc     |  16 ++
 .../query/calcite/exec/exp/RexImpTable.java        |   5 +
 .../calcite/prepare/IgniteConvertletTable.java     |   4 +
 .../calcite/sql/fun/IgniteStdSqlOperatorTable.java |   1 +
 .../calcite/integration/StdSqlOperatorsTest.java   |   3 +
 .../sql/function/string/test_regexp_substr.test    | 215 +++++++++++++++++++++
 8 files changed, 246 insertions(+), 1 deletion(-)

diff --git a/docs/_docs/SQL/sql-calcite.adoc b/docs/_docs/SQL/sql-calcite.adoc
index 558ffda4047..22c54986541 100644
--- a/docs/_docs/SQL/sql-calcite.adoc
+++ b/docs/_docs/SQL/sql-calcite.adoc
@@ -145,7 +145,7 @@ The Calcite-based SQL engine currently supports:
 |`COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `ANY_VALUE`, `LISTAGG`, `GROUP_CONCAT`, 
`STRING_AGG`, `ARRAY_AGG`, `ARRAY_CONCAT_AGG`, `EVERY`, `SOME`, `BIT_AND`, 
`BIT_OR`, `BIT_XOR`
 
 |String functions
-|`UPPER`, `LOWER`, `INITCAP`, `TO_BASE64`, `FROM_BASE64`, `MD5`, `SHA1`, 
`SUBSTRING`, `LEFT`, `RIGHT`, `REPLACE`, `TRANSLATE`, `CHR`, `CHAR_LENGTH`, 
`CHARACTER_LENGTH`, `LENGTH`, `CONCAT`, `OVERLAY`, `POSITION`, `ASCII`, 
`REPEAT`, `SPACE`, `STRCMP`, `SOUNDEX`, `DIFFERENCE`, `REVERSE`, `TRIM`, 
`LTRIM`, `RTRIM`, `REGEXP_REPLACE`
+|`UPPER`, `LOWER`, `INITCAP`, `TO_BASE64`, `FROM_BASE64`, `MD5`, `SHA1`, 
`SUBSTRING`, `LEFT`, `RIGHT`, `REPLACE`, `TRANSLATE`, `CHR`, `CHAR_LENGTH`, 
`CHARACTER_LENGTH`, `LENGTH`, `CONCAT`, `OVERLAY`, `POSITION`, `ASCII`, 
`REPEAT`, `SPACE`, `STRCMP`, `SOUNDEX`, `DIFFERENCE`, `REVERSE`, `TRIM`, 
`LTRIM`, `RTRIM`, `REGEXP_REPLACE`, `REGEXP_SUBSTR`
 
 |Math functions
 |`MOD`, `EXP`, `POWER`, `LN`, `LOG10`, `ABS`, `RAND`, `RAND_INTEGER`, `ACOS`, 
`ACOSH`, `ASIN`, `ASINH`, `ATAN`, `ATANH`, `ATAN2`, `SQRT`, `CBRT`, `COS`, 
`COSH`, `COT`, `COTH`, `DEGREES`, `RADIANS`, `ROUND`, `SIGN`, `SIN`, `SINH`, 
`TAN`, `TANH`, `SEC`, `SECH`, `CSC`, `CSCH`, `TRUNCATE`, `PI`, `BITAND`, 
`BITOR`, `BITXOR`
diff --git a/docs/_docs/net-specific/net-linq.adoc 
b/docs/_docs/net-specific/net-linq.adoc
index 006535a7aab..b46d54af2f7 100644
--- a/docs/_docs/net-specific/net-linq.adoc
+++ b/docs/_docs/net-specific/net-linq.adoc
@@ -223,6 +223,7 @@ Below is a list of .NET functions and their SQL equivalents 
that are supported b
 |`String.PadLeft`| `LPAD`
 |`String.PadRight`| `RPAD`
 |`Regex.Replace`| `REGEXP_REPLACE`
+|`Regex.Substr`| `REGEXP_SUBSTR`
 |`Regex.IsMatch`| `REGEXP_LIKE`
 |`Math.Abs`| `ABS`
 |`Math.Acos`| `ACOS`
diff --git a/docs/_docs/sql-reference/string-functions.adoc 
b/docs/_docs/sql-reference/string-functions.adoc
index 187acd2337f..62ed722a9f5 100644
--- a/docs/_docs/sql-reference/string-functions.adoc
+++ b/docs/_docs/sql-reference/string-functions.adoc
@@ -572,6 +572,22 @@ SELECT REGEXP_REPLACE(name, 'w+', 'W', 'i') FROM Players;
 ----
 
 
+== REGEXP_SUBSTR
+
+Returns the substring that matches a regular expression. For details, see the 
Java `Matcher.find()` method.
+
+[source,sql]
+----
+REGEXP_SUBSTR(inputString, regexString[, startInt[, occurrenceInt]])
+----
+
+
+Example:
+----
+SELECT REGEXP_SUBSTR(name, '[A-Za-z]+', 1, 2) FROM Players;
+----
+
+
 == REGEXP_LIKE
 
 Matches string to a regular expression. For details, see the Java 
`Matcher.find()` method. If any parameter is null (except the optional 
`flagsString` parameter), the result is null.
diff --git 
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
 
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
index 8cf84e1fcc5..f1b5802b702 100644
--- 
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
+++ 
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/exec/exp/RexImpTable.java
@@ -121,6 +121,7 @@ import static 
org.apache.calcite.sql.fun.SqlLibraryOperators.LOG;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.MD5;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.MONTHNAME;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE_3;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_SUBSTR;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REVERSE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.RIGHT;
@@ -456,6 +457,10 @@ public class RexImpTable {
             BuiltInMethod.REGEXP_REPLACE4.method,
             BuiltInMethod.REGEXP_REPLACE5_OCCURRENCE.method,
             BuiltInMethod.REGEXP_REPLACE6.method);
+        defineReflective(REGEXP_SUBSTR,
+            BuiltInMethod.REGEXP_EXTRACT2.method,
+            BuiltInMethod.REGEXP_EXTRACT3.method,
+            BuiltInMethod.REGEXP_EXTRACT4.method);
 
         // Multisets & arrays
         defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
diff --git 
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
 
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
index 08d51500337..b6e27621b54 100644
--- 
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
+++ 
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/prepare/IgniteConvertletTable.java
@@ -27,6 +27,7 @@ import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlIntervalQualifier;
+import org.apache.calcite.sql.fun.SqlLibraryOperators;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.SqlTypeName;
@@ -36,6 +37,7 @@ import org.apache.calcite.sql2rel.SqlRexContext;
 import org.apache.calcite.sql2rel.SqlRexConvertlet;
 import org.apache.calcite.sql2rel.StandardConvertletTable;
 import 
org.apache.ignite.internal.processors.query.calcite.sql.fun.IgniteOwnSqlOperatorTable;
+import org.apache.ignite.internal.processors.query.calcite.util.Commons;
 import org.checkerframework.checker.nullness.qual.Nullable;
 
 /**
@@ -49,6 +51,8 @@ public class IgniteConvertletTable extends 
ReflectiveConvertletTable {
     protected IgniteConvertletTable() {
         // Replace Calcite's convertlet with our own.
         registerOp(SqlStdOperatorTable.TIMESTAMP_DIFF, new 
TimestampDiffConvertlet());
+        registerOp(SqlLibraryOperators.REGEXP_SUBSTR, (cx, call) -> 
cx.getRexBuilder().makeCall(
+            SqlLibraryOperators.REGEXP_SUBSTR, 
Commons.transform(call.getOperandList(), cx::convertExpression)));
 
         addAlias(IgniteOwnSqlOperatorTable.LENGTH, 
SqlStdOperatorTable.CHAR_LENGTH);
     }
diff --git 
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
 
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
index d8e3d047a84..a49e4a161f0 100644
--- 
a/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
+++ 
b/modules/calcite/src/main/java/org/apache/ignite/internal/processors/query/calcite/sql/fun/IgniteStdSqlOperatorTable.java
@@ -230,6 +230,7 @@ public class IgniteStdSqlOperatorTable extends 
ReflectiveSqlOperatorTable {
         register(SqlLibraryOperators.REGEXP_REPLACE_4);
         register(SqlLibraryOperators.REGEXP_REPLACE_5_ORACLE);
         register(SqlLibraryOperators.REGEXP_REPLACE_6);
+        register(SqlLibraryOperators.REGEXP_SUBSTR);
 
         // Collections.
         register(SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR);
diff --git 
a/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
 
b/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
index 6115d6c8415..72d6feaa335 100644
--- 
a/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
+++ 
b/modules/calcite/src/test/java/org/apache/ignite/internal/processors/query/calcite/integration/StdSqlOperatorsTest.java
@@ -275,6 +275,9 @@ public class StdSqlOperatorsTest extends 
AbstractBasicIntegrationTest {
         assertExpression("REGEXP_REPLACE('aAaA', '[Aa]+', 'X', 
2)").returns("aX").check();
         assertExpression("REGEXP_REPLACE('aAbaAcaA', '[Aa]+', 'X', 2, 
2)").returns("aAbXcaA").check();
         assertExpression("REGEXP_REPLACE('aAbaAcaA', 'a', 'X', 2, 0, 
'i')").returns("aXbXXcXX").check();
+        assertExpression("REGEXP_SUBSTR('hello world', 
'l.o')").returns("llo").check();
+        assertExpression("REGEXP_SUBSTR('abc123def456', '[0-9]+', 
7)").returns("456").check();
+        assertExpression("REGEXP_SUBSTR('123-456-789', '[0-9]+', 1, 
3)").returns("789").check();
     }
 
     /** */
diff --git 
a/modules/calcite/src/test/sql/function/string/test_regexp_substr.test 
b/modules/calcite/src/test/sql/function/string/test_regexp_substr.test
new file mode 100644
index 00000000000..785095ef88e
--- /dev/null
+++ b/modules/calcite/src/test/sql/function/string/test_regexp_substr.test
@@ -0,0 +1,215 @@
+# name: test/sql/function/string/test_regexp_substr.test
+# description: REGEXP_SUBSTR test
+# group: [string]
+
+# Basic tests with one argument
+query T
+SELECT REGEXP_SUBSTR('abc123def', '[0-9]+')
+----
+123
+
+query T
+SELECT REGEXP_SUBSTR('hello world', '[a-z]+')
+----
+hello
+
+query T
+SELECT REGEXP_SUBSTR('HELLO', '[a-z]+')
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('', '[0-9]+')
+----
+NULL
+
+# Tests with start position
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 4)
+----
+123
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 7)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 10)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('hello world', '[a-z]+', 7)
+----
+world
+
+# Tests with occurrence
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 1)
+----
+123
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 2)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 3)
+----
+789
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 4)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 1)
+----
+456
+
+query T
+SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 2)
+----
+789
+
+# Tests with complex regex patterns
+query T
+SELECT REGEXP_SUBSTR('[email protected]', 
'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
+----
[email protected]
+
+query T
+SELECT REGEXP_SUBSTR('Contact: [email protected] or [email protected]', 
'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 1, 2)
+----
[email protected]
+
+query T
+SELECT REGEXP_SUBSTR('price: $19.99, discount: $5.99', '\$\d+\.\d{2}')
+----
+$19.99
+
+query T
+SELECT REGEXP_SUBSTR('The date is 2023-12-25', '\d{4}-\d{2}-\d{2}')
+----
+2023-12-25
+
+query T
+SELECT REGEXP_SUBSTR('file123.txt, file456.doc', '[a-z]+\d+\.[a-z]+', 1, 2)
+----
+file456.doc
+
+# Tests with special characters
+query T
+SELECT REGEXP_SUBSTR('a+b*c?d', '[+*?]')
+----
++
+
+query T
+SELECT REGEXP_SUBSTR('(test) [text] {data}', '[\(\)\[\]\{\}]', 1, 3)
+----
+[
+
+# Edge cases
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 1)
+----
+aaaa
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 2)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 4)
+----
+a
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 5)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('123-456-789', '\d+', 1, 3)
+----
+789
+
+# Tests with NULL values
+query T
+SELECT REGEXP_SUBSTR(NULL, '[0-9]+')
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123', NULL)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123', '[0-9]+', NULL)
+----
+NULL
+
+query T
+SELECT REGEXP_SUBSTR('abc123', '[0-9]+', 1, NULL)
+----
+NULL
+
+# Tests with overlapping matches
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 1)
+----
+aa
+
+query T
+SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 2)
+----
+aa
+
+# Tests with word boundaries
+query T
+SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 1)
+----
+cat
+
+query T
+SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 2)
+----
+NULL
+
+# Tests with case sensitivity
+query T
+SELECT REGEXP_SUBSTR('ABCabc', '[a-z]+')
+----
+abc
+
+query T
+SELECT REGEXP_SUBSTR('ABCabc', '[A-Z]+')
+----
+ABC
+
+# Error cases
+statement error
+SELECT REGEXP_SUBSTR('abc')
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[0-9]+', 'invalid')
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 'invalid')
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 1, 1)
+
+statement error
+SELECT REGEXP_SUBSTR('abc', '[a-z', 1)
+
+# Performance test with large string
+query T
+SELECT REGEXP_SUBSTR(repeat('abc123', 1000), '[0-9]+', 1, 500)
+----
+123

Reply via email to