This is an automated email from the ASF dual-hosted git repository.
dockerzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new cc06854292 [INLONG-11060][SDK] Transform support REGEXP_...() related
functions (#11102)
cc06854292 is described below
commit cc06854292e6fc1318135711f669f363018d9f91
Author: emptyOVO <[email protected]>
AuthorDate: Fri Sep 20 17:29:30 2024 +0800
[INLONG-11060][SDK] Transform support REGEXP_...() related functions
(#11102)
---
.../process/function/RegexpCountFunction.java | 71 ++++++++++++++++
.../process/function/RegexpExtractAllFunction.java | 94 ++++++++++++++++++++++
.../process/function/RegexpExtractFunction.java | 84 +++++++++++++++++++
.../transform/process/function/RegexpFunction.java | 65 +++++++++++++++
.../process/function/RegexpInstrFunction.java | 71 ++++++++++++++++
.../process/function/RegexpReplaceFunction.java | 67 +++++++++++++++
.../process/function/RegexpSubstrFunction.java | 70 ++++++++++++++++
.../function/string/TestRegexExtractFunction.java | 91 +++++++++++++++++++++
.../function/string/TestRegexpCountFunction.java | 77 ++++++++++++++++++
.../string/TestRegexpExtractAllFunction.java | 92 +++++++++++++++++++++
.../function/string/TestRegexpFunction.java | 77 ++++++++++++++++++
.../function/string/TestRegexpInstrFunction.java | 84 +++++++++++++++++++
.../function/string/TestRegexpReplaceFunction.java | 65 +++++++++++++++
.../function/string/TestRegexpSubstrFunction.java | 84 +++++++++++++++++++
14 files changed, 1092 insertions(+)
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
new file mode 100644
index 0000000000..bc7091a0ce
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpCountFunction
+ * description: REGEXP_COUNT(str, regexp)--Returns the number of times str matches the regexp pattern.
+ *              regexp must be a Java regular expression.
+ *              Returns an INTEGER representation of the number of matches.
+ *              NULL if any of the arguments are NULL or regexp is invalid.
+ */
+@TransformFunction(names = {"regexp_count"})
+public class RegexpCountFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    /**
+     * Builds one parser for each of the first two arguments (str, regexp).
+     * When fewer than two arguments are supplied both parsers stay null and
+     * {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpCountFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Counts the non-overlapping matches of the pattern in the input string.
+     *
+     * @return the match count as an Integer, or null when the function was built with too few
+     *         arguments, an argument evaluates to a null string, or the regexp cannot be compiled
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) both throw NullPointerException
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher;
+        try {
+            matcher = Pattern.compile(patternString).matcher(inputString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // the documented contract is NULL for an invalid regexp, not an exception
+            return null;
+        }
+        int count = 0;
+        while (matcher.find()) {
+            count++;
+        }
+        return count;
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
new file mode 100644
index 0000000000..5c825915d3
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpExtractAllFunction
+ * description: REGEXP_EXTRACT_ALL(str, regexp[, extractIndex])--Returns an ARRAY representation of all the
+ *              matched substrings. NULL if any of the arguments are NULL or invalid. Extracts all the
+ *              substrings in str that match the regexp expression and correspond to the regexp group
+ *              extractIndex. regexp may contain multiple groups. extractIndex indicates which regexp group
+ *              to extract; groups are numbered from 1 and 0 means matching the entire regular expression.
+ *              When extractIndex is omitted this implementation uses 0.
+ *              NOTE(review): the description this was derived from named 1 as the default - confirm which
+ *              default is intended.
+ * for example: REGEXP_EXTRACT_ALL("abc123def456ghi789", "(\\d+)", 0)--return [123, 456, 789]
+ *              REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY",
+ *              "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 1)--return [John]
+ *              REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY",
+ *              "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 0)--return [Name: John, Age: 25, Location: NY]
+ */
+@TransformFunction(names = {"regexp_extract_all"})
+public class RegexpExtractAllFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    private ValueParser indexIntegerParser;
+
+    /**
+     * Builds parsers for str and regexp, plus one for extractIndex when a third argument exists.
+     * With fewer than two arguments all parsers stay null and {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpExtractAllFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+                if (expressions.size() >= 3) {
+                    indexIntegerParser = OperatorTools.buildParser(expressions.get(2));
+                }
+            }
+        }
+    }
+
+    /**
+     * Collects the requested group from every match of the pattern in the input string.
+     *
+     * @return a List of the extracted strings, or null when the function was built with too few
+     *         arguments, the index is negative or larger than the pattern's group count, an argument
+     *         evaluates to a null string, the regexp cannot be compiled, or nothing matches
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        int index = 0;
+        if (indexIntegerParser != null) {
+            index = OperatorTools
+                    .parseBigDecimal(indexIntegerParser.parse(sourceData, rowIndex, context)).intValue();
+        }
+        if (index < 0) {
+            return null;
+        }
+        // Pattern.compile(null) and Pattern.matcher(null) both throw NullPointerException
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher;
+        try {
+            matcher = Pattern.compile(patternString).matcher(inputString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // the documented contract is NULL for an invalid regexp, not an exception
+            return null;
+        }
+        // groupCount() is a property of the pattern, so the range check does not need to be
+        // repeated per match; an out-of-range index yields null whether or not anything matches
+        if (index > matcher.groupCount()) {
+            return null;
+        }
+        List<String> resultList = new ArrayList<>();
+        while (matcher.find()) {
+            resultList.add(matcher.group(index));
+        }
+        return resultList.isEmpty() ? null : resultList;
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
new file mode 100644
index 0000000000..20a2d28925
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpExtractFunction
+ * description: REGEXP_EXTRACT(string1, string2[, integer])--Returns a string from string1 that is extracted
+ *              with a specified regular expression string2 and a regexp match group index integer. The
+ *              regexp match group index starts from 1 and 0 means matching the whole regexp. In addition,
+ *              the regexp match group index should not exceed the number of the defined groups.
+ *              Returns NULL when nothing matches, the index is out of range, or string2 is not a valid
+ *              regular expression.
+ *              Note: this implementation only evaluates when all three arguments are supplied; a call
+ *              with fewer arguments returns NULL.
+ * for example: REGEXP_EXTRACT("abc123def", "(\\d+)", 1)--return 123
+ *              REGEXP_EXTRACT("Name: John, Age: 25, Location: NY",
+ *              "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 2)--return 25
+ *              REGEXP_EXTRACT("abc123def", "(\\d+)", 2)--return null
+ *              REGEXP_EXTRACT("abc123def", "abcdef", 1)--return null
+ */
+@TransformFunction(names = {"regexp_extract"})
+public class RegexpExtractFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    private ValueParser indexIntegerParser;
+
+    /**
+     * Builds parsers for string1, string2 and the group index. All three are only created when
+     * at least three arguments are present; otherwise they stay null and {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpExtractFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 3) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+                indexIntegerParser = OperatorTools.buildParser(expressions.get(2));
+            }
+        }
+    }
+
+    /**
+     * Extracts the requested group from the first match of the pattern in the input string.
+     *
+     * @return the extracted group, or null when the parsers were not built, the index is negative
+     *         or larger than the pattern's group count, an argument evaluates to a null string,
+     *         the regexp cannot be compiled, or nothing matches
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null || indexIntegerParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        int indexInteger =
+                OperatorTools.parseBigDecimal(indexIntegerParser.parse(sourceData, rowIndex, context)).intValue();
+        if (indexInteger < 0) {
+            return null;
+        }
+        // Pattern.compile(null) and Pattern.matcher(null) both throw NullPointerException
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher;
+        try {
+            matcher = Pattern.compile(patternString).matcher(inputString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // keep the same "invalid regexp yields NULL" behaviour as the other REGEXP_* functions
+            return null;
+        }
+        if (matcher.find() && indexInteger <= matcher.groupCount()) {
+            return matcher.group(indexInteger);
+        }
+        return null;
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
new file mode 100644
index 0000000000..4e43b5bc9e
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+/**
+ * RegexpFunction
+ * description: REGEX(string1, string2)--Returns TRUE if any (possibly empty) substring of string1 matches the
+ *              Java regular expression string2, otherwise FALSE. Returns NULL if any of arguments is NULL.
+ *              SIMILAR(string1, string2)--Same as above
+ */
+@TransformFunction(names = {"regex", "similar"})
+public class RegexpFunction implements ValueParser {
+
+    private ValueParser inputParser;
+
+    private ValueParser patternParser;
+
+    /**
+     * Builds parsers for string1 and string2. They are only created when exactly two arguments
+     * are supplied; otherwise both stay null and {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() == 2) {
+                inputParser = OperatorTools.buildParser(expressions.get(0));
+                patternParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Tests whether the pattern matches anywhere inside the input string.
+     *
+     * @return Boolean result of {@code Matcher.find()}, or null when the parsers were not built or
+     *         an argument evaluates to a null string
+     * @throws java.util.regex.PatternSyntaxException if string2 is not a valid regular expression
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputParser == null || patternParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternParser.parse(sourceData, rowIndex, context));
+        // enforce the documented NULL-in/NULL-out contract instead of failing with a
+        // NullPointerException inside Pattern.compile / Pattern.matcher
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher = Pattern.compile(patternString).matcher(inputString);
+        return matcher.find();
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
new file mode 100644
index 0000000000..9cd128e532
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpInstrFunction
+ * description: REGEXP_INSTR(str, regexp)--Returns the position of the first substring in str that matches regexp.
+ *              Result indexes begin at 1, 0 if there is no match.
+ *              Returns an INTEGER representation of the first matched substring index.
+ *              NULL if any of the arguments are NULL or regexp is invalid.
+ */
+@TransformFunction(names = {"regexp_instr"})
+public class RegexpInstrFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    /**
+     * Builds one parser for each of the first two arguments (str, regexp).
+     * When fewer than two arguments are supplied both parsers stay null and
+     * {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpInstrFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Locates the first match of the pattern in the input string.
+     *
+     * @return the 1-based start position of the first match, 0 when nothing matches, or null when
+     *         the function was built with too few arguments, an argument evaluates to a null
+     *         string, or the regexp cannot be compiled
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) both throw NullPointerException
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher;
+        try {
+            matcher = Pattern.compile(patternString).matcher(inputString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // the documented contract is NULL for an invalid regexp, not an exception
+            return null;
+        }
+        // Matcher.start() is 0-based; the SQL result is 1-based
+        return matcher.find() ? matcher.start() + 1 : 0;
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java
new file mode 100644
index 0000000000..834739b801
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpReplaceFunction
+ * description: REGEXP_REPLACE(string1, string2, string3)--Replaces every substring of string1 that matches the
+ *              Java regular expression string2 with string3 and returns the resulting string.
+ *              string3 is handed to Matcher.replaceAll, so its group references ($n) and backslash
+ *              escapes are interpreted.
+ */
+@TransformFunction(names = {"regexp_replace"})
+public class RegexpReplaceFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    private ValueParser replaceStringParser;
+
+    /**
+     * Prepares one parser per argument. Nothing is prepared unless the call carries at least
+     * three arguments, in which case {@link #parse} later returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpReplaceFunction(Function expr) {
+        if (expr.getParameters() == null) {
+            return;
+        }
+        List<Expression> args = expr.getParameters().getExpressions();
+        if (args == null || args.size() < 3) {
+            return;
+        }
+        inputStringParser = OperatorTools.buildParser(args.get(0));
+        patternStringParser = OperatorTools.buildParser(args.get(1));
+        replaceStringParser = OperatorTools.buildParser(args.get(2));
+    }
+
+    /**
+     * Evaluates the three arguments for the given row and performs the replacement.
+     *
+     * @return the input string with all matches replaced, or null when the parsers were not built
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        boolean ready = inputStringParser != null && patternStringParser != null && replaceStringParser != null;
+        if (!ready) {
+            return null;
+        }
+        String source = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String regex = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        String replacement = OperatorTools.parseString(replaceStringParser.parse(sourceData, rowIndex, context));
+        return Pattern.compile(regex).matcher(source).replaceAll(replacement);
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java
new file mode 100644
index 0000000000..9e2a46af36
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpSubstrFunction
+ * description: REGEXP_SUBSTR(str, regexp)--Returns the first substring in str that matches regexp.
+ *              Returns a STRING representation of the first matched substring. NULL if any of the
+ *              arguments are NULL or regexp is invalid or pattern is not found.
+ *              The function is registered as "regexp_substr"; the earlier spelling "regex_substr" is
+ *              kept as an alias so existing SQL keeps working.
+ */
+@TransformFunction(names = {"regexp_substr", "regex_substr"})
+public class RegexpSubstrFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    /**
+     * Builds one parser for each of the first two arguments (str, regexp).
+     * When fewer than two arguments are supplied both parsers stay null and
+     * {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public RegexpSubstrFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Finds the first match of the pattern in the input string.
+     *
+     * @return the matched substring, or null when the function was built with too few arguments,
+     *         an argument evaluates to a null string, the regexp cannot be compiled, or nothing
+     *         matches
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) both throw NullPointerException
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher;
+        try {
+            matcher = Pattern.compile(patternString).matcher(inputString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // the documented contract is NULL for an invalid regexp, not an exception
+            return null;
+        }
+        return matcher.find() ? matcher.group(0) : null;
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java
new file mode 100644
index 0000000000..4749cd3959
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests the regexp_extract transform function by running SQL through a TransformProcessor
+ * built with a CSV source decoder and a KV sink encoder.
+ */
+public class TestRegexExtractFunction extends AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexpExtractFunction() throws Exception {
+ // three-argument form: source string, pattern and group index
+ // NOTE(review): the expected values below are consistent with numeric1 being the fourth
+ // '|'-separated field of each input row - confirm against csvSource in the base class
+ String transformSql1 = "select
regexp_extract(string1,string2,numeric1) from source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regexp_extract("abc123def", "(\\d+)", 1)
+ List<String> output1 =
processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=123");
+ // case2: regexp_extract("abc123def124", "(\\d+)", 0) -- index 0 selects the whole first match
+ List<String> output2 =
processor1.transform("abc123def124|(\\\\d+)|1|0|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=123");
+ // case3: regexp_extract("Name: John, Age: 25, Location: NY", "Name:
(\\w+), Age: (\\d+), Location: (\\w+)", 2)
+ List<String> output3 = processor1.transform(
+ "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age:
(\\\\d+), Location: (\\\\w+)|1|2|3",
+ new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=25");
+ // case4: regexp_extract("Email: [email protected]",
"([a-zA-Z]+)\\.([a-zA-Z]+)@([a-zA-Z]+)\\.([a-zA-Z]+)",
+ // 3)
+ List<String> output4 = processor1.transform(
+ "Email:
[email protected]|([a-zA-Z]+)\\\\.([a-zA-Z]+)@([a-zA-Z]+)\\\\.([a-zA-Z]+)|1|3|2",
+ new HashMap<>());
+ Assert.assertEquals(1, output4.size());
+ Assert.assertEquals(output4.get(0), "result=example");
+
+ String transformSql2 = "select regexp_extract(string1) from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case5: regexp_extract("The quick brown fox quick") -- single-argument call;
+ // the assertion below expects an empty result value
+ List<String> output5 =
+ processor2.transform("The quick brown fox
quick|quick|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output5.size());
+ Assert.assertEquals(output5.get(0), "result=");
+ String transformSql3 = "select regexp_extract(string1,string2) from
source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case6: regexp_extract("The quick brown fox quick", "[q-") -- two-argument call with a
+ // malformed pattern; only the number of output rows is asserted
+ List<String> output6 =
+ processor3.transform("The quick brown fox
quick|[q-|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output6.size());
+ // NOTE: the check below calls java.util.regex.Pattern directly; it does not inspect
+ // anything produced by the transform above
+ PatternSyntaxException exception =
assertThrows(PatternSyntaxException.class, () -> {
+ Pattern.compile("[q-");
+ });
+ assertTrue(exception.getMessage().contains("Illegal character range
near index 3"));
+ }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java
new file mode 100644
index 0000000000..b0c83c86c9
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+public class TestRegexpCountFunction extends AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexpCountFunction() throws Exception {
+ String transformSql1 = "select regexp_count(string1,string2) from
source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regexp_count("The quick brown fox quick", "quick")
+ List<String> output1 = processor1.transform("The quick brown fox
quick|quick|slow|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=2");
+ // case2: regexp_count("The quick brown fox quick", "slow")
+ List<String> output2 = processor1.transform("The quick brown fox
quick|slow|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=0");
+ String transformSql2 = "select regexp_count(string1) from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case3: regexp_count("The quick brown fox quick")
+ List<String> output3 =
+ processor2.transform("The quick brown fox
quick|quick|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=");
+ String transformSql3 = "select regexp_count(string1,string2) from
source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case4: regexp_count("The quick brown fox quick", "[q-")
+ List<String> output4 =
+ processor3.transform("The quick brown fox
quick|[q-|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output4.size());
+ PatternSyntaxException exception =
assertThrows(PatternSyntaxException.class, () -> {
+ Pattern.compile("[q-");
+ });
+ assertTrue(exception.getMessage().contains("Illegal character range
near index 3"));
+ }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java
new file mode 100644
index 0000000000..1774a52d09
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+public class TestRegexpExtractAllFunction extends
AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexpExtractAllFunction() throws Exception {
+ String transformSql1 = "select
regexp_extract_all(string1,string2,numeric1) from source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regexp_extract_all("abc123def456ghi789", "(\\d+)", 1)
+ List<String> output1 =
processor1.transform("abc123def456ghi789|(\\\\d+)|2|1|3|4", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=[123, 456, 789]");
+ // case2: regexp_extract_all("abc123def124", "(\\d+)", 0)
+ List<String> output2 =
processor1.transform("abc123def124|(\\\\d+)|1|0|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=[123, 124]");
+ // case3: regexp_extract_all("Name: John, Age: 25, Location: NY",
"Name: (\\w+), Age: (\\d+)
+ // , Location: (\\w+)", 1)
+ List<String> output3 = processor1.transform(
+ "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age:
(\\\\d+), Location: (\\\\w+)|2|1|3",
+ new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=[John]");
+ // case4: regexp_extract_all("Name: John, Age: 25, Location: NY",
"Name: (\\w+), Age: (\\d+)
+ // , Location: (\\w+)", 4)
+ List<String> output4 = processor1.transform(
+ "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age:
(\\\\d+), Location: (\\\\w+)|1|4|3",
+ new HashMap<>());
+ Assert.assertEquals(1, output4.size());
+ Assert.assertEquals(output4.get(0), "result=");
+
+ String transformSql2 = "select regexp_extract_all(string1,string2)
from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case4: regexp_extract_all("The quick brown fox quick", "quick")
+ List<String> output5 =
+ processor2.transform("The quick brown fox
quick|quick|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output5.size());
+ Assert.assertEquals(output5.get(0), "result=[quick, quick]");
+ String transformSql3 = "select regexp_extract_all(string1,string2)
from source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case5: regexp_extract_all("The quick brown fox quick", "[q-")
+ List<String> output6 =
+ processor3.transform("The quick brown fox
quick|[q-|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output6.size());
+ PatternSyntaxException exception =
assertThrows(PatternSyntaxException.class, () -> {
+ Pattern.compile("[q-");
+ });
+ assertTrue(exception.getMessage().contains("Illegal character range
near index 3"));
+ }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java
new file mode 100644
index 0000000000..c050689772
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+public class TestRegexpFunction extends AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexFunction() throws Exception {
+ String transformSql1 = "select regex(string1, string2) from source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regex("The quick brown fox", "quick")
+ List<String> output1 = processor1.transform("The quick brown
fox|quick|5|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=true");
+
+ // case2: regex("The quick brown fox", "cold")
+ List<String> output2 = processor1.transform("The quick brown
fox|cold|5|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=false");
+
+ String transformSql2 = "select regex(string1) from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case3: regex("User: Alice, ID: 12345")
+ List<String> output3 =
+ processor2.transform("User: Alice, ID: 12345|User: (\\\\w+),
ID: (\\\\d+)|5|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=");
+
+ String transformSql3 = "select similar(string1, string2) from source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+
+ // case4: similar("The quick brown fox", "quick")
+ List<String> output4 = processor3.transform("The quick brown
fox|quick|5|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output4.size());
+ Assert.assertEquals(output4.get(0), "result=true");
+
+ // case5: similar("The quick brown fox", "cold")
+ List<String> output5 = processor3.transform("The quick brown
fox|cold|5|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output5.size());
+ Assert.assertEquals(output5.get(0), "result=false");
+ }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java
new file mode 100644
index 0000000000..b65a2e9801
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+public class TestRegexpInstrFunction extends AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexpInstrFunction() throws Exception {
+ String transformSql1 = "select regexp_instr(string1,string2) from
source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regexp_instr("abc123def", "(\\d+)")
+ List<String> output1 =
processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=4");
+ // case2: regexp_instr("hello world!", "world")
+ List<String> output2 = processor1.transform("hello
world!|world|1|0|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=7");
+ // case3: regexp_instr("abcdef", "\\d+")
+ List<String> output3 = processor1.transform(
+ "abcdef|\\\\d+|1|2|3",
+ new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=0");
+
+ String transformSql2 = "select regexp_instr(string1) from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case4: regexp_instr("The quick brown fox quick")
+ List<String> output5 =
+ processor2.transform("The quick brown fox
quick|quick|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output5.size());
+ Assert.assertEquals(output5.get(0), "result=");
+ String transformSql3 = "select regexp_instr(string1,string2) from
source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case5: regexp_instr("abc123def", "[q-")
+ List<String> output6 =
+ processor3.transform("abc123def|[q-|QAQ|2|1|3", new
HashMap<>());
+ Assert.assertEquals(1, output6.size());
+ PatternSyntaxException exception =
assertThrows(PatternSyntaxException.class, () -> {
+ Pattern.compile("[q-");
+ });
+ assertTrue(exception.getMessage().contains("Illegal character range
near index 3"));
+ }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java
new file mode 100644
index 0000000000..08cd9fce93
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+public class TestRegexpReplaceFunction extends AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexpReplaceFunction() throws Exception {
+ String transformSql1 = "select regexp_replace(string1,string2,string3)
from source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regexp_replace("The quick brown fox quick", "quick", "slow")
+ List<String> output1 = processor1.transform("The quick brown fox
quick|quick|slow|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=The slow brown fox slow");
+ String transformSql2 = "select regexp_replace(string1,string2,string3)
from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case2: regexp_replace("User: Alice, ID: 12345", "\\d+", "QAQ")
+ List<String> output2 =
+ processor2.transform("User: Alice, ID:
12345|\\\\d+|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=User: Alice, ID: QAQ");
+ String transformSql3 = "select regexp_replace(string1,string2) from
source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case3: regexp_replace("User: Alice, ID: 12345", "\\d+")
+ List<String> output3 =
+ processor3.transform("User: Alice, ID:
12345|\\\\d+|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=");
+ }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java
new file mode 100644
index 0000000000..eb5cea2176
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+public class TestRegexpSubstrFunction extends AbstractFunctionStringTestBase {
+
+ @Test
+ public void testRegexpSubstrFunction() throws Exception {
+ String transformSql1 = "select regex_substr(string1,string2) from
source";
+ TransformConfig config1 = new TransformConfig(transformSql1);
+ TransformProcessor<String, String> processor1 = TransformProcessor
+ .create(config1,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case1: regex_substr("abc123def", "(\\d+)")
+ List<String> output1 =
processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0), "result=123");
+ // case2: regex_substr("hello world!", "\\w+")
+ List<String> output2 = processor1.transform("hello
world!|\\\\w+|1|0|3", new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0), "result=hello");
+ // case3: regex_substr("abcdef", "\\d+")
+ List<String> output3 = processor1.transform(
+ "abcdef|\\\\d+|1|2|3",
+ new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=");
+
+ String transformSql2 = "select regex_substr(string1) from source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case4: regex_substr("The quick brown fox quick")
+ List<String> output5 =
+ processor2.transform("The quick brown fox
quick|quick|QAQ|2|1|3", new HashMap<>());
+ Assert.assertEquals(1, output5.size());
+ Assert.assertEquals(output5.get(0), "result=");
+ String transformSql3 = "select regex_substr(string1,string2) from
source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case5: regex_substr("abc123def", "[q-")
+ List<String> output6 =
+ processor3.transform("abc123def|[q-|QAQ|2|1|3", new
HashMap<>());
+ Assert.assertEquals(1, output6.size());
+ PatternSyntaxException exception =
assertThrows(PatternSyntaxException.class, () -> {
+ Pattern.compile("[q-");
+ });
+ assertTrue(exception.getMessage().contains("Illegal character range
near index 3"));
+ }
+}