This is an automated email from the ASF dual-hosted git repository.
aloyszhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new 04971c37e9 [INLONG-10832][SDK] Transform SQL support Translate
function (#10863)
04971c37e9 is described below
commit 04971c37e908771526b2ac7c888cc55badfeff27
Author: Huan Liang <[email protected]>
AuthorDate: Wed Aug 28 16:03:07 2024 +0800
[INLONG-10832][SDK] Transform SQL support Translate function (#10863)
Co-authored-by: AloysZhang <[email protected]>
---
.../process/function/TranslateFunction.java | 109 +++++++++++++++++++++
.../transform/process/operator/OperatorTools.java | 4 +-
.../TestTransformStringFunctionsProcessor.java | 27 +++++
3 files changed, 139 insertions(+), 1 deletion(-)
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java
new file mode 100644
index 0000000000..ce05eb0fc3
--- /dev/null
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TranslateFunction.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * TranslateFunction
+ * translate(expression, find_chars, replace_chars)
+ * Description:
+ * For a given expression, replaces all occurrences of specified characters with specified substitutes.
+ * Existing characters are mapped to replacement characters by their positions in the find_chars and
+ * replace_chars arguments.
+ * If more characters are specified in the find_chars argument than in the replace_chars argument,
+ * the extra characters from the find_chars argument are omitted in the return value.
+ * If a character appears more than once in find_chars, the mapping of its first occurrence is used.
+ *
+ * Translate function is similar to the replace function and the regexp_replace function,
+ * except that replace substitutes one entire string with another string and regexp_replace lets you
+ * search a string for a regular expression pattern,
+ * while translate makes multiple single-character substitutions.
+ *
+ * Arguments:
+ * expression: The expression to be translated.
+ * find_chars: A string containing the characters to be replaced.
+ * replace_chars: A string containing the characters to substitute.
+ * Null handling:
+ * a null expression yields an empty string; a null find_chars or replace_chars leaves the expression unchanged.
+ * examples:
+ * case1: translate(email, '@', '.') -> original_expression: harry@inlong.com target_expression: harry.inlong.com
+ * case2: translate(hello WorD, 'WD', 'wd') -> original_expression: hello WorD target_expression: hello word
+ * case3: translate(hello, 'el', 'E') -> original_expression: hello target_expression: hEo
+ */
+public class TranslateFunction implements ValueParser {
+
+    private final ValueParser originalStrParser;
+
+    private final ValueParser findCharsParser;
+
+    private final ValueParser replaceCharsParser;
+
+    /**
+     * Builds one parser per argument of translate(expression, find_chars, replace_chars).
+     *
+     * @param expr the parsed SQL function call; its first three parameters are used
+     */
+    public TranslateFunction(Function expr) {
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        originalStrParser = OperatorTools.buildParser(expressions.get(0));
+        findCharsParser = OperatorTools.buildParser(expressions.get(1));
+        replaceCharsParser = OperatorTools.buildParser(expressions.get(2));
+    }
+
+    /**
+     * Evaluates the three arguments for the given row and applies the character translation.
+     *
+     * @return the translated string, or an empty string when the expression evaluates to null
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        Object originalStrObject = originalStrParser.parse(sourceData, rowIndex, context);
+        Object findCharsObject = findCharsParser.parse(sourceData, rowIndex, context);
+        Object replaceCharsObject = replaceCharsParser.parse(sourceData, rowIndex, context);
+        String originalStr = OperatorTools.parseString(originalStrObject);
+        String findChars = OperatorTools.parseString(findCharsObject);
+        String replaceChars = OperatorTools.parseString(replaceCharsObject);
+
+        if (originalStr == null) {
+            return "";
+        }
+        return translate(originalStr, findChars, replaceChars);
+    }
+
+    /**
+     * Replaces every character of originalStr that occurs in findChars with the character at the same
+     * position in replaceChars, and drops it when replaceChars has no character at that position.
+     */
+    private static String translate(String originalStr, String findChars, String replaceChars) {
+        if (findChars == null || replaceChars == null || findChars.isEmpty()) {
+            return originalStr;
+        }
+        final int replaceSize = replaceChars.length();
+        // A null value marks a character that has no counterpart in replaceChars and must be removed.
+        Map<Character, Character> replacementMap = new HashMap<>();
+        for (int i = 0, findSize = findChars.length(); i < findSize; i++) {
+            char findChar = findChars.charAt(i);
+            // containsKey (not putIfAbsent) so that a null "delete" marker is never overwritten
+            // and the first occurrence of a duplicated find character wins.
+            if (!replacementMap.containsKey(findChar)) {
+                Character replacement = i < replaceSize ? Character.valueOf(replaceChars.charAt(i)) : null;
+                replacementMap.put(findChar, replacement);
+            }
+        }
+        // The builder is created lazily so an input without any match is returned as-is.
+        StringBuilder builder = null;
+        for (int i = 0, size = originalStr.length(); i < size; i++) {
+            char ch = originalStr.charAt(i);
+            if (!replacementMap.containsKey(ch)) {
+                if (builder != null) {
+                    builder.append(ch);
+                }
+                continue;
+            }
+            if (builder == null) {
+                // First match: copy the untouched prefix before starting to rewrite.
+                builder = new StringBuilder(size).append(originalStr, 0, i);
+            }
+            Character replacement = replacementMap.get(ch);
+            if (replacement != null) {
+                builder.append(replacement.charValue());
+            }
+        }
+        return builder == null ? originalStr : builder.toString();
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
index fbd52185e0..e4bfb2cf62 100644
---
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/operator/OperatorTools.java
@@ -59,6 +59,7 @@ import
org.apache.inlong.sdk.transform.process.function.TimestampExtractFunction
import org.apache.inlong.sdk.transform.process.function.ToBase64Function;
import org.apache.inlong.sdk.transform.process.function.ToDateFunction;
import org.apache.inlong.sdk.transform.process.function.ToTimestampFunction;
+import org.apache.inlong.sdk.transform.process.function.TranslateFunction;
import org.apache.inlong.sdk.transform.process.function.TrimFunction;
import org.apache.inlong.sdk.transform.process.function.UnixTimestampFunction;
import org.apache.inlong.sdk.transform.process.function.UpperFunction;
@@ -111,7 +112,7 @@ import java.util.Map;
/**
* OperatorTools
- *
+ *
*/
public class OperatorTools {
@@ -179,6 +180,7 @@ public class OperatorTools {
functionMap.put("right", RightFunction::new);
functionMap.put("timestampadd", TimestampAddFunction::new);
functionMap.put("md5", Md5Function::new);
+ functionMap.put("translate", TranslateFunction::new);
}
public static ExpressionOperator buildOperator(Expression expr) {
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
index f28e9ac50e..2a47615958 100644
---
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/TestTransformStringFunctionsProcessor.java
@@ -442,4 +442,31 @@ public class TestTransformStringFunctionsProcessor {
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=null", output1.get(0));
}
+
+    /**
+     * Verifies translate(expression, find_chars, replace_chars) with the arguments bound to
+     * different source columns; find_chars and replace_chars have equal length in every case.
+     */
+    @Test
+    public void testTranslateFunction() throws Exception {
+        String transformSql1 = "select translate(string1, string2, string3) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: translate("hello word!", "el", "EL")
+        List<String> output1 = processor1.transform("hello word!|el|EL|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        // expected value first, so a failure message reports expected/actual the right way round
+        Assert.assertEquals("result=hELLo word!", output1.get(0));
+        String transformSql2 = "select translate(string3, string1, string2) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case2: translate("hello word!", "el", "EL") with the expression taken from string3
+        List<String> output2 = processor2.transform("el|EL|hello word!|1|1|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=hELLo word!", output2.get(0));
+        // case3: translate('ApaCHe Inlong', CH, ch)
+        List<String> output3 = processor2.transform("CH|ch|ApaCHe Inlong|2|1|9", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=Apache Inlong", output3.get(0));
+    }
+
}