This is an automated email from the ASF dual-hosted git repository.
luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new 751f69dad8 [INLONG-11037][SDK] Transform support ENCODE() and DECODE() function (#11041)
751f69dad8 is described below
commit 751f69dad806dff68ce796399c5e7f0c7dbfdb95
Author: emptyOVO <[email protected]>
AuthorDate: Wed Sep 11 14:13:37 2024 +0800
[INLONG-11037][SDK] Transform support ENCODE() and DECODE() function (#11041)
* [INLONG-11037][SDK] Transform support ENCODE() and DECODE() function
* fix: add NP check
* fix: clear definition of the specific encoding type
* fix: add description
---
.../transform/process/function/DecodeFunction.java | 95 +++++++++++++++++++++
.../transform/process/function/EncodeFunction.java | 97 ++++++++++++++++++++++
.../function/string/TestDecodeFunction.java | 80 ++++++++++++++++++
.../function/string/TestEncodeFunction.java | 76 +++++++++++++++++
4 files changed, 348 insertions(+)
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
new file mode 100644
index 0000000000..042e6b4f98
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+/**
+ * DecodeFunction
+ * description: decode(binary, string)
+ * - Decodes the first argument, written as whitespace-separated decimal byte values
+ *   (for example '72 101 108 108 111'), into a string using the character set named by
+ *   the second argument ('US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+ * - The character set name is matched case-insensitively; surrounding whitespace is ignored.
+ * - Returns null if either argument evaluates to null, or if the function was not called
+ *   with exactly two arguments.
+ * - Returns an empty string if either argument is empty, the character set is not one of
+ *   the supported names, or the byte list contains a token that is not a byte value.
+ */
+@TransformFunction(names = {"decode"})
+public class DecodeFunction implements ValueParser {
+
+    /** Canonical (upper-case) names of the character sets this function accepts. */
+    private static final Set<String> SUPPORTED_CHARSETS;
+
+    static {
+        Set<String> charsets = new HashSet<>();
+        charsets.add(StandardCharsets.US_ASCII.name());
+        charsets.add(StandardCharsets.ISO_8859_1.name());
+        charsets.add(StandardCharsets.UTF_8.name());
+        charsets.add(StandardCharsets.UTF_16.name());
+        charsets.add(StandardCharsets.UTF_16BE.name());
+        charsets.add(StandardCharsets.UTF_16LE.name());
+        SUPPORTED_CHARSETS = Collections.unmodifiableSet(charsets);
+    }
+
+    private ValueParser binaryParser;
+
+    private ValueParser characterSetParser;
+
+    /**
+     * Builds the parsers for the two arguments of decode(binary, charset).
+     * If the call does not carry exactly two arguments both parsers stay unset
+     * and {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public DecodeFunction(Function expr) {
+        // The parameter list may be absent when the function is written without arguments.
+        List<Expression> expressions = expr.getParameters() == null ? null : expr.getParameters().getExpressions();
+        if (expressions != null && expressions.size() == 2) {
+            binaryParser = OperatorTools.buildParser(expressions.get(0));
+            characterSetParser = OperatorTools.buildParser(expressions.get(1));
+        }
+    }
+
+    /**
+     * Evaluates both arguments for the given row and decodes the byte list.
+     *
+     * @return the decoded string, an empty string for unusable input, or null when an
+     *         argument is null or the function was built with a wrong argument count
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (binaryParser == null || characterSetParser == null) {
+            // Wrong number of arguments at construction time: nothing to evaluate.
+            return null;
+        }
+        Object binaryObj = binaryParser.parse(sourceData, rowIndex, context);
+        Object characterObj = characterSetParser.parse(sourceData, rowIndex, context);
+        if (binaryObj == null || characterObj == null) {
+            return null;
+        }
+        return decode(OperatorTools.parseString(binaryObj), OperatorTools.parseString(characterObj));
+    }
+
+    /**
+     * Converts a whitespace-separated list of decimal byte values into a string.
+     *
+     * @param binaryString byte values, each in the range -128..255
+     * @param charsetName  character set name in any letter case
+     * @return the decoded text, or an empty string if the input cannot be decoded
+     */
+    private String decode(String binaryString, String charsetName) {
+        if (binaryString == null || charsetName == null) {
+            return "";
+        }
+        String byteText = binaryString.trim();
+        // Locale.ROOT keeps the upper-casing independent of the JVM default locale
+        // (e.g. Turkish would turn "iso" into "İSO" and miss the whitelist).
+        String normalizedCharset = charsetName.trim().toUpperCase(java.util.Locale.ROOT);
+        // Only the whitelist is consulted: Charset.isSupported would throw
+        // IllegalCharsetNameException for syntactically illegal names, and every
+        // whitelisted charset is guaranteed to exist on any Java platform.
+        if (byteText.isEmpty() || !SUPPORTED_CHARSETS.contains(normalizedCharset)) {
+            return "";
+        }
+        String[] byteValues = byteText.split("\\s+");
+        byte[] byteArray = new byte[byteValues.length];
+        try {
+            for (int i = 0; i < byteValues.length; i++) {
+                int value = Integer.parseInt(byteValues[i]);
+                // Accept both signed (-128..127) and unsigned (0..255) spellings of a byte;
+                // anything else would be silently truncated by the cast below.
+                if (value < Byte.MIN_VALUE || value > 0xFF) {
+                    return "";
+                }
+                byteArray[i] = (byte) value;
+            }
+        } catch (NumberFormatException e) {
+            // A non-numeric token makes the whole byte list undecodable.
+            return "";
+        }
+        return new String(byteArray, Charset.forName(normalizedCharset));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
new file mode 100644
index 0000000000..8196c529fc
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+/**
+ * EncodeFunction
+ * description: encode(string1, string2)
+ * - Encodes string1 with the character set named by string2
+ *   ('US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16') and returns the
+ *   resulting bytes as signed decimal values separated by single spaces
+ *   (for example encode('Hello', 'UTF-8') gives '72 101 108 108 111').
+ * - The character set name is matched case-insensitively; surrounding whitespace is ignored.
+ * - Returns null if either argument evaluates to null, or if the function was not called
+ *   with exactly two arguments.
+ * - Returns an empty string if either argument is empty or the character set is not one
+ *   of the supported names.
+ */
+@TransformFunction(names = {"encode"})
+public class EncodeFunction implements ValueParser {
+
+    /** Canonical (upper-case) names of the character sets this function accepts. */
+    private static final Set<String> SUPPORTED_CHARSETS;
+
+    static {
+        Set<String> charsets = new HashSet<>();
+        charsets.add(StandardCharsets.US_ASCII.name());
+        charsets.add(StandardCharsets.ISO_8859_1.name());
+        charsets.add(StandardCharsets.UTF_8.name());
+        charsets.add(StandardCharsets.UTF_16.name());
+        charsets.add(StandardCharsets.UTF_16BE.name());
+        charsets.add(StandardCharsets.UTF_16LE.name());
+        SUPPORTED_CHARSETS = Collections.unmodifiableSet(charsets);
+    }
+
+    private ValueParser stringParser;
+
+    private ValueParser characterSetParser;
+
+    /**
+     * Builds the parsers for the two arguments of encode(string, charset).
+     * If the call does not carry exactly two arguments both parsers stay unset
+     * and {@link #parse} returns null.
+     *
+     * @param expr the parsed SQL function call
+     */
+    public EncodeFunction(Function expr) {
+        // The parameter list may be absent when the function is written without arguments.
+        List<Expression> expressions = expr.getParameters() == null ? null : expr.getParameters().getExpressions();
+        if (expressions != null && expressions.size() == 2) {
+            stringParser = OperatorTools.buildParser(expressions.get(0));
+            characterSetParser = OperatorTools.buildParser(expressions.get(1));
+        }
+    }
+
+    /**
+     * Evaluates both arguments for the given row and renders the encoded bytes as text.
+     *
+     * @return space-separated signed byte values, an empty string for unusable input, or
+     *         null when an argument is null or the function was built with a wrong
+     *         argument count
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (stringParser == null || characterSetParser == null) {
+            // Wrong number of arguments at construction time: nothing to evaluate.
+            return null;
+        }
+        Object stringObj = stringParser.parse(sourceData, rowIndex, context);
+        Object characterObj = characterSetParser.parse(sourceData, rowIndex, context);
+        if (stringObj == null || characterObj == null) {
+            return null;
+        }
+        byte[] encodedBytes = encode(OperatorTools.parseString(stringObj), OperatorTools.parseString(characterObj));
+        StringBuilder res = new StringBuilder();
+        for (byte encodedByte : encodedBytes) {
+            if (res.length() > 0) {
+                res.append(' ');
+            }
+            // Bytes are printed as signed ints, so values above 127 appear negative.
+            res.append((int) encodedByte);
+        }
+        return res.toString();
+    }
+
+    /**
+     * Encodes the text with the requested character set.
+     *
+     * @param stringValue       text to encode
+     * @param characterSetValue character set name in any letter case
+     * @return the encoded bytes; an empty array (never null) when the text is empty or the
+     *         character set is not supported
+     */
+    private byte[] encode(String stringValue, String characterSetValue) {
+        if (stringValue == null || stringValue.isEmpty() || characterSetValue == null) {
+            return new byte[0];
+        }
+        // Locale.ROOT keeps the upper-casing independent of the JVM default locale
+        // (e.g. Turkish would turn "iso" into "İSO" and miss the whitelist).
+        String normalizedCharset = characterSetValue.trim().toUpperCase(java.util.Locale.ROOT);
+        // Only the whitelist is consulted: Charset.isSupported would throw
+        // IllegalCharsetNameException for syntactically illegal names, and every
+        // whitelisted charset is guaranteed to exist on any Java platform.
+        if (!SUPPORTED_CHARSETS.contains(normalizedCharset)) {
+            return new byte[0];
+        }
+        return stringValue.getBytes(Charset.forName(normalizedCharset));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
new file mode 100644
index 0000000000..4368334b79
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Tests for the decode(binary, charset) transform function.
+ */
+public class TestDecodeFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Decodes the byte sequence for "Hello" with each supported character set (including
+     * mixed-case character set names) and checks that an unknown character set name
+     * produces an empty result.
+     */
+    @Test
+    public void testDecodeFunction() throws Exception {
+        String transformSql = "select decode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        // csvSource and kvSink are not declared here; presumably inherited from the test base class.
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: decode('72 101 108 108 111','UTF-8')
+        List<String> output1 = processor.transform("72 101 108 108 111|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=Hello", output1.get(0));
+
+        // case2: decode('72 101 108 108 111','US-ASCII')
+        List<String> output2 = processor.transform("72 101 108 108 111|US-ASCII|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=Hello", output2.get(0));
+
+        // case3: decode('72 101 108 108 111','ISO-8859-1')
+        List<String> output3 = processor.transform("72 101 108 108 111|ISO-8859-1|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=Hello", output3.get(0));
+
+        // case4: decode('0 72 0 101 0 108 0 108 0 111','UTF-16BE')
+        List<String> output4 =
+                processor.transform("0 72 0 101 0 108 0 108 0 111|UTF-16BE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=Hello", output4.get(0));
+
+        // case5: decode('72 0 101 0 108 0 108 0 111 0','UTF-16LE'), charset name in mixed case
+        List<String> output5 =
+                processor.transform("72 0 101 0 108 0 108 0 111 0|UTf-16LE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=Hello", output5.get(0));
+
+        // case6: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16'), charset name in mixed case
+        List<String> output6 =
+                processor.transform("-2 -1 0 72 0 101 0 108 0 108 0 111|UtF-16|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=Hello", output6.get(0));
+
+        // case7: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16--'), unsupported charset name
+        List<String> output7 =
+                processor.transform("-2 -1 0 72 0 101 0 108 0 108 0 111|UTF-16--|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=", output7.get(0));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
new file mode 100644
index 0000000000..73ff2f4876
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Tests for the encode(string, charset) transform function.
+ */
+public class TestEncodeFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Encodes "Hello" with each supported character set (including mixed-case character
+     * set names) and checks that an unknown character set name produces an empty result.
+     */
+    @Test
+    public void testEncodeFunction() throws Exception {
+        String transformSql = "select encode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        // csvSource and kvSink are not declared here; presumably inherited from the test base class.
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: encode('Hello','UTF-8')
+        List<String> output1 = processor.transform("Hello|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=72 101 108 108 111", output1.get(0));
+
+        // case2: encode('Hello','US-ASCII')
+        List<String> output2 = processor.transform("Hello|US-ASCII|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=72 101 108 108 111", output2.get(0));
+
+        // case3: encode('Hello','ISO-8859-1')
+        List<String> output3 = processor.transform("Hello|ISO-8859-1|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=72 101 108 108 111", output3.get(0));
+
+        // case4: encode('Hello','UTF-16BE')
+        List<String> output4 = processor.transform("Hello|UTF-16BE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=0 72 0 101 0 108 0 108 0 111", output4.get(0));
+
+        // case5: encode('Hello','UTF-16LE'), charset name in mixed case
+        List<String> output5 = processor.transform("Hello|UTf-16LE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=72 0 101 0 108 0 108 0 111 0", output5.get(0));
+
+        // case6: encode('Hello','UTF-16'), charset name in mixed case; output starts with the byte-order mark
+        List<String> output6 = processor.transform("Hello|UtF-16|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=-2 -1 0 72 0 101 0 108 0 108 0 111", output6.get(0));
+
+        // case7: encode('Hello','UTF-16--'), unsupported charset name
+        List<String> output7 = processor.transform("Hello|UTF-16--|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=", output7.get(0));
+    }
+}