This is an automated email from the ASF dual-hosted git repository.

luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new 4ba289dabd [INLONG-11237][SDK] Transform SQL supports CHAR_LENGTH function (#11275)
4ba289dabd is described below

commit 4ba289dabdb55c203b634fcc00a8d0be0ff92792
Author: Zkplo <[email protected]>
AuthorDate: Thu Oct 10 11:04:11 2024 +0800

    [INLONG-11237][SDK] Transform SQL supports CHAR_LENGTH function (#11275)
    
    Co-authored-by: ZKpLo <[email protected]>
---
 ...LengthFunction.java => CharLengthFunction.java} | 19 ++++++----
 .../transform/process/function/LengthFunction.java | 24 ++++++++++--
 ...thFunction.java => TestCharLengthFunction.java} | 43 +++++++++++++--------
 .../function/string/TestCompressFunction.java      | 20 +++++-----
 .../function/string/TestLengthFunction.java        | 44 +++++++++++++++++-----
 5 files changed, 104 insertions(+), 46 deletions(-)

diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
similarity index 73%
copy from inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
copy to inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
index 0c1abba8f0..a78d16db8f 100644
--- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
@@ -22,21 +22,25 @@ import org.apache.inlong.sdk.transform.process.Context;
 import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
 import org.apache.inlong.sdk.transform.process.parser.ValueParser;
 
+import net.sf.jsqlparser.expression.Expression;
 import net.sf.jsqlparser.expression.Function;
 
+import java.util.List;
+
 /**
  * LengthFunction
- * description: length(string)
- * - return the length of the string
+ * description: char_length(string)
+ * - return the character length of the string
  * - return NULL if the string is NULL
  */
-@TransformFunction(names = {"length"})
-public class LengthFunction implements ValueParser {
+@TransformFunction(names = {"char_length"})
+public class CharLengthFunction implements ValueParser {
 
     private final ValueParser stringParser;
 
-    public LengthFunction(Function expr) {
-        stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+    public CharLengthFunction(Function expr) {
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        stringParser = OperatorTools.buildParser(expressions.get(0));
     }
 
     @Override
@@ -45,6 +49,7 @@ public class LengthFunction implements ValueParser {
         if (stringObject == null) {
             return null;
         }
-        return OperatorTools.parseString(stringObject).length();
+        String str = OperatorTools.parseString(stringObject);
+        return str.length();
     }
 }
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
index 0c1abba8f0..e7c7df2a22 100644
--- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
@@ -22,21 +22,31 @@ import org.apache.inlong.sdk.transform.process.Context;
 import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
 import org.apache.inlong.sdk.transform.process.parser.ValueParser;
 
+import net.sf.jsqlparser.expression.Expression;
 import net.sf.jsqlparser.expression.Function;
 
+import java.nio.charset.Charset;
+import java.util.List;
+
 /**
  * LengthFunction
- * description: length(string)
- * - return the length of the string
+ * description: length(string[, charsetName])
+ * - return the byte length of the string encoded with charsetName (platform default charset if omitted)
  * - return NULL if the string is NULL
  */
 @TransformFunction(names = {"length"})
 public class LengthFunction implements ValueParser {
 
     private final ValueParser stringParser;
+    private ValueParser charSetNameParser;
+    private final Charset DEFAULT_CHARSET = Charset.defaultCharset();
 
     public LengthFunction(Function expr) {
-        stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        stringParser = OperatorTools.buildParser(expressions.get(0));
+        if (expressions.size() > 1) {
+            charSetNameParser = OperatorTools.buildParser(expressions.get(1));
+        }
     }
 
     @Override
@@ -45,6 +55,12 @@ public class LengthFunction implements ValueParser {
         if (stringObject == null) {
             return null;
         }
-        return OperatorTools.parseString(stringObject).length();
+        Charset charset = DEFAULT_CHARSET;
+        if (charSetNameParser != null) {
+            charset = Charset.forName(OperatorTools.parseString(
+                    charSetNameParser.parse(sourceData, rowIndex, context)));
+        }
+        String str = OperatorTools.parseString(stringObject);
+        return str.getBytes(charset).length;
     }
 }
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
similarity index 55%
copy from inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
copy to inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
index 7181dcedf9..35a360c6f6 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
@@ -28,28 +28,41 @@ import org.junit.Test;
 import java.util.HashMap;
 import java.util.List;
 
-public class TestLengthFunction extends AbstractFunctionStringTestBase {
+public class TestCharLengthFunction extends AbstractFunctionStringTestBase {
 
     @Test
-    public void testLengthFunction() throws Exception {
-        String transformSql = "select length(string1) from source";
-        TransformConfig config = new TransformConfig(transformSql);
-        TransformProcessor<String, String> processor1 = TransformProcessor
+    public void testCharLengthFunction() throws Exception {
+        String transformSql = null, data = null;
+        TransformConfig config = null;
+        TransformProcessor<String, String> processor = null;
+        List<String> output = null;
+
+        transformSql = "select char_length(string1) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case1: length('hello world')
-        List<String> output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
-        Assert.assertEquals(1, output1.size());
-        Assert.assertEquals("result=11", output1.get(0));
+        // case1: char_length('hello world')
+        data = "hello world|";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=11", output.get(0));
+
+        // case2: char_length('应龙')
+        data = "应龙|";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=2", output.get(0));
 
-        transformSql = "select length(xxd) from source";
+        transformSql = "select char_length(xxd) from source";
         config = new TransformConfig(transformSql);
-        processor1 = TransformProcessor
+        processor = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case2: length(null)
-        output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
-        Assert.assertEquals(1, output1.size());
-        Assert.assertEquals("result=", output1.get(0));
+        // case3: char_length(null)
+        data = "hello world|apple|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=", output.get(0));
     }
 }
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
index dd01c06ecb..c31867be56 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
@@ -32,52 +32,52 @@ public class TestCompressFunction extends AbstractFunctionStringTestBase {
 
     @Test
     public void testCompressFunction() throws Exception {
-        String transformSql = "select length(compress(replicate(string1,100))) from source";
+        String transformSql = "select length(compress(replicate(string1,100)),'ISO_8859_1') from source";
         TransformConfig config = new TransformConfig(transformSql);
         TransformProcessor<String, String> processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case1: length(compress(replicate(string1,100)))
+        // case1: length(compress(replicate(string1,100)),'ISO_8859_1')
         List<String> output1 = processor1.transform("abcdefghijk|apple|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=33", output1.get(0));
 
-        transformSql = "select length(compress(string1)) from source";
+        transformSql = "select length(compress(string1),'ISO_8859_1') from source";
         config = new TransformConfig(transformSql);
         processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case2: length(compress(''))
+        // case2: length(compress(''),'ISO_8859_1')
         output1 = processor1.transform("|apple|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=0", output1.get(0));
 
-        transformSql = "select length(compress(xxd)) from source";
+        transformSql = "select length(compress(xxd),'ISO_8859_1') from source";
         config = new TransformConfig(transformSql);
         processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case3: length(compress(null))
+        // case3: length(compress(null),'ISO_8859_1')
         output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=", output1.get(0));
 
-        transformSql = "select length(compress(string1,string2)) from source";
+        transformSql = "select length(compress(string1,string2),'ISO_8859_1') from source";
         config = new TransformConfig(transformSql);
         processor1 = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
-        // case4: length(compress('hello world','Gzip'))
+        // case4: length(compress('hello world','Gzip'),'ISO_8859_1')
         output1 = processor1.transform("hello world|Gzip|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=35", output1.get(0));
 
-        // case5: length(compress('hello world','zip'))
+        // case5: length(compress('hello world','zip'),'ISO_8859_1')
         output1 = processor1.transform("hello world|zip|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=131", output1.get(0));
 
-        // case5: length(compress('hello world','undefinedType'))
+        // case6: length(compress('hello world','undefinedType'),'ISO_8859_1')
         output1 = processor1.transform("hello world|undefinedType|cloud|2|1|3", new HashMap<>());
         Assert.assertEquals(1, output1.size());
         Assert.assertEquals("result=", output1.get(0));
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
index 7181dcedf9..a4c87b68e4 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
@@ -32,24 +32,48 @@ public class TestLengthFunction extends AbstractFunctionStringTestBase {
 
     @Test
     public void testLengthFunction() throws Exception {
-        String transformSql = "select length(string1) from source";
-        TransformConfig config = new TransformConfig(transformSql);
-        TransformProcessor<String, String> processor1 = TransformProcessor
+        String transformSql = null, data = null;
+        TransformConfig config = null;
+        TransformProcessor<String, String> processor = null;
+        List<String> output = null;
+
+        transformSql = "select length(string1) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
         // case1: length('hello world')
-        List<String> output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
-        Assert.assertEquals(1, output1.size());
-        Assert.assertEquals("result=11", output1.get(0));
+        data = "hello world|apple|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=11", output.get(0));
 
         transformSql = "select length(xxd) from source";
         config = new TransformConfig(transformSql);
-        processor1 = TransformProcessor
+        processor = TransformProcessor
                 .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                         SinkEncoderFactory.createKvEncoder(kvSink));
         // case2: length(null)
-        output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
-        Assert.assertEquals(1, output1.size());
-        Assert.assertEquals("result=", output1.get(0));
+        data = "hello world|apple|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=", output.get(0));
+
+        transformSql = "select length(string1,string2) from source";
+        config = new TransformConfig(transformSql);
+        processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: length(应龙, utf-8)
+        data = "应龙|utf-8|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=6", output.get(0));
+
+        // case4: length(应龙, gbk)
+        data = "应龙|gbk|cloud|2|1|3";
+        output = processor.transform(data, new HashMap<>());
+        Assert.assertEquals(1, output.size());
+        Assert.assertEquals("result=4", output.get(0));
     }
 }

Reply via email to