This is an automated email from the ASF dual-hosted git repository.
luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new 4ba289dabd [INLONG-11237][SDK] Transform SQL supports CHAR_LENGTH
function (#11275)
4ba289dabd is described below
commit 4ba289dabdb55c203b634fcc00a8d0be0ff92792
Author: Zkplo <[email protected]>
AuthorDate: Thu Oct 10 11:04:11 2024 +0800
[INLONG-11237][SDK] Transform SQL supports CHAR_LENGTH function (#11275)
Co-authored-by: ZKpLo <[email protected]>
---
...LengthFunction.java => CharLengthFunction.java} | 19 ++++++----
.../transform/process/function/LengthFunction.java | 24 ++++++++++--
...thFunction.java => TestCharLengthFunction.java} | 43 +++++++++++++--------
.../function/string/TestCompressFunction.java | 20 +++++-----
.../function/string/TestLengthFunction.java | 44 +++++++++++++++++-----
5 files changed, 104 insertions(+), 46 deletions(-)
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
similarity index 73%
copy from
inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
copy to
inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
index 0c1abba8f0..a78d16db8f 100644
---
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/CharLengthFunction.java
@@ -22,21 +22,25 @@ import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
+import java.util.List;
+
/**
* LengthFunction
- * description: length(string)
- * - return the length of the string
+ * description: char_length(string)
+ * - return the character length of the string
* - return NULL if the string is NULL
*/
-@TransformFunction(names = {"length"})
-public class LengthFunction implements ValueParser {
+@TransformFunction(names = {"char_length"})
+public class CharLengthFunction implements ValueParser {
private final ValueParser stringParser;
- public LengthFunction(Function expr) {
- stringParser =
OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+ public CharLengthFunction(Function expr) {
+ List<Expression> expressions = expr.getParameters().getExpressions();
+ stringParser = OperatorTools.buildParser(expressions.get(0));
}
@Override
@@ -45,6 +49,7 @@ public class LengthFunction implements ValueParser {
if (stringObject == null) {
return null;
}
- return OperatorTools.parseString(stringObject).length();
+ String str = OperatorTools.parseString(stringObject);
+ return str.length();
}
}
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
index 0c1abba8f0..e7c7df2a22 100644
---
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/LengthFunction.java
@@ -22,21 +22,31 @@ import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
+import java.nio.charset.Charset;
+import java.util.List;
+
/**
* LengthFunction
- * description: length(string)
- * - return the length of the string
+ * description: length(string,[charsetName])
+ * - return the byte length of the string
* - return NULL if the string is NULL
*/
@TransformFunction(names = {"length"})
public class LengthFunction implements ValueParser {
private final ValueParser stringParser;
+ private ValueParser charSetNameParser;
+ private final Charset DEFAULT_CHARSET = Charset.defaultCharset();
public LengthFunction(Function expr) {
- stringParser =
OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+ List<Expression> expressions = expr.getParameters().getExpressions();
+ stringParser = OperatorTools.buildParser(expressions.get(0));
+ if (expressions.size() > 1) {
+ charSetNameParser = OperatorTools.buildParser(expressions.get(1));
+ }
}
@Override
@@ -45,6 +55,12 @@ public class LengthFunction implements ValueParser {
if (stringObject == null) {
return null;
}
- return OperatorTools.parseString(stringObject).length();
+ Charset charset = DEFAULT_CHARSET;
+ if (charSetNameParser != null) {
+ charset = Charset.forName(OperatorTools.parseString(
+ charSetNameParser.parse(sourceData, rowIndex, context)));
+ }
+ String str = OperatorTools.parseString(stringObject);
+ return str.getBytes(charset).length;
}
}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
similarity index 55%
copy from
inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
copy to
inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
index 7181dcedf9..35a360c6f6 100644
---
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCharLengthFunction.java
@@ -28,28 +28,41 @@ import org.junit.Test;
import java.util.HashMap;
import java.util.List;
-public class TestLengthFunction extends AbstractFunctionStringTestBase {
+public class TestCharLengthFunction extends AbstractFunctionStringTestBase {
@Test
- public void testLengthFunction() throws Exception {
- String transformSql = "select length(string1) from source";
- TransformConfig config = new TransformConfig(transformSql);
- TransformProcessor<String, String> processor1 = TransformProcessor
+ public void testCharLengthFunction() throws Exception {
+ String transformSql = null, data = null;
+ TransformConfig config = null;
+ TransformProcessor<String, String> processor = null;
+ List<String> output = null;
+
+ transformSql = "select char_length(string1) from source";
+ config = new TransformConfig(transformSql);
+ processor = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
- // case1: length('hello world')
- List<String> output1 = processor1.transform("hello
world|apple|cloud|2|1|3", new HashMap<>());
- Assert.assertEquals(1, output1.size());
- Assert.assertEquals("result=11", output1.get(0));
+ // case1: char_length('hello world')
+ data = "hello world|";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=11", output.get(0));
+
+ // case2: char_length('应龙')
+ data = "应龙|";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=2", output.get(0));
- transformSql = "select length(xxd) from source";
+ transformSql = "select char_length(xxd) from source";
config = new TransformConfig(transformSql);
- processor1 = TransformProcessor
+ processor = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
- // case2: length(null)
- output1 = processor1.transform("hello world|apple|cloud|2|1|3", new
HashMap<>());
- Assert.assertEquals(1, output1.size());
- Assert.assertEquals("result=", output1.get(0));
+ // case3: char_length(null)
+ data = "hello world|apple|cloud|2|1|3";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=", output.get(0));
}
}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
index dd01c06ecb..c31867be56 100644
---
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestCompressFunction.java
@@ -32,52 +32,52 @@ public class TestCompressFunction extends
AbstractFunctionStringTestBase {
@Test
public void testCompressFunction() throws Exception {
- String transformSql = "select length(compress(replicate(string1,100)))
from source";
+ String transformSql = "select
length(compress(replicate(string1,100)),'ISO_8859_1') from source";
TransformConfig config = new TransformConfig(transformSql);
TransformProcessor<String, String> processor1 = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
- // case1: length(compress(replicate(string1,100)))
+ // case1: length(compress(replicate(string1,100)),'ISO_8859_1')
List<String> output1 =
processor1.transform("abcdefghijk|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=33", output1.get(0));
- transformSql = "select length(compress(string1)) from source";
+ transformSql = "select length(compress(string1),'ISO_8859_1') from
source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
- // case2: length(compress(''))
+ // case2: length(compress(''),'ISO_8859_1')
output1 = processor1.transform("|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=0", output1.get(0));
- transformSql = "select length(compress(xxd)) from source";
+ transformSql = "select length(compress(xxd),'ISO_8859_1') from source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
- // case3: length(compress(null))
+ // case3: length(compress(null),'ISO_8859_1')
output1 = processor1.transform("hello world|apple|cloud|2|1|3", new
HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=", output1.get(0));
- transformSql = "select length(compress(string1,string2)) from source";
+ transformSql = "select length(compress(string1,string2),'ISO_8859_1')
from source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
- // case4: length(compress('hello world','Gzip'))
+ // case4: length(compress('hello world','Gzip'),'ISO_8859_1')
output1 = processor1.transform("hello world|Gzip|cloud|2|1|3", new
HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=35", output1.get(0));
- // case5: length(compress('hello world','zip'))
+ // case5: length(compress('hello world','zip'),'ISO_8859_1')
output1 = processor1.transform("hello world|zip|cloud|2|1|3", new
HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=131", output1.get(0));
- // case5: length(compress('hello world','undefinedType'))
+ // case6: length(compress('hello world','undefinedType'),'ISO_8859_1')
output1 = processor1.transform("hello
world|undefinedType|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=", output1.get(0));
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
index 7181dcedf9..a4c87b68e4 100644
---
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestLengthFunction.java
@@ -32,24 +32,48 @@ public class TestLengthFunction extends
AbstractFunctionStringTestBase {
@Test
public void testLengthFunction() throws Exception {
- String transformSql = "select length(string1) from source";
- TransformConfig config = new TransformConfig(transformSql);
- TransformProcessor<String, String> processor1 = TransformProcessor
+ String transformSql = null, data = null;
+ TransformConfig config = null;
+ TransformProcessor<String, String> processor = null;
+ List<String> output = null;
+
+ transformSql = "select length(string1) from source";
+ config = new TransformConfig(transformSql);
+ processor = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case1: length('hello world')
- List<String> output1 = processor1.transform("hello
world|apple|cloud|2|1|3", new HashMap<>());
- Assert.assertEquals(1, output1.size());
- Assert.assertEquals("result=11", output1.get(0));
+ data = "hello world|apple|cloud|2|1|3";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=11", output.get(0));
transformSql = "select length(xxd) from source";
config = new TransformConfig(transformSql);
- processor1 = TransformProcessor
+ processor = TransformProcessor
.create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case2: length(null)
- output1 = processor1.transform("hello world|apple|cloud|2|1|3", new
HashMap<>());
- Assert.assertEquals(1, output1.size());
- Assert.assertEquals("result=", output1.get(0));
+ data = "hello world|apple|cloud|2|1|3";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=", output.get(0));
+
+ transformSql = "select length(string1,string2) from source";
+ config = new TransformConfig(transformSql);
+ processor = TransformProcessor
+ .create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case3: length(应龙, utf-8)
+ data = "应龙|utf-8|cloud|2|1|3";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=6", output.get(0));
+
+ // case4: length(应龙, gbk)
+ data = "应龙|gbk|cloud|2|1|3";
+ output = processor.transform(data, new HashMap<>());
+ Assert.assertEquals(1, output.size());
+ Assert.assertEquals("result=4", output.get(0));
}
}