This is an automated email from the ASF dual-hosted git repository.
luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new 239db8a3c3 [INLONG-11942][Sort] TransformFunction: url_decode supports
specifying character sets (#11947)
239db8a3c3 is described below
commit 239db8a3c31a5739cd8bc4785b21a69b5e5133f1
Author: ChunLiang Lu <[email protected]>
AuthorDate: Thu Jul 24 09:33:46 2025 +0800
[INLONG-11942][Sort] TransformFunction: url_decode supports specifying
character sets (#11947)
---
.../process/function/string/UrlDecodeFunction.java | 32 +++++++++++++----
.../function/string/TestUrlDecodeFunction.java | 41 ++++++++++++++++++++++
2 files changed, 66 insertions(+), 7 deletions(-)
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
index ff26ee1bfb..9daf27d02f 100644
---
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
@@ -24,34 +24,40 @@ import
org.apache.inlong.sdk.transform.process.function.TransformFunction;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
+import java.util.List;
/**
- * UrlDecodeFunction -> url_decode(str)
+ * UrlDecodeFunction -> url_decode(str[, charset])
* description:
* - Return NULL if 'str' is NULL, or there is an issue with the decoding
process(such as encountering an illegal
* escape pattern), or the encoding scheme is not supported;
- * - Return the result of decoding a given 'str' in
'application/x-www-form-urlencoded' format using the UTF-8 encoding scheme.
+ * - Return the result of decoding a given 'str' in
'application/x-www-form-urlencoded' format using the given charset
(default: UTF-8).
*/
@TransformFunction(type = FunctionConstant.STRING_TYPE, names = {
- "url_decode"}, parameter = "(String str)", descriptions = {
+ "url_decode"}, parameter = "(String str[, String charset])",
descriptions = {
"- Return \"\" if 'str' is NULL, or there is an issue with the
decoding process(such as encountering an "
+
"illegal escape pattern), or the encoding scheme is
not supported;",
"- Return the result of decoding a given 'str' in
'application/x-www-form-urlencoded' format using the "
+
- "UTF-8 encoding scheme."
+ "charset(default:UTF-8) encoding scheme."
}, examples = {
-
"url_decode('https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode') =
\"https://apache.inlong.com/search?q=java url encode\""})
+
"url_decode('https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode') =
\"https://apache.inlong.com/search?q=java url encode\"",
+
"url_decode('https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode','UTF-8')
= \"https://apache.inlong.com/search?q=java url encode\""})
public class UrlDecodeFunction implements ValueParser {
private final ValueParser stringParser;
+ private final ValueParser charsetParser;
public UrlDecodeFunction(Function expr) {
- stringParser =
OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+ List<Expression> params = expr.getParameters().getExpressions();
+ stringParser = OperatorTools.buildParser(params.get(0));
+ charsetParser = params.size() > 1 ?
OperatorTools.buildParser(params.get(1)) : null;
}
@Override
@@ -66,7 +72,19 @@ public class UrlDecodeFunction implements ValueParser {
}
try {
- return URLDecoder.decode(string,
StandardCharsets.UTF_8.toString());
+ if (charsetParser == null) {
+ return URLDecoder.decode(string,
StandardCharsets.UTF_8.toString());
+ } else {
+ Object charsetObj = charsetParser.parse(sourceData, rowIndex,
context);
+ if (charsetObj == null) {
+ return null;
+ }
+ String charset = OperatorTools.parseString(charsetObj);
+ if (charset == null) {
+ return null;
+ }
+ return URLDecoder.decode(string, charset);
+ }
} catch (Exception e) {
return null;
}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
index c4a7aac385..045176903c 100644
---
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
@@ -54,4 +54,45 @@ public class TestUrlDecodeFunction extends
AbstractFunctionStringTestBase {
Assert.assertEquals(1, output2.size());
Assert.assertEquals(output2.get(0), "result=");
}
+
+ @Test
+ public void testUrlDecodeCharsetFunction() throws Exception {
+ String transformSql = "select url_decode(string1,'GBK') from source";
+ TransformConfig config = new TransformConfig(transformSql);
+ TransformProcessor<String, String> processor = TransformProcessor
+ .create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+
+ // case1:
url_decode('A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76','GBK')
+ List<String> output1 = processor.transform(
+
"A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76|banana|cloud|1",
new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0),
"result=A160=汕头市&vuserid=&version_build=76");
+
+ String transformSql2 = "select url_decode(string1,'UTF-8') from
source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case2:
url_decode('A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76','UTF-8')
+ List<String> output2 = processor2.transform(
+
"A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76|banana|cloud|1",
+ new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0),
"result=A160=汕头市&vuserid=&version_build=76");
+
+ String transformSql3 =
+ "select
json_query(parse_url(url_decode(string1,'GBK'),'QUERY','udf_kv'),'$.vcid') from
source";
+ TransformConfig config3 = new TransformConfig(transformSql3);
+ TransformProcessor<String, String> processor3 = TransformProcessor
+ .create(config3,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case3:
json_query(parse_url(url_decode('https%3A%2F%2Fwww.google.com%2Fsearch%3FA160%3D%C9%C7%CD%B7%CA%D0%26udf_kv%3D%7B%22vcid%22%3A%22%C9%C7%CD%B7%CA%D0%22%7D','GBK'),'QUERY','udf_kv'),'$.vcid')
+ List<String> output3 = processor3.transform(
+ "https%3A%2F%2Fwww.google.com%2Fsearch%3F"
+ +
"A160%3D%C9%C7%CD%B7%CA%D0%26udf_kv%3D%7B%22vcid%22%3A%22%C9%C7%CD%B7%CA%D0%22%7D|banana|cloud|1",
+ new HashMap<>());
+ Assert.assertEquals(1, output3.size());
+ Assert.assertEquals(output3.get(0), "result=汕头市");
+ }
}