This is an automated email from the ASF dual-hosted git repository.

luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new 239db8a3c3 [INLONG-11942][Sort] TransformFunction: url_decode supports specifying character sets (#11947)
239db8a3c3 is described below

commit 239db8a3c31a5739cd8bc4785b21a69b5e5133f1
Author: ChunLiang Lu <[email protected]>
AuthorDate: Thu Jul 24 09:33:46 2025 +0800

    [INLONG-11942][Sort] TransformFunction: url_decode supports specifying character sets (#11947)
---
 .../process/function/string/UrlDecodeFunction.java | 32 +++++++++++++----
 .../function/string/TestUrlDecodeFunction.java     | 41 ++++++++++++++++++++++
 2 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
index ff26ee1bfb..9daf27d02f 100644
--- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlDecodeFunction.java
@@ -24,34 +24,40 @@ import 
org.apache.inlong.sdk.transform.process.function.TransformFunction;
 import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
 import org.apache.inlong.sdk.transform.process.parser.ValueParser;
 
+import net.sf.jsqlparser.expression.Expression;
 import net.sf.jsqlparser.expression.Function;
 
 import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
+import java.util.List;
 
 /**
- * UrlDecodeFunction  ->  url_decode(str)
+ * UrlDecodeFunction  ->  url_decode(str[, charset])
  * description:
  * - Return NULL if 'str' is NULL, or there is an issue with the decoding 
process(such as encountering an illegal
  *          escape pattern), or the encoding scheme is not supported;
- * - Return the result of decoding a given 'str' in 
'application/x-www-form-urlencoded' format using the UTF-8 encoding scheme.
+ * - Return the result of decoding a given 'str' in 
'application/x-www-form-urlencoded' format using the charset(default:UTF-8) 
encoding scheme.
  */
 @TransformFunction(type = FunctionConstant.STRING_TYPE, names = {
-        "url_decode"}, parameter = "(String str)", descriptions = {
+        "url_decode"}, parameter = "(String str[, String charset])", 
descriptions = {
                 "- Return \"\" if 'str' is NULL, or there is an issue with the 
decoding process(such as encountering an "
                         +
                         "illegal escape pattern), or the encoding scheme is 
not supported;",
                 "- Return the result of decoding a given 'str' in 
'application/x-www-form-urlencoded' format using the "
                         +
-                        "UTF-8 encoding scheme."
+                        "charset(default:UTF-8) encoding scheme."
         }, examples = {
-                
"url_decode('https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode') = 
\"https://apache.inlong.com/search?q=java url encode\""})
+                
"url_decode('https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode') = 
\"https://apache.inlong.com/search?q=java url encode\"",
+                
"url_decode('https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode','UTF-8')
 = \"https://apache.inlong.com/search?q=java url encode\""})
 public class UrlDecodeFunction implements ValueParser {
 
     private final ValueParser stringParser;
+    private final ValueParser charsetParser;
 
     public UrlDecodeFunction(Function expr) {
-        stringParser = 
OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+        List<Expression> params = expr.getParameters().getExpressions();
+        stringParser = OperatorTools.buildParser(params.get(0));
+        charsetParser = params.size() > 1 ? 
OperatorTools.buildParser(params.get(1)) : null;
     }
 
     @Override
@@ -66,7 +72,19 @@ public class UrlDecodeFunction implements ValueParser {
         }
 
         try {
-            return URLDecoder.decode(string, 
StandardCharsets.UTF_8.toString());
+            if (charsetParser == null) {
+                return URLDecoder.decode(string, 
StandardCharsets.UTF_8.toString());
+            } else {
+                Object charsetObj = charsetParser.parse(sourceData, rowIndex, 
context);
+                if (charsetObj == null) {
+                    return null;
+                }
+                String charset = OperatorTools.parseString(charsetObj);
+                if (charset == null) {
+                    return null;
+                }
+                return URLDecoder.decode(string, charset);
+            }
         } catch (Exception e) {
             return null;
         }
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
index c4a7aac385..045176903c 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlDecodeFunction.java
@@ -54,4 +54,45 @@ public class TestUrlDecodeFunction extends 
AbstractFunctionStringTestBase {
         Assert.assertEquals(1, output2.size());
         Assert.assertEquals(output2.get(0), "result=");
     }
+
+    @Test
+    public void testUrlDecodeCharsetFunction() throws Exception {
+        String transformSql = "select url_decode(string1,'GBK') from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: 
url_decode('A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76','GBK')
+        List<String> output1 = processor.transform(
+                
"A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76|banana|cloud|1", 
new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals(output1.get(0), 
"result=A160=汕头市&vuserid=&version_build=76");
+
+        String transformSql2 = "select url_decode(string1,'UTF-8') from 
source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case2: 
url_decode('A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76','UTF-8')
+        List<String> output2 = processor2.transform(
+                
"A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76|banana|cloud|1",
+                new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals(output2.get(0), 
"result=A160=汕头市&vuserid=&version_build=76");
+
+        String transformSql3 =
+                "select 
json_query(parse_url(url_decode(string1,'GBK'),'QUERY','udf_kv'),'$.vcid') from 
source";
+        TransformConfig config3 = new TransformConfig(transformSql3);
+        TransformProcessor<String, String> processor3 = TransformProcessor
+                .create(config3, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: 
url_decode('A160%3D%C9%C7%CD%B7%CA%D0%26udf_kv%3D%7B%22vcid%22%3A%22%C9%C7%CD%B7%CA%D0%22%7D','GBK')
+        List<String> output3 = processor3.transform(
+                "https%3A%2F%2Fwww.google.com%2Fsearch%3F"
+                        + 
"A160%3D%C9%C7%CD%B7%CA%D0%26udf_kv%3D%7B%22vcid%22%3A%22%C9%C7%CD%B7%CA%D0%22%7D|banana|cloud|1",
+                new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals(output3.get(0), "result=汕头市");
+    }
 }

Reply via email to