[
https://issues.apache.org/jira/browse/FLINK-8301?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16302801#comment-16302801
]
ASF GitHub Bot commented on FLINK-8301:
---------------------------------------
Github user sunjincheng121 commented on a diff in the pull request:
https://github.com/apache/flink/pull/5203#discussion_r158598528
--- Diff:
flink-libraries/flink-table/src/test/scala/org/apache/flink/table/runtime/batch/table/CalcITCase.scala
---
@@ -541,6 +541,48 @@ class CalcITCase(
"default-nosharp,Sunny-nosharp,kevin2-nosharp"
TestBaseUtils.compareResultAsText(results.asJava, expected)
}
+
+ @Test
+ def testDeterministicUDFWithUnicodeParameter(): Unit = {
+ val data = List(
+ ("a\u0001b", "c\"d", "e\\\"\u0004f"),
+ ("x\u0001y", "y\"z", "z\\\"\u0004z")
+ )
+ val env = ExecutionEnvironment.getExecutionEnvironment
+ val tEnv = TableEnvironment.getTableEnvironment(env)
+ val splitUDF = new SplitUDF(deterministic = true)
+ val ds = env.fromCollection(data).toTable(tEnv, 'a, 'b, 'c)
+ .select(splitUDF('a, "\u0001", 0) as 'a,
+ splitUDF('b, "\"", 1) as 'b,
+ splitUDF('c, "\\\"\u0004", 0) as 'c
+ )
+ val results = ds.collect()
+ val expected = List(
+ "a,d,e", "x,z,z"
+ ).mkString("\n")
+ TestBaseUtils.compareResultAsText(results.asJava, expected)
+ }
+
+ @Test
+ def testNonDeterministicUDFWithUnicodeParameter(): Unit = {
+ val data = List(
--- End diff --
Same suggestion as above.
> Support Unicode in codegen for SQL && TableAPI
> ----------------------------------------------
>
> Key: FLINK-8301
> URL: https://issues.apache.org/jira/browse/FLINK-8301
> Project: Flink
> Issue Type: Improvement
> Components: Table API & SQL
> Reporter: Ruidong Li
> Assignee: Ruidong Li
>
> The current code generation does not support Unicode: "\u0001" is
> generated as "\\u0001", so a function call like concat(str, "\u0001") leads to
> a wrong result.
> This issue intends to handle char/varchar literals correctly; some examples
> follow below.
> literal: '\u0001abc' -> codegen: "\u0001abc"
> literal: '\u0022\' -> codegen: "\"\\"
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)