uros-db commented on code in PR #47503:
URL: https://github.com/apache/spark/pull/47503#discussion_r1698279561


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala:
##########
@@ -2319,6 +2319,57 @@ class CollationSQLExpressionsSuite
     )
   }
 
+  // scalastyle:off nonascii
+  test("approx_count_distinct returns correct with collation") {
+
+    case class ACDTestCase(
+      collation: String,
+      input: Seq[String],
+      output: Seq[Row]
+    )
+
+    val testCases = Seq(
+      ACDTestCase("utf8_lcase", Seq("a", "a", "A"), Seq(Row(1))),
+      ACDTestCase("utf8_lcase", Seq("aCfew", "acFEw", "ACFEW"), Seq(Row(1))),
+      ACDTestCase("utf8_lcase", Seq("a"), Seq(Row(1))),
+      ACDTestCase("utf8_lcase", Seq("B"), Seq(Row(1))),
+      ACDTestCase("utf8_lcase", Seq("aCfew", "aCfew", "aCfew", "aCfew", 
"aCfew"), Seq(Row(1))),
+      ACDTestCase("utf8_lcase", Seq("Aaa", "AAA", "AAA", "aAa", "aAA", "aaa"), 
Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("č", "c", "Ć", "C", "Z", "Ž"), 
Seq(Row(2))),
+      ACDTestCase("UNICODE_CI_AI", Seq("Ž"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("z"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("fAFfewfćČĆfečwćCsŠ", 
"fAffEWfćČCfeCwcćss"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("fAFfe", "fAFfe", "fAFfe", "fAFfe", 
"fAFfe"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("fAcfewfćČĆfečwćCsŠ", 
"fAffEWfćČCfeCwcćss"), Seq(Row(2))),
+      ACDTestCase("UNICODE_CI_AI", Seq("Ćcc", "CCc", "CCC", "ĆĆČ", "CČĆ", 
"ČcĆ", "Ćčč", "ČČČ",
+        "CĆC", "ććć", "ccc"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("sŠŠ", "SŠS", "SŠS", "ššS", "ššs", 
"SšS", "ššš", "ŠŠs",
+        "ŠsŠ", "ssŠ", "SSS"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("zŽŽ", "ŽZŽ", "ZŽZ", "ZZZ", "ZžZ", 
"žžž", "žžZ", "ZžZ",
+        "ŽzZ", "zZZ", "ZZZ"), Seq(Row(1))),
+      ACDTestCase("UNICODE_CI_AI", Seq("zŽŽ", "ŽZŽ", "ZŽZ", "ZZZ", "ZžZ", 
"žžž", "žžZ", "ZžZ",
+        "ŽzZ", "zZZ", "ZZZ", "ZZŠ"), Seq(Row(2)))
+    )
+
+    testCases.foreach( t => {
+      val insertQuery = s"INSERT INTO t VALUES ${t.input.map(s => 
s"'${s}'").mkString(", ") }"
+      val testQuery = "SELECT approx_count_distinct(s) FROM t"
+
+      sql(s"CREATE TABLE t(s string collate ${t.collation})")
+      sql(insertQuery)
+      checkAnswer(sql(testQuery), t.output)
+      sql("DROP TABLE t")

Review Comment:
   To avoid having to drop the table manually like this, please see `withTable`
   (for future reference):
   
https://github.com/apache/spark/blob/0e07873d368fa17dfff11e28fd45531f1f388864/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala#L305
   
   However, in these tests, let's try to avoid creating the table at all.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to