cloud-fan commented on code in PR #45643:
URL: https://github.com/apache/spark/pull/45643#discussion_r1537156582


##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -70,6 +74,148 @@ class CollationStringExpressionsSuite extends QueryTest with SharedSparkSession
     })
   }
 
+  test("INSTR check result on non-explicit default collation") {
+    checkEvaluation(StringInstr(Literal("aAads"), Literal("Aa")), 2)
+  }
+
+  test("INSTR check result on explicitly collated strings") {
+    // UTF8_BINARY_LCASE
+    checkEvaluation(StringInstr(Literal.create("aaads", StringType(1)),
+      Literal.create("Aa", StringType(1))), 1)
+    checkEvaluation(StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("Aa"), "UTF8_BINARY_LCASE")), 1)
+    // UNICODE
+    checkEvaluation(StringInstr(Literal.create("aaads", StringType(2)),
+      Literal.create("Aa", StringType(2))), 0)
+    checkEvaluation(StringInstr(Collate(Literal("aaads"), "UNICODE"),
+      Collate(Literal("Aa"), "UNICODE")), 0)
+    // UNICODE_CI
+    checkEvaluation(StringInstr(Literal.create("aaads", StringType(3)),
+      Literal.create("de", StringType(3))), 0)
+    checkEvaluation(StringInstr(Collate(Literal("aaads"), "UNICODE_CI"),
+      Collate(Literal("Aa"), "UNICODE_CI")), 0)
+  }
+
+  test("INSTR fail mismatched collation types") {
+    // UNICODE and UNICODE_CI
+    val expr1 = StringInstr(Collate(Literal("aaads"), "UNICODE"),
+      Collate(Literal("Aa"), "UNICODE_CI"))
+    assert(expr1.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UNICODE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+    // DEFAULT(UTF8_BINARY) and UTF8_BINARY_LCASE
+    val expr2 = StringInstr(Literal("aaads"),
+      Collate(Literal("Aa"), "UTF8_BINARY_LCASE"))
+    assert(expr2.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY",
+          "collationNameRight" -> "UTF8_BINARY_LCASE"
+        )
+      )
+    )
+    // UTF8_BINARY_LCASE and UNICODE_CI
+    val expr3 = StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("Aa"), "UNICODE_CI"))
+    assert(expr3.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY_LCASE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+  }
+
+  test("FIND_IN_SET check result on non-explicit default collation") {
+    checkEvaluation(FindInSet(Literal("def"), Literal("abc,b,ab,c,def")), 5)
+    checkEvaluation(FindInSet(Literal("defg"), Literal("abc,b,ab,c,def")), 0)
+  }
+
+  test("FIND_IN_SET check result on explicitly collated strings") {
+    // UTF8_BINARY
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("c"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 4)
+    checkEvaluation(FindInSet(Collate(Literal("AB"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("abcd"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 0)
+    // UTF8_BINARY_LCASE
+    checkEvaluation(FindInSet(Collate(Literal("aB"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 3)
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("abc"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("aBc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 1)
+    checkEvaluation(FindInSet(Collate(Literal("abcd"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("aBc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 0)
+    // UNICODE
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("ab"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE")), 3)
+    checkEvaluation(FindInSet(Collate(Literal("Ab"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE")), 0)
+    // UNICODE_CI
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("C"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI")), 4)
+    checkEvaluation(FindInSet(Collate(Literal("DeF"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,dEf"), "UNICODE_CI")), 5)
+    checkEvaluation(FindInSet(Collate(Literal("DEFG"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI")), 0)
+  }
+
+  test("FIND_IN_SET fail mismatched collation types") {
+    // UNICODE and UNICODE_CI
+    val expr1 = FindInSet(Collate(Literal("a"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI"))
+    assert(expr1.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UNICODE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+    // DEFAULT(UTF8_BINARY) and UTF8_BINARY_LCASE
+    val expr2 = FindInSet(Collate(Literal("a"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY_LCASE"))
+    assert(expr2.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY",
+          "collationNameRight" -> "UTF8_BINARY_LCASE"
+        )
+      )
+    )
+    // UTF8_BINARY_LCASE and UNICODE_CI
+    val expr3 = FindInSet(Collate(Literal("a"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI"))
+    assert(expr3.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY_LCASE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+  }

Review Comment:
   End-to-end tests should go to SQL golden files.



##########
sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala:
##########
@@ -70,6 +74,148 @@ class CollationStringExpressionsSuite extends QueryTest with SharedSparkSession
     })
   }
 
+  test("INSTR check result on non-explicit default collation") {
+    checkEvaluation(StringInstr(Literal("aAads"), Literal("Aa")), 2)
+  }
+
+  test("INSTR check result on explicitly collated strings") {
+    // UTF8_BINARY_LCASE
+    checkEvaluation(StringInstr(Literal.create("aaads", StringType(1)),
+      Literal.create("Aa", StringType(1))), 1)
+    checkEvaluation(StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("Aa"), "UTF8_BINARY_LCASE")), 1)
+    // UNICODE
+    checkEvaluation(StringInstr(Literal.create("aaads", StringType(2)),
+      Literal.create("Aa", StringType(2))), 0)
+    checkEvaluation(StringInstr(Collate(Literal("aaads"), "UNICODE"),
+      Collate(Literal("Aa"), "UNICODE")), 0)
+    // UNICODE_CI
+    checkEvaluation(StringInstr(Literal.create("aaads", StringType(3)),
+      Literal.create("de", StringType(3))), 0)
+    checkEvaluation(StringInstr(Collate(Literal("aaads"), "UNICODE_CI"),
+      Collate(Literal("Aa"), "UNICODE_CI")), 0)
+  }
+
+  test("INSTR fail mismatched collation types") {
+    // UNICODE and UNICODE_CI
+    val expr1 = StringInstr(Collate(Literal("aaads"), "UNICODE"),
+      Collate(Literal("Aa"), "UNICODE_CI"))
+    assert(expr1.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UNICODE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+    // DEFAULT(UTF8_BINARY) and UTF8_BINARY_LCASE
+    val expr2 = StringInstr(Literal("aaads"),
+      Collate(Literal("Aa"), "UTF8_BINARY_LCASE"))
+    assert(expr2.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY",
+          "collationNameRight" -> "UTF8_BINARY_LCASE"
+        )
+      )
+    )
+    // UTF8_BINARY_LCASE and UNICODE_CI
+    val expr3 = StringInstr(Collate(Literal("aaads"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("Aa"), "UNICODE_CI"))
+    assert(expr3.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY_LCASE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+  }
+
+  test("FIND_IN_SET check result on non-explicit default collation") {
+    checkEvaluation(FindInSet(Literal("def"), Literal("abc,b,ab,c,def")), 5)
+    checkEvaluation(FindInSet(Literal("defg"), Literal("abc,b,ab,c,def")), 0)
+  }
+
+  test("FIND_IN_SET check result on explicitly collated strings") {
+    // UTF8_BINARY
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("c"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 4)
+    checkEvaluation(FindInSet(Collate(Literal("AB"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("abcd"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY")), 0)
+    // UTF8_BINARY_LCASE
+    checkEvaluation(FindInSet(Collate(Literal("aB"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 3)
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("abc"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("aBc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 1)
+    checkEvaluation(FindInSet(Collate(Literal("abcd"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("aBc,b,ab,c,def"), "UTF8_BINARY_LCASE")), 0)
+    // UNICODE
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("ab"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE")), 3)
+    checkEvaluation(FindInSet(Collate(Literal("Ab"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE")), 0)
+    // UNICODE_CI
+    checkEvaluation(FindInSet(Collate(Literal("a"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI")), 0)
+    checkEvaluation(FindInSet(Collate(Literal("C"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI")), 4)
+    checkEvaluation(FindInSet(Collate(Literal("DeF"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,dEf"), "UNICODE_CI")), 5)
+    checkEvaluation(FindInSet(Collate(Literal("DEFG"), "UNICODE_CI"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI")), 0)
+  }
+
+  test("FIND_IN_SET fail mismatched collation types") {
+    // UNICODE and UNICODE_CI
+    val expr1 = FindInSet(Collate(Literal("a"), "UNICODE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI"))
+    assert(expr1.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UNICODE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+    // DEFAULT(UTF8_BINARY) and UTF8_BINARY_LCASE
+    val expr2 = FindInSet(Collate(Literal("a"), "UTF8_BINARY"),
+      Collate(Literal("abc,b,ab,c,def"), "UTF8_BINARY_LCASE"))
+    assert(expr2.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY",
+          "collationNameRight" -> "UTF8_BINARY_LCASE"
+        )
+      )
+    )
+    // UTF8_BINARY_LCASE and UNICODE_CI
+    val expr3 = FindInSet(Collate(Literal("a"), "UTF8_BINARY_LCASE"),
+      Collate(Literal("abc,b,ab,c,def"), "UNICODE_CI"))
+    assert(expr3.checkInputDataTypes() ==
+      DataTypeMismatch(
+        errorSubClass = "COLLATION_MISMATCH",
+        messageParameters = Map(
+          "collationNameLeft" -> "UTF8_BINARY_LCASE",
+          "collationNameRight" -> "UNICODE_CI"
+        )
+      )
+    )
+  }

Review Comment:
   End-to-end tests should go to SQL golden files.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to