HyukjinKwon commented on code in PR #45819:
URL: https://github.com/apache/spark/pull/45819#discussion_r1548777557
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -20,421 +20,406 @@ package org.apache.spark.sql
import scala.collection.immutable.Seq
import org.apache.spark.SparkConf
-import org.apache.spark.sql.catalyst.ExtendedAnalysisException
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.StringType
-class CollationRegexpExpressionsSuite extends QueryTest with SharedSparkSession {
+class CollationRegexpExpressionsSuite
+ extends QueryTest
+ with SharedSparkSession
+ with ExpressionEvalHelper {
case class CollationTestCase[R](s1: String, s2: String, collation: String,
expectedResult: R)
case class CollationTestFail[R](s1: String, s2: String, collation: String)
test("Support Like string expression with Collation") {
+ def prepareLike(input: String,
+ regExp: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val regExpExpr = Literal.create(regExp, StringType(collationId))
+ Like(inputExpr, regExpExpr, '\\')
+ }
// Supported collations
val checks = Seq(
CollationTestCase("ABC", "%B%", "UTF8_BINARY", true)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT collate('${ct.s1}', '${ct.collation}') like " +
- s"collate('${ct.s2}', '${ct.collation}')"), Row(ct.expectedResult))
- })
+ checks.foreach(ct =>
+ checkEvaluation(prepareLike(ct.s1, ct.s2, ct.collation), ct.expectedResult))
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", "%b%", "UTF8_BINARY_LCASE", false),
- CollationTestCase("ABC", "%B%", "UNICODE", true),
- CollationTestCase("ABC", "%b%", "UNICODE_CI", false)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT collate('${ct.s1}', '${ct.collation}') like " +
- s"collate('${ct.s2}', '${ct.collation}')")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"collate(${ct.s1}) LIKE collate(${ct.s2})\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"like collate('${ct.s2}', '${ct.collation}')",
- start = 26 + ct.collation.length,
- stop = 48 + 2 * ct.collation.length
+ CollationTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", "%B%", "UNICODE"),
+ CollationTestFail("ABC", "%b%", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareLike(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""'${ct.s1}' collate ${ct.collation}"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support ILike string expression with Collation") {
+ def prepareILike(input: String,
+ regExp: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val regExpExpr = Literal.create(regExp, StringType(collationId))
+ ILike(inputExpr, regExpExpr, '\\').replacement
+ }
+
// Supported collations
val checks = Seq(
CollationTestCase("ABC", "%b%", "UTF8_BINARY", true)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT collate('${ct.s1}', '${ct.collation}') ilike " +
- s"collate('${ct.s2}', '${ct.collation}')"), Row(ct.expectedResult))
- })
+ checks.foreach(ct =>
+ checkEvaluation(prepareILike(ct.s1, ct.s2, ct.collation), ct.expectedResult)
+ )
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", "%b%", "UTF8_BINARY_LCASE", false),
- CollationTestCase("ABC", "%b%", "UNICODE", true),
- CollationTestCase("ABC", "%b%", "UNICODE_CI", false)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT collate('${ct.s1}', '${ct.collation}') ilike " +
- s"collate('${ct.s2}', '${ct.collation}')")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"ilike(collate(${ct.s1}), collate(${ct.s2}))\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"ilike collate('${ct.s2}', '${ct.collation}')",
- start = 26 + ct.collation.length,
- stop = 49 + 2 * ct.collation.length
+ CollationTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", "%b%", "UNICODE"),
+ CollationTestFail("ABC", "%b%", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareILike(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""lower('${ct.s1}' collate ${ct.collation})"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support RLike string expression with Collation") {
+ def prepareRLike(input: String,
+ regExp: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val regExpExpr = Literal.create(regExp, StringType(collationId))
+ RLike(inputExpr, regExpExpr)
+ }
// Supported collations
val checks = Seq(
CollationTestCase("ABC", ".B.", "UTF8_BINARY", true)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT collate('${ct.s1}', '${ct.collation}') rlike " +
- s"collate('${ct.s2}', '${ct.collation}')"), Row(ct.expectedResult))
- })
+ checks.foreach(ct =>
+ checkEvaluation(prepareRLike(ct.s1, ct.s2, ct.collation), ct.expectedResult)
+ )
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", ".b.", "UTF8_BINARY_LCASE", false),
- CollationTestCase("ABC", ".B.", "UNICODE", true),
- CollationTestCase("ABC", ".b.", "UNICODE_CI", false)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT collate('${ct.s1}', '${ct.collation}') rlike " +
- s"collate('${ct.s2}', '${ct.collation}')")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"RLIKE(collate(${ct.s1}), collate(${ct.s2}))\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"rlike collate('${ct.s2}', '${ct.collation}')",
- start = 26 + ct.collation.length,
- stop = 49 + 2 * ct.collation.length
+ CollationTestFail("ABC", ".b.", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", ".B.", "UNICODE"),
+ CollationTestFail("ABC", ".b.", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareRLike(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""'${ct.s1}' collate ${ct.collation}"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support StringSplit string expression with Collation") {
+ def prepareStringSplit(input: String,
+ splitBy: String,
+ collation: String): Expression = {
Review Comment:
```suggestion
    def prepareStringSplit(
        input: String,
        splitBy: String,
        collation: String): Expression = {
```
##########
sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala:
##########
@@ -20,421 +20,406 @@ package org.apache.spark.sql
import scala.collection.immutable.Seq
import org.apache.spark.SparkConf
-import org.apache.spark.sql.catalyst.ExtendedAnalysisException
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.StringType
-class CollationRegexpExpressionsSuite extends QueryTest with SharedSparkSession {
+class CollationRegexpExpressionsSuite
+ extends QueryTest
+ with SharedSparkSession
+ with ExpressionEvalHelper {
case class CollationTestCase[R](s1: String, s2: String, collation: String,
expectedResult: R)
case class CollationTestFail[R](s1: String, s2: String, collation: String)
test("Support Like string expression with Collation") {
+ def prepareLike(input: String,
+ regExp: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val regExpExpr = Literal.create(regExp, StringType(collationId))
+ Like(inputExpr, regExpExpr, '\\')
+ }
// Supported collations
val checks = Seq(
CollationTestCase("ABC", "%B%", "UTF8_BINARY", true)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT collate('${ct.s1}', '${ct.collation}') like " +
- s"collate('${ct.s2}', '${ct.collation}')"), Row(ct.expectedResult))
- })
+ checks.foreach(ct =>
+ checkEvaluation(prepareLike(ct.s1, ct.s2, ct.collation), ct.expectedResult))
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", "%b%", "UTF8_BINARY_LCASE", false),
- CollationTestCase("ABC", "%B%", "UNICODE", true),
- CollationTestCase("ABC", "%b%", "UNICODE_CI", false)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT collate('${ct.s1}', '${ct.collation}') like " +
- s"collate('${ct.s2}', '${ct.collation}')")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"collate(${ct.s1}) LIKE collate(${ct.s2})\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"like collate('${ct.s2}', '${ct.collation}')",
- start = 26 + ct.collation.length,
- stop = 48 + 2 * ct.collation.length
+ CollationTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", "%B%", "UNICODE"),
+ CollationTestFail("ABC", "%b%", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareLike(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""'${ct.s1}' collate ${ct.collation}"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support ILike string expression with Collation") {
+ def prepareILike(input: String,
+ regExp: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val regExpExpr = Literal.create(regExp, StringType(collationId))
+ ILike(inputExpr, regExpExpr, '\\').replacement
+ }
+
// Supported collations
val checks = Seq(
CollationTestCase("ABC", "%b%", "UTF8_BINARY", true)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT collate('${ct.s1}', '${ct.collation}') ilike " +
- s"collate('${ct.s2}', '${ct.collation}')"), Row(ct.expectedResult))
- })
+ checks.foreach(ct =>
+ checkEvaluation(prepareILike(ct.s1, ct.s2, ct.collation), ct.expectedResult)
+ )
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", "%b%", "UTF8_BINARY_LCASE", false),
- CollationTestCase("ABC", "%b%", "UNICODE", true),
- CollationTestCase("ABC", "%b%", "UNICODE_CI", false)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT collate('${ct.s1}', '${ct.collation}') ilike " +
- s"collate('${ct.s2}', '${ct.collation}')")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"ilike(collate(${ct.s1}), collate(${ct.s2}))\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"ilike collate('${ct.s2}', '${ct.collation}')",
- start = 26 + ct.collation.length,
- stop = 49 + 2 * ct.collation.length
+ CollationTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", "%b%", "UNICODE"),
+ CollationTestFail("ABC", "%b%", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareILike(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""lower('${ct.s1}' collate ${ct.collation})"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support RLike string expression with Collation") {
+ def prepareRLike(input: String,
+ regExp: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val regExpExpr = Literal.create(regExp, StringType(collationId))
+ RLike(inputExpr, regExpExpr)
+ }
// Supported collations
val checks = Seq(
CollationTestCase("ABC", ".B.", "UTF8_BINARY", true)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT collate('${ct.s1}', '${ct.collation}') rlike " +
- s"collate('${ct.s2}', '${ct.collation}')"), Row(ct.expectedResult))
- })
+ checks.foreach(ct =>
+ checkEvaluation(prepareRLike(ct.s1, ct.s2, ct.collation), ct.expectedResult)
+ )
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", ".b.", "UTF8_BINARY_LCASE", false),
- CollationTestCase("ABC", ".B.", "UNICODE", true),
- CollationTestCase("ABC", ".b.", "UNICODE_CI", false)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT collate('${ct.s1}', '${ct.collation}') rlike " +
- s"collate('${ct.s2}', '${ct.collation}')")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"RLIKE(collate(${ct.s1}), collate(${ct.s2}))\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"rlike collate('${ct.s2}', '${ct.collation}')",
- start = 26 + ct.collation.length,
- stop = 49 + 2 * ct.collation.length
+ CollationTestFail("ABC", ".b.", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", ".B.", "UNICODE"),
+ CollationTestFail("ABC", ".b.", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareRLike(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""'${ct.s1}' collate ${ct.collation}"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support StringSplit string expression with Collation") {
+ def prepareStringSplit(input: String,
+ splitBy: String,
+ collation: String): Expression = {
+ val collationId = CollationFactory.collationNameToId(collation)
+ val inputExpr = Literal.create(input, StringType(collationId))
+ val splitByExpr = Literal.create(splitBy, StringType(collationId))
+ StringSplit(inputExpr, splitByExpr, Literal(-1))
+ }
+
// Supported collations
val checks = Seq(
- CollationTestCase("ABC", "[B]", "UTF8_BINARY", 2)
+ CollationTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C"))
+ )
+ checks.foreach(ct =>
+ checkEvaluation(prepareStringSplit(ct.s1, ct.s2, ct.collation), ct.expectedResult)
)
- checks.foreach(ct => {
- checkAnswer(sql(s"SELECT size(split(collate('${ct.s1}', '${ct.collation}')" +
- s",collate('${ct.s2}', '${ct.collation}')))"), Row(ct.expectedResult))
- })
// Unsupported collations
val fails = Seq(
- CollationTestCase("ABC", "[b]", "UTF8_BINARY_LCASE", 0),
- CollationTestCase("ABC", "[B]", "UNICODE", 2),
- CollationTestCase("ABC", "[b]", "UNICODE_CI", 0)
- )
- fails.foreach(ct => {
- checkError(
- exception = intercept[ExtendedAnalysisException] {
- sql(s"SELECT size(split(collate('${ct.s1}', '${ct.collation}')" +
- s",collate('${ct.s2}', '${ct.collation}')))")
- },
- errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- sqlState = "42K09",
- parameters = Map(
- "sqlExpr" -> s"\"split(collate(${ct.s1}), collate(${ct.s2}), -1)\"",
- "paramIndex" -> "first",
- "inputSql" -> s"\"collate(${ct.s1})\"",
- "inputType" -> s"\"STRING COLLATE ${ct.collation}\"",
- "requiredType" -> "\"STRING\""
- ),
- context = ExpectedContext(
- fragment = s"split(collate('${ct.s1}', '${ct.collation}')," +
- s"collate('${ct.s2}', '${ct.collation}'))",
- start = 12,
- stop = 55 + 2 * ct.collation.length
+ CollationTestFail("ABC", "[b]", "UTF8_BINARY_LCASE"),
+ CollationTestFail("ABC", "[B]", "UNICODE"),
+ CollationTestFail("ABC", "[b]", "UNICODE_CI")
+ )
+ fails.foreach(ct =>
+ assert(prepareStringSplit(ct.s1, ct.s2, ct.collation)
+ .checkInputDataTypes() ==
+ DataTypeMismatch(
+ errorSubClass = "UNEXPECTED_INPUT_TYPE",
+ messageParameters = Map(
+ "paramIndex" -> "first",
+ "requiredType" -> """"STRING"""",
+ "inputSql" -> s""""'${ct.s1}' collate ${ct.collation}"""",
+ "inputType" -> s""""STRING COLLATE ${ct.collation}""""
+ )
)
)
- })
+ )
}
test("Support RegExpReplace string expression with Collation") {
+ def prepareRegExpReplace(input: String,
+ regExp: String,
+ collation: String): RegExpReplace = {
Review Comment:
```suggestion
    def prepareRegExpReplace(
        input: String,
        regExp: String,
        collation: String): RegExpReplace = {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]