This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new abb7b046d50d [SPARK-47504][SQL] Resolve AbstractDataType simpleStrings 
for StringTypeCollated
abb7b046d50d is described below

commit abb7b046d50d3aff527834f2ed83618eaeb65a89
Author: Mihailo Milosevic <mihailo.milose...@databricks.com>
AuthorDate: Tue Apr 9 00:05:37 2024 +0800

    [SPARK-47504][SQL] Resolve AbstractDataType simpleStrings for 
StringTypeCollated
    
    ### What changes were proposed in this pull request?
    Renaming simpleString in StringTypeAnyCollation. This PR should only be 
merged after https://github.com/apache/spark/pull/45383 is merged.
    
    ### Why are the changes needed?
    [SPARK-47296](https://github.com/apache/spark/pull/45422) introduced a 
change that fails all unsupported functions. As a result, the expected 
inputTypes in ExpectsInputTypes had to be changed, which altered user-facing 
behavior: error messages now print "STRING_ANY_COLLATION" in places where 
"STRING" was printed before. Concretely, if an Int is supplied where 
StringTypeAnyCollation is expected, users are shown this misleading 
message.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes
    
    ### How was this patch tested?
    Existing tests were changed back to "STRING" notation.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #45694 from mihailom-db/SPARK-47504.
    
    Authored-by: Mihailo Milosevic <mihailo.milose...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/expressions/StringTypeCollated.scala    |  4 +---
 .../sql/catalyst/expressions/StringExpressionsSuite.scala      |  2 +-
 .../src/test/scala/org/apache/spark/sql/CollationSuite.scala   |  2 +-
 .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala   | 10 +++++-----
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala
index 2f66e5795634..67b65859e6bb 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala
@@ -24,13 +24,13 @@ import org.apache.spark.sql.types.{AbstractDataType, 
DataType, StringType}
  */
 abstract class StringTypeCollated extends AbstractDataType {
   override private[sql] def defaultConcreteType: DataType = StringType
+  override private[sql] def simpleString: String = "string"
 }
 
 /**
  * Use StringTypeBinary for expressions supporting only binary collation.
  */
 case object StringTypeBinary extends StringTypeCollated {
-  override private[sql] def simpleString: String = "string_binary"
   override private[sql] def acceptsType(other: DataType): Boolean =
     other.isInstanceOf[StringType] && 
other.asInstanceOf[StringType].supportsBinaryEquality
 }
@@ -39,7 +39,6 @@ case object StringTypeBinary extends StringTypeCollated {
  * Use StringTypeBinaryLcase for expressions supporting only binary and 
lowercase collation.
  */
 case object StringTypeBinaryLcase extends StringTypeCollated {
-  override private[sql] def simpleString: String = "string_binary_lcase"
   override private[sql] def acceptsType(other: DataType): Boolean =
     other.isInstanceOf[StringType] && 
(other.asInstanceOf[StringType].supportsBinaryEquality ||
       other.asInstanceOf[StringType].isUTF8BinaryLcaseCollation)
@@ -49,6 +48,5 @@ case object StringTypeBinaryLcase extends StringTypeCollated {
  * Use StringTypeAnyCollation for expressions supporting all possible 
collation types.
  */
 case object StringTypeAnyCollation extends StringTypeCollated {
-  override private[sql] def simpleString: String = "string_any_collation"
   override private[sql] def acceptsType(other: DataType): Boolean = 
other.isInstanceOf[StringType]
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index cda9676ca58b..1fbd1ac9a29f 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -70,7 +70,7 @@ class StringExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
         errorSubClass = "UNEXPECTED_INPUT_TYPE",
         messageParameters = Map(
           "paramIndex" -> ordinalNumber(0),
-          "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"BINARY\" or 
\"ARRAY\")",
+          "requiredType" -> "(\"STRING\" or \"BINARY\" or \"ARRAY\")",
           "inputSql" -> "\"1\"",
           "inputType" -> "\"INT\""
         )
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index 6163aff66288..62150eaeac54 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -127,7 +127,7 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
         "paramIndex" -> "first",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\"",
-        "requiredType" -> "\"STRING_ANY_COLLATION\""),
+        "requiredType" -> "\"STRING\""),
       context = ExpectedContext(
         fragment = s"collate(1, 'UTF8_BINARY')", start = 7, stop = 31))
   }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 5beac3370358..e42f397cbfc2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -1713,7 +1713,7 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
         "paramIndex" -> "second",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\"",
-        "requiredType" -> "\"STRING_ANY_COLLATION\""
+        "requiredType" -> "\"STRING\""
       ),
       queryContext = Array(ExpectedContext("", "", 0, 15, "array_join(x, 1)"))
     )
@@ -1727,7 +1727,7 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
         "paramIndex" -> "third",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\"",
-        "requiredType" -> "\"STRING_ANY_COLLATION\""
+        "requiredType" -> "\"STRING\""
       ),
       queryContext = Array(ExpectedContext("", "", 0, 21, "array_join(x, ', ', 
1)"))
     )
@@ -1987,7 +1987,7 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
         "paramIndex" -> "first",
         "inputSql" -> "\"struct(1, a)\"",
         "inputType" -> "\"STRUCT<col1: INT NOT NULL, col2: STRING NOT NULL>\"",
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"ARRAY\")"
+        "requiredType" -> "(\"STRING\" or \"ARRAY\")"
       ),
       queryContext = Array(ExpectedContext("", "", 7, 29, "reverse(struct(1, 
'a'))"))
     )
@@ -2002,7 +2002,7 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
         "paramIndex" -> "first",
         "inputSql" -> "\"map(1, a)\"",
         "inputType" -> "\"MAP<INT, STRING>\"",
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"ARRAY\")"
+        "requiredType" -> "(\"STRING\" or \"ARRAY\")"
       ),
       queryContext = Array(ExpectedContext("", "", 7, 26, "reverse(map(1, 
'a'))"))
     )
@@ -2552,7 +2552,7 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
       parameters = Map(
         "sqlExpr" -> "\"concat(map(1, 2), map(3, 4))\"",
         "paramIndex" -> "first",
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"BINARY\" or 
\"ARRAY\")",
+        "requiredType" -> "(\"STRING\" or \"BINARY\" or \"ARRAY\")",
         "inputSql" -> "\"map(1, 2)\"",
         "inputType" -> "\"MAP<INT, INT>\""
       ),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to