MaxGekk commented on code in PR #42755:
URL: https://github.com/apache/spark/pull/42755#discussion_r1324756847
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala:
##########
@@ -18,15 +18,22 @@
package org.apache.spark.sql.catalyst.expressions.aggregate
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Expression,
ExpressionDescription, ImplicitCastInputTypes}
-import org.apache.spark.sql.catalyst.trees.UnaryLike
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import
org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch,
TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.{Expression,
ExpressionDescription, ImplicitCastInputTypes, Literal}
+import org.apache.spark.sql.catalyst.trees.{BinaryLike, UnaryLike}
+import org.apache.spark.sql.catalyst.types.PhysicalDataType
import org.apache.spark.sql.catalyst.util.GenericArrayData
-import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, ArrayType,
DataType}
+import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLExpr
+import org.apache.spark.sql.errors.DataTypeErrors.{toSQLId, toSQLType}
+import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, ArrayType,
BooleanType, DataType}
import org.apache.spark.util.collection.OpenHashMap
// scalastyle:off line.size.limit
@ExpressionDescription(
- usage = "_FUNC_(col) - Returns the most frequent value for the values within
`col`. NULL values are ignored. If all the values are NULL, or there are 0
rows, returns NULL.",
+ usage = """
+ _FUNC_(col[, deterministic]) - Returns the most frequent value for the
values within `col`. NULL values are ignored. If all the values are NULL, or
there are 0 rows, returns NULL.
+ When multiple values have the same greatest frequency then either any of
values is returned if 'deterministic' is false or is not defined, or the lowest
value is returned if 'deterministic' is true.""",
Review Comment:
To be consistent with other places:
```suggestion
When multiple values have the same greatest frequency then either any
of values is returned if `deterministic` is false or is not defined, or the
lowest value is returned if `deterministic` is true.""",
```
##########
sql/core/src/test/resources/sql-tests/results/group-by.sql.out:
##########
@@ -1121,3 +1121,108 @@ struct<d:int>
-- !query output
0
2
+
+
+-- !query
+SELECT mode(col) FROM VALUES (-10), (0), (10) AS tab(col)
+-- !query schema
+struct<mode(col, false):int>
+-- !query output
+0
+
+
+-- !query
+SELECT mode(col, false) FROM VALUES (-10), (0), (10) AS tab(col)
+-- !query schema
+struct<mode(col, false):int>
+-- !query output
+0
+
+
+-- !query
+SELECT mode(col, true) FROM VALUES (-10), (0), (10) AS tab(col)
+-- !query schema
+struct<mode(col, true):int>
+-- !query output
+-10
+
+
+-- !query
+SELECT mode(col, 'true') FROM VALUES (-10), (0), (10) AS tab(col)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"true\"",
+ "inputType" : "\"STRING\"",
+ "paramIndex" : "2",
+ "requiredType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"mode(col, true)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 24,
+ "fragment" : "mode(col, 'true')"
+ } ]
+}
+
+
+-- !query
+SELECT mode(col, b) FROM VALUES (-10, false), (0, false), (10, false) AS
tab(col, b)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputExpr" : "\"b\"",
+ "inputName" : "deterministicResult",
+ "inputType" : "\"BOOLEAN\"",
+ "sqlExpr" : "\"mode(col, b)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 19,
+ "fragment" : "mode(col, b)"
+ } ]
+}
+
+
+-- !query
+SELECT mode(col) FROM VALUES (map(1, 'a')) AS tab(col)
+-- !query schema
+struct<mode(col, false):map<int,string>>
+-- !query output
+{1:"a"}
+
+
+-- !query
+SELECT mode(col, false) FROM VALUES (map(1, 'a')) AS tab(col)
+-- !query schema
+struct<mode(col, false):map<int,string>>
+-- !query output
+{1:"a"}
+
+
+-- !query
+SELECT mode(col, true) FROM VALUES (map(1, 'a')) AS tab(col)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_2005",
+ "messageParameters" : {
+ "dataType" : "PhysicalMapType"
+ }
Review Comment:
We already have the ticket SPARK-42841 for this. It could be replaced
separately.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]