MaxGekk commented on code in PR #41486:
URL: https://github.com/apache/spark/pull/41486#discussion_r1243780136
##########
core/src/main/resources/error/error-classes.json:
##########
@@ -757,6 +757,21 @@
"The expression <sqlExpr> cannot be used as a grouping expression
because its data type <dataType> is not an orderable data type."
]
},
+ "HLL_INVALID_INPUT_SKETCH_BUFFER" : {
+ "message" : [
+ "Invalid call to <function>; only valid HLL sketch buffers are supported
as inputs (such as those produced by the 'hll_sketch_agg' function)."
+ ]
+ },
+ "HLL_INVALID_LG_K" : {
+ "message" : [
+ "Invalid call to <function>; the 'lgConfigK' value must be between <min>
and <max>, inclusive: <value>."
Review Comment:
Please, quote the parameter name by backticks `` (see `toSQLId`). We use ''
for string values.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala:
##########
@@ -189,21 +189,20 @@ object HllSketchAgg {
private val minLgConfigK = 4
private val maxLgConfigK = 21
- // Replicate Datasketche's HllUtil's checkLgK implementation, as we can't
reference it directly
+ // Replicate Datasketches' HllUtil's checkLgK implementation, as we can't
reference it directly.
def checkLgK(lgConfigK: Int): Unit = {
if (lgConfigK < minLgConfigK || lgConfigK > maxLgConfigK) {
- throw new SketchesArgumentException(
- s"Log K must be between $minLgConfigK and $maxLgConfigK, inclusive: "
+ lgConfigK)
+ throw QueryExecutionErrors.hllInvalidLgK(function = "hll_sketch_agg",
+ min = minLgConfigK.toString, max = maxLgConfigK.toString, value =
lgConfigK.toString)
Review Comment:
Please, pass the parameters as is, and use `toSQLValue` for quoting.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala:
##########
@@ -286,20 +285,15 @@ case class HllUnionAgg(
}
/**
- * Helper method to compare lgConfigKs and throw an exception if
- * allowDifferentLgConfigK isn't true and configs don't match
+ * Helper method to compare lgConfigKs and throw an exception if
'allowDifferentLgConfigK' isn't
Review Comment:
```suggestion
* Helper method to compare lgConfigKs and throw an exception if
`allowDifferentLgConfigK` isn't
```
##########
sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala:
##########
@@ -1850,7 +1850,7 @@ class DataFrameAggregateSuite extends QueryTest
)
checkAnswer(res, Nil)
}
- assert(error0.toString contains "SketchesArgumentException")
+ assert(error0.toString contains "'lgConfigK' value must be between")
Review Comment:
Could you invoke `checkError` to check error class name and its parameters.
##########
sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala:
##########
@@ -1859,7 +1859,7 @@ class DataFrameAggregateSuite extends QueryTest
)
checkAnswer(res, Nil)
}
- assert(error1.toString contains "SketchesArgumentException")
+ assert(error1.toString contains "'lgConfigK' value must be between")
Review Comment:
ditto: use `checkError` here and below.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala:
##########
@@ -98,15 +104,22 @@ case class HllUnion(first: Expression, second: Expression,
third: Expression)
override def dataType: DataType = BinaryType
override def nullSafeEval(value1: Any, value2: Any, value3: Any): Any = {
- val sketch1 =
HllSketch.heapify(Memory.wrap(value1.asInstanceOf[Array[Byte]]))
- val sketch2 =
HllSketch.heapify(Memory.wrap(value2.asInstanceOf[Array[Byte]]))
+ val sketch1 = try {
+ HllSketch.heapify(Memory.wrap(value1.asInstanceOf[Array[Byte]]))
+ } catch {
+ case _: java.lang.Error =>
+ throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName)
+ }
+ val sketch2 = try {
+ HllSketch.heapify(Memory.wrap(value2.asInstanceOf[Array[Byte]]))
+ } catch {
+ case _: java.lang.Error =>
+ throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName)
+ }
val allowDifferentLgConfigK = value3.asInstanceOf[Boolean]
if (!allowDifferentLgConfigK && sketch1.getLgConfigK !=
sketch2.getLgConfigK) {
- throw new UnsupportedOperationException(
- "Sketches have different lgConfigK values: " +
- s"${sketch1.getLgConfigK} and ${sketch2.getLgConfigK}. " +
- "Set allowDifferentLgConfigK to true to enable unions of " +
- "different lgConfigK values.")
+ throw QueryExecutionErrors.hllUnionDifferentLgK(
+ sketch1.getLgConfigK, sketch2.getLgConfigK, function = "hll_union")
Review Comment:
Use `prettyName()` for consistency.
##########
core/src/main/resources/error/error-classes.json:
##########
@@ -757,6 +757,21 @@
"The expression <sqlExpr> cannot be used as a grouping expression
because its data type <dataType> is not an orderable data type."
]
},
+ "HLL_INVALID_INPUT_SKETCH_BUFFER" : {
+ "message" : [
+ "Invalid call to <function>; only valid HLL sketch buffers are supported
as inputs (such as those produced by the 'hll_sketch_agg' function)."
+ ]
+ },
+ "HLL_INVALID_LG_K" : {
+ "message" : [
+ "Invalid call to <function>; the 'lgConfigK' value must be between <min>
and <max>, inclusive: <value>."
+ ]
+ },
+ "HLL_UNION_DIFFERENT_LG_K" : {
+ "message" : [
+ "Sketches have different 'lgConfigK' values: <left> and <right>. Set the
'allowDifferentLgConfigK' parameter to true to call <function> with different
'lgConfigK' values."
Review Comment:
ditto:
```suggestion
"Sketches have different `lgConfigK` values: <left> and <right>. Set
the `allowDifferentLgConfigK` parameter to true to call <function> with
different `lgConfigK` values."
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]