MaxGekk commented on code in PR #41486:
URL: https://github.com/apache/spark/pull/41486#discussion_r1243780136


##########
core/src/main/resources/error/error-classes.json:
##########
@@ -757,6 +757,21 @@
       "The expression <sqlExpr> cannot be used as a grouping expression 
because its data type <dataType> is not an orderable data type."
     ]
   },
+  "HLL_INVALID_INPUT_SKETCH_BUFFER" : {
+    "message" : [
+      "Invalid call to <function>; only valid HLL sketch buffers are supported 
as inputs (such as those produced by the 'hll_sketch_agg' function)."
+    ]
+  },
+  "HLL_INVALID_LG_K" : {
+    "message" : [
+      "Invalid call to <function>; the 'lgConfigK' value must be between <min> 
and <max>, inclusive: <value>."

Review Comment:
   Please, quote the parameter name by backticks `` (see `toSQLId`). We use '' 
for string values.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala:
##########
@@ -189,21 +189,20 @@ object HllSketchAgg {
   private val minLgConfigK = 4
   private val maxLgConfigK = 21
 
-  // Replicate Datasketche's HllUtil's checkLgK implementation, as we can't 
reference it directly
+  // Replicate Datasketches' HllUtil's checkLgK implementation, as we can't 
reference it directly.
   def checkLgK(lgConfigK: Int): Unit = {
     if (lgConfigK < minLgConfigK || lgConfigK > maxLgConfigK) {
-      throw new SketchesArgumentException(
-        s"Log K must be between $minLgConfigK and $maxLgConfigK, inclusive: " 
+ lgConfigK)
+      throw QueryExecutionErrors.hllInvalidLgK(function = "hll_sketch_agg",
+        min = minLgConfigK.toString, max = maxLgConfigK.toString, value = 
lgConfigK.toString)

Review Comment:
   Please, pass the parameters as is, and use `toSQLValue` for quoting.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala:
##########
@@ -286,20 +285,15 @@ case class HllUnionAgg(
   }
 
   /**
-   * Helper method to compare lgConfigKs and throw an exception if
-   * allowDifferentLgConfigK isn't true and configs don't match
+   * Helper method to compare lgConfigKs and throw an exception if 
'allowDifferentLgConfigK' isn't

Review Comment:
   ```suggestion
      * Helper method to compare lgConfigKs and throw an exception if 
`allowDifferentLgConfigK` isn't
   ```



##########
sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala:
##########
@@ -1850,7 +1850,7 @@ class DataFrameAggregateSuite extends QueryTest
         )
       checkAnswer(res, Nil)
     }
-    assert(error0.toString contains "SketchesArgumentException")
+    assert(error0.toString contains "'lgConfigK' value must be between")

Review Comment:
   Could you invoke `checkError` to check error class name and its parameters.



##########
sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala:
##########
@@ -1859,7 +1859,7 @@ class DataFrameAggregateSuite extends QueryTest
         )
       checkAnswer(res, Nil)
     }
-    assert(error1.toString contains "SketchesArgumentException")
+    assert(error1.toString contains "'lgConfigK' value must be between")

Review Comment:
   ditto: use `checkError` here and below.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datasketchesExpressions.scala:
##########
@@ -98,15 +104,22 @@ case class HllUnion(first: Expression, second: Expression, 
third: Expression)
   override def dataType: DataType = BinaryType
 
   override def nullSafeEval(value1: Any, value2: Any, value3: Any): Any = {
-    val sketch1 = 
HllSketch.heapify(Memory.wrap(value1.asInstanceOf[Array[Byte]]))
-    val sketch2 = 
HllSketch.heapify(Memory.wrap(value2.asInstanceOf[Array[Byte]]))
+    val sketch1 = try {
+      HllSketch.heapify(Memory.wrap(value1.asInstanceOf[Array[Byte]]))
+    } catch {
+      case _: java.lang.Error =>
+        throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName)
+    }
+    val sketch2 = try {
+      HllSketch.heapify(Memory.wrap(value2.asInstanceOf[Array[Byte]]))
+    } catch {
+      case _: java.lang.Error =>
+        throw QueryExecutionErrors.hllInvalidInputSketchBuffer(prettyName)
+    }
     val allowDifferentLgConfigK = value3.asInstanceOf[Boolean]
     if (!allowDifferentLgConfigK && sketch1.getLgConfigK != 
sketch2.getLgConfigK) {
-      throw new UnsupportedOperationException(
-        "Sketches have different lgConfigK values: " +
-        s"${sketch1.getLgConfigK} and ${sketch2.getLgConfigK}. " +
-        "Set allowDifferentLgConfigK to true to enable unions of " +
-        "different lgConfigK values.")
+      throw QueryExecutionErrors.hllUnionDifferentLgK(
+        sketch1.getLgConfigK, sketch2.getLgConfigK, function = "hll_union")

Review Comment:
   Use `prettyName()` for consistency.



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -757,6 +757,21 @@
       "The expression <sqlExpr> cannot be used as a grouping expression 
because its data type <dataType> is not an orderable data type."
     ]
   },
+  "HLL_INVALID_INPUT_SKETCH_BUFFER" : {
+    "message" : [
+      "Invalid call to <function>; only valid HLL sketch buffers are supported 
as inputs (such as those produced by the 'hll_sketch_agg' function)."
+    ]
+  },
+  "HLL_INVALID_LG_K" : {
+    "message" : [
+      "Invalid call to <function>; the 'lgConfigK' value must be between <min> 
and <max>, inclusive: <value>."
+    ]
+  },
+  "HLL_UNION_DIFFERENT_LG_K" : {
+    "message" : [
+      "Sketches have different 'lgConfigK' values: <left> and <right>. Set the 
'allowDifferentLgConfigK' parameter to true to call <function> with different 
'lgConfigK' values."

Review Comment:
   ditto:
   ```suggestion
         "Sketches have different `lgConfigK` values: <left> and <right>. Set 
the `allowDifferentLgConfigK` parameter to true to call <function> with 
different `lgConfigK` values."
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to