maropu commented on a change in pull request #28626:
URL: https://github.com/apache/spark/pull/28626#discussion_r429711739
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
Review comment:
nit: `Check whether should extend NullIntolerant` -> `Check whether SQL
expressions should extend NullIntolerant`
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
+    // Only check expressions extended from these expressions
+    val parentExpressionNames = Seq(classOf[UnaryExpression], classOf[BinaryExpression],
+      classOf[TernaryExpression], classOf[QuaternaryExpression],
+      classOf[SeptenaryExpression]).map(_.getName)
+    // Do not check these expressions
+    val whiteList = Seq(
+      classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod],
+      classOf[CheckOverflow], classOf[NormalizeNaNAndZero], classOf[InSet],
+      classOf[PrintToStderr], classOf[CodegenFallbackExpression]).map(_.getName)
+
+    spark.sessionState.functionRegistry.listFunction()
+      .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName)
+      .filterNot(c => whiteList.exists(_.equals(c))).foreach { className =>
+        if (needToCheckNullIntolerant(className)) {
+          val evalExist = checkIfEvalOverrode(className)
+          val nullIntolerantExist = checkIfNullIntolerantMixedIn(className)
+          if (evalExist && nullIntolerantExist) {
+            fail(s"$className should not extend ${classOf[NullIntolerant].getSimpleName}")
+          } else if (!evalExist && !nullIntolerantExist) {
+            fail(s"$className should extend ${classOf[NullIntolerant].getSimpleName}")
+          } else {
+            assert((!evalExist && nullIntolerantExist) || (evalExist && !nullIntolerantExist))
+          }
+        }
+      }
+
+    def needToCheckNullIntolerant(className: String): Boolean = {
Review comment:
Could you leave some comments about which types of expressions should be checked here? Does this check target non-aggregate SQL exprs?
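For example (just a rough, untested sketch to illustrate the intent; the `isAggregateExpr` helper below is hypothetical and not part of this PR), the non-aggregate restriction could be made explicit like this:
```
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
import org.apache.spark.util.Utils

// Hypothetical helper: aggregates have their own null handling, so they would be
// skipped and only non-aggregate SQL expressions reach the NullIntolerant check.
def isAggregateExpr(className: String): Boolean =
  classOf[AggregateFunction].isAssignableFrom(Utils.classForName(className))
```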
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
+    // Only check expressions extended from these expressions
+    val parentExpressionNames = Seq(classOf[UnaryExpression], classOf[BinaryExpression],
+      classOf[TernaryExpression], classOf[QuaternaryExpression],
+      classOf[SeptenaryExpression]).map(_.getName)
+    // Do not check these expressions
+    val whiteList = Seq(
+      classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod],
+      classOf[CheckOverflow], classOf[NormalizeNaNAndZero], classOf[InSet],
+      classOf[PrintToStderr], classOf[CodegenFallbackExpression]).map(_.getName)
Review comment:
Could you leave some comments about why these exprs are ignored, like the other ignore lists in this suite do?
https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala#L121
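For example, something like this (an illustrative sketch only; the reason comments are assumptions and should be replaced with the actual rationale for each group):
```
// Do not check these expressions; leave one short reason per group, as in the
// ignore list linked above.
val whiteList = Seq(
  // Assumed reason: override eval() to short-circuit the zero-divisor case while
  // still returning null for null inputs
  classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod],
  // Assumed reason: custom eval()/nullability handling that the reflection-based
  // heuristic in this test cannot classify
  classOf[CheckOverflow], classOf[NormalizeNaNAndZero], classOf[InSet],
  classOf[PrintToStderr], classOf[CodegenFallbackExpression]).map(_.getName)
```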
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
+    // Only check expressions extended from these expressions
+    val parentExpressionNames = Seq(classOf[UnaryExpression], classOf[BinaryExpression],
+      classOf[TernaryExpression], classOf[QuaternaryExpression],
+      classOf[SeptenaryExpression]).map(_.getName)
+    // Do not check these expressions
+    val whiteList = Seq(
+      classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod],
+      classOf[CheckOverflow], classOf[NormalizeNaNAndZero], classOf[InSet],
+      classOf[PrintToStderr], classOf[CodegenFallbackExpression]).map(_.getName)
+
+    spark.sessionState.functionRegistry.listFunction()
+      .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName)
+      .filterNot(c => whiteList.exists(_.equals(c))).foreach { className =>
+        if (needToCheckNullIntolerant(className)) {
+          val evalExist = checkIfEvalOverrode(className)
+          val nullIntolerantExist = checkIfNullIntolerantMixedIn(className)
+          if (evalExist && nullIntolerantExist) {
+            fail(s"$className should not extend ${classOf[NullIntolerant].getSimpleName}")
Review comment:
Is this check correct? Is it really the case that we can never mix in `NullIntolerant` when overriding `eval`?
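For context, `NullIntolerant` only documents the null-in => null-out contract; it does not by itself forbid a custom `eval`. A contrived, untested sketch of an expression that overrides `eval` yet still honours that contract (purely illustrative, not from this PR):
```
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{DataType, LongType}

// Overrides eval() directly, but still returns null whenever its input is null,
// so mixing in NullIntolerant would be semantically consistent.
case class PlusOne(child: Expression) extends UnaryExpression with NullIntolerant {
  override def dataType: DataType = LongType
  override def eval(input: InternalRow): Any = {
    val v = child.eval(input)
    if (v == null) null else v.asInstanceOf[Long] + 1L
  }
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    defineCodeGen(ctx, ev, c => s"$c + 1L")
}
```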
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
+    // Only check expressions extended from these expressions
+    val parentExpressionNames = Seq(classOf[UnaryExpression], classOf[BinaryExpression],
+      classOf[TernaryExpression], classOf[QuaternaryExpression],
+      classOf[SeptenaryExpression]).map(_.getName)
+    // Do not check these expressions
+    val whiteList = Seq(
+      classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod],
+      classOf[CheckOverflow], classOf[NormalizeNaNAndZero], classOf[InSet],
+      classOf[PrintToStderr], classOf[CodegenFallbackExpression]).map(_.getName)
+
+    spark.sessionState.functionRegistry.listFunction()
+      .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName)
+      .filterNot(c => whiteList.exists(_.equals(c))).foreach { className =>
+        if (needToCheckNullIntolerant(className)) {
+          val evalExist = checkIfEvalOverrode(className)
+          val nullIntolerantExist = checkIfNullIntolerantMixedIn(className)
+          if (evalExist && nullIntolerantExist) {
+            fail(s"$className should not extend ${classOf[NullIntolerant].getSimpleName}")
+          } else if (!evalExist && !nullIntolerantExist) {
+            fail(s"$className should extend ${classOf[NullIntolerant].getSimpleName}")
+          } else {
+            assert((!evalExist && nullIntolerantExist) || (evalExist && !nullIntolerantExist))
+          }
+        }
+      }
+
+    def needToCheckNullIntolerant(className: String): Boolean = {
+      var clazz: Class[_] = Utils.classForName(className)
+      val isNonSQLExpr =
+        clazz.getInterfaces.exists(_.getName.equals(classOf[NonSQLExpression].getName))
+      var checkNullIntolerant: Boolean = false
+      while (!checkNullIntolerant && clazz.getSuperclass != null) {
+        checkNullIntolerant = parentExpressionNames.exists(_.equals(clazz.getSuperclass.getName))
+        if (!checkNullIntolerant) {
+          clazz = clazz.getSuperclass
+        }
+      }
+      checkNullIntolerant && !isNonSQLExpr
+    }
+
+    def checkIfNullIntolerantMixedIn(className: String): Boolean = {
+      val nullIntolerantName = classOf[NullIntolerant].getName
+      var clazz: Class[_] = Utils.classForName(className)
+      var nullIntolerantMixedIn = false
+      while (!nullIntolerantMixedIn && !parentExpressionNames.exists(_.equals(clazz.getName))) {
Review comment:
It seems the check `!parentExpressionNames.exists(_.equals(clazz.getName))` is redundant, since `checkIfEvalOverrode` has the same check. Could you filter the candidate expressions in a first step, then check whether they extend `NullIntolerant` or not?
```
val candidateExprsToCheck = spark.sessionState.functionRegistry.listFunction()
  .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName)
  .filter(/* filter candidate exprs here */)
candidateExprsToCheck.foreach { className =>
  /* Check if they extend `NullIntolerant` or not */
}
```
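For what it's worth, one possible concrete shape of that restructuring (a rough, untested sketch reusing the helpers from the diff above; `whiteList`, `needToCheckNullIntolerant`, `checkIfEvalOverrode`, and `checkIfNullIntolerantMixedIn` are assumed to be in scope):
```
val candidateExprsToCheck = spark.sessionState.functionRegistry.listFunction()
  .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName)
  .filterNot(whiteList.contains)
  .filter(needToCheckNullIntolerant) // keep only the fixed-arity, non-NonSQLExpression exprs

candidateExprsToCheck.foreach { className =>
  val evalExist = checkIfEvalOverrode(className)
  val nullIntolerantExist = checkIfNullIntolerantMixedIn(className)
  // Exactly one should hold: either the expression keeps the default null-aware eval()
  // and mixes in NullIntolerant, or it overrides eval() and does not mix it in.
  assert(evalExist ^ nullIntolerantExist,
    s"$className should ${if (evalExist) "not " else ""}extend " +
      classOf[NullIntolerant].getSimpleName)
}
```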
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
+    // Only check expressions extended from these expressions
+    val parentExpressionNames = Seq(classOf[UnaryExpression], classOf[BinaryExpression],
+      classOf[TernaryExpression], classOf[QuaternaryExpression],
+      classOf[SeptenaryExpression]).map(_.getName)
+    // Do not check these expressions
+    val whiteList = Seq(
Review comment:
not `whiteList` but `blackList`.
##########
File path: sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
##########
@@ -156,4 +158,74 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
      }
    }
  }
+
+  test("Check whether should extend NullIntolerant") {
+    // Only check expressions extended from these expressions
+    val parentExpressionNames = Seq(classOf[UnaryExpression], classOf[BinaryExpression],
Review comment:
nit: How about `parentExpressionNames` -> `exprTypesToCheck`?