kiszk commented on a change in pull request #23171: [SPARK-26205][SQL] Optimize
In for bytes, shorts, ints
URL: https://github.com/apache/spark/pull/23171#discussion_r260899577
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala
##########
@@ -241,6 +242,52 @@ class PredicateSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
+  // SPARK-26205: InSet on bytes, shorts, ints and dates can be compiled into a
+  // Java switch statement when the value set is at or below
+  // spark.sql.optimizer.inSetSwitchThreshold. This test checks that the switch
+  // path and the fallback hash-set path produce identical results.
+  test("SPARK-26205: Optimize InSet for bytes, shorts, ints, dates using switch statements") {
+    val byteValues = Set[Any](1.toByte, 2.toByte, Byte.MinValue, Byte.MaxValue)
+    val shortValues = Set[Any](-10.toShort, 20.toShort, Short.MinValue, Short.MaxValue)
+    val intValues = Set[Any](20, -100, 30, Int.MinValue, Int.MaxValue)
+    val dateValues = Set[Any](
+      CatalystTypeConverters.convertToCatalyst(Date.valueOf("2017-01-01")),
+      CatalystTypeConverters.convertToCatalyst(Date.valueOf("1950-01-02")))
+
+    // Verifies InSet's three-valued logic for one element type:
+    //   null input -> null; present -> true (even with a null element);
+    //   absent -> false without null, but null when the set contains null.
+    def check(presentValue: Expression, absentValue: Expression, values: Set[Any]): Unit = {
+      require(presentValue.dataType == absentValue.dataType)
+
+      val nullLiteral = Literal(null, presentValue.dataType)
+
+      checkEvaluation(InSet(nullLiteral, values), expected = null)
+      checkEvaluation(InSet(nullLiteral, values + null), expected = null)
+      checkEvaluation(InSet(presentValue, values), expected = true)
+      checkEvaluation(InSet(presentValue, values + null), expected = true)
+      checkEvaluation(InSet(absentValue, values), expected = false)
+      checkEvaluation(InSet(absentValue, values + null), expected = null)
+    }
+
+    // Covers interior and boundary (Min/Max) values for every supported type.
+    def checkAllTypes(): Unit = {
+      check(presentValue = Literal(2.toByte), absentValue = Literal(3.toByte), byteValues)
+      check(presentValue = Literal(Byte.MinValue), absentValue = Literal(5.toByte), byteValues)
+      check(presentValue = Literal(20.toShort), absentValue = Literal(-14.toShort), shortValues)
+      check(presentValue = Literal(Short.MaxValue), absentValue = Literal(30.toShort), shortValues)
+      check(presentValue = Literal(20), absentValue = Literal(-14), intValues)
+      check(presentValue = Literal(Int.MinValue), absentValue = Literal(2), intValues)
+      check(
+        presentValue = Literal(Date.valueOf("2017-01-01")),
+        absentValue = Literal(Date.valueOf("2017-10-02")),
+        dateValues)
+      check(
+        presentValue = Literal(Date.valueOf("1950-01-02")),
+        absentValue = Literal(Date.valueOf("2017-01-02")),
+        dateValues)
+    }
+
+    // Threshold 0: the switch optimization never fires (hash-set path).
+    withSQLConf(SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> "0") {
+      checkAllTypes()
+    }
+    // Threshold 20: large enough for every set above, so the switch path fires.
+    withSQLConf(SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> "20") {
+      checkAllTypes()
+    }
+    // Maximum threshold: guarantees genCodeWithSwitch() is taken for all sets.
+    withSQLConf(SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> Int.MaxValue.toString) {
+      checkAllTypes()
+    }
+  }
Review comment:
Could you please add a test case where
`spark.sql.optimizer.inSetSwitchThreshold` is set to its maximum value, so
that this optimization takes the `genCodeWithSwitch()` path?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]