aokolnychyi commented on a change in pull request #23171: [SPARK-26205][SQL]
Optimize InSet Expression for bytes, shorts, ints, dates
URL: https://github.com/apache/spark/pull/23171#discussion_r261241471
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala
##########
@@ -241,6 +242,52 @@ class PredicateSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
+ test("SPARK-26205: Optimize InSet for bytes, shorts, ints, dates using
switch statements") {
+ val byteValues = Set[Any](1.toByte, 2.toByte, Byte.MinValue, Byte.MaxValue)
+ val shortValues = Set[Any](-10.toShort, 20.toShort, Short.MinValue,
Short.MaxValue)
+ val intValues = Set[Any](20, -100, 30, Int.MinValue, Int.MaxValue)
+ val dateValues = Set[Any](
+ CatalystTypeConverters.convertToCatalyst(Date.valueOf("2017-01-01")),
+ CatalystTypeConverters.convertToCatalyst(Date.valueOf("1950-01-02")))
+
+ def check(presentValue: Expression, absentValue: Expression, values:
Set[Any]): Unit = {
+ require(presentValue.dataType == absentValue.dataType)
+
+ val nullLiteral = Literal(null, presentValue.dataType)
+
+ checkEvaluation(InSet(nullLiteral, values), expected = null)
+ checkEvaluation(InSet(nullLiteral, values + null), expected = null)
+ checkEvaluation(InSet(presentValue, values), expected = true)
+ checkEvaluation(InSet(presentValue, values + null), expected = true)
+ checkEvaluation(InSet(absentValue, values), expected = false)
+ checkEvaluation(InSet(absentValue, values + null), expected = null)
+ }
+
+ def checkAllTypes(): Unit = {
+ check(presentValue = Literal(2.toByte), absentValue = Literal(3.toByte),
byteValues)
+ check(presentValue = Literal(Byte.MinValue), absentValue =
Literal(5.toByte), byteValues)
+ check(presentValue = Literal(20.toShort), absentValue =
Literal(-14.toShort), shortValues)
+ check(presentValue = Literal(Short.MaxValue), absentValue =
Literal(30.toShort), shortValues)
+ check(presentValue = Literal(20), absentValue = Literal(-14), intValues)
+ check(presentValue = Literal(Int.MinValue), absentValue = Literal(2),
intValues)
+ check(
+ presentValue = Literal(Date.valueOf("2017-01-01")),
+ absentValue = Literal(Date.valueOf("2017-01-02")),
+ dateValues)
+ check(
+ presentValue = Literal(Date.valueOf("1950-01-02")),
+ absentValue = Literal(Date.valueOf("2017-10-02")),
+ dateValues)
+ }
+
+ withSQLConf(SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> "0") {
+ checkAllTypes()
+ }
+ withSQLConf(SQLConf.OPTIMIZER_INSET_SWITCH_THRESHOLD.key -> "20") {
+ checkAllTypes()
+ }
+ }
Review comment:
Do you mean testing that if the set size is 100 and
`spark.sql.optimizer.inSetSwitchThreshold` is 100, then `genCodeWithSwitch`
is still applied?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]