maropu commented on issue #26294: [SPARK-28477] [SQL] Rewrite CaseWhen with single branch to If URL: https://github.com/apache/spark/pull/26294#issuecomment-548188835 I tried this on the current master, but I couldn't see much difference... (am I missing something?): ``` $ java -version java version "1.8.0_181" Java(TM) SE Runtime Environment (build 1.8.0_181-b13) Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode) $ ./bin/spark-shell --master=local[1] --conf spark.driver.memory=8g --conf spark.sql.shuffle.partitions=1 -v scala> val df = spark.range(10000000000L) scala> val whenVer = df.withColumn("r", when($"id" % lit(2) === lit(0), lit(1)).otherwise(lit(0))).agg(sum($"r")) scala> val ifVer = df.withColumn("r", expr("if(id % 2 = 0, 1, 0)")).agg(sum($"r")) scala> timer { whenVer.write.format("noop").mode("overwrite").save() } Elapsed time: 15.130525497s scala> timer { ifVer.write.format("noop").mode("overwrite").save() } Elapsed time: 13.953640182s ``` ``` // Benchmarks w/o aggregates scala> val df = spark.range(400000000L) scala> val whenVer = df.withColumn("r", when($"id" % lit(2) === lit(0), lit(1)).otherwise(lit(0))) scala> val ifVer = df.withColumn("r", expr("if(id % 2 = 0, 1, 0)")) scala> timer { whenVer.write.format("noop").mode("overwrite").save() } Elapsed time: 8.498007927s scala> timer { ifVer.write.format("noop").mode("overwrite").save() } Elapsed time: 8.375774959s ``` FYI, here is the generated code for both versions: ``` // whenVer ... /* 035 */ byte project_caseWhenResultState_0 = -1; /* 036 */ do { /* 037 */ boolean project_isNull_3 = true; /* 038 */ boolean project_value_3 = false; /* 039 */ boolean project_isNull_4 = false; /* 040 */ long project_value_4 = -1L; /* 041 */ if (2L == 0) { /* 042 */ project_isNull_4 = true; /* 043 */ } else { /* 044 */ project_value_4 = (long)(project_expr_0_0 % 2L); /* 045 */ } /* 046 */ if (!project_isNull_4) { /* 047 */ project_isNull_3 = false; // resultCode could change nullability. 
/* 048 */ project_value_3 = project_value_4 == 0L; /* 049 */ /* 050 */ } /* 051 */ if (!project_isNull_3 && project_value_3) { /* 052 */ project_caseWhenResultState_0 = (byte)(false ? 1 : 0); /* 053 */ project_project_value_2_0 = 1; /* 054 */ continue; /* 055 */ } /* 056 */ /* 057 */ project_caseWhenResultState_0 = (byte)(false ? 1 : 0); /* 058 */ project_project_value_2_0 = 0; /* 059 */ /* 060 */ } while (false); /* 061 */ // TRUE if any condition is met and the result is null, or no any condition is met. /* 062 */ final boolean project_isNull_2 = (project_caseWhenResultState_0 != 0); ... // ifVer ... /* 034 */ boolean project_isNull_3 = true; /* 035 */ boolean project_value_3 = false; /* 036 */ boolean project_isNull_4 = false; /* 037 */ long project_value_4 = -1L; /* 038 */ if (2L == 0) { /* 039 */ project_isNull_4 = true; /* 040 */ } else { /* 041 */ project_value_4 = (long)(project_expr_0_0 % 2L); /* 042 */ } /* 043 */ if (!project_isNull_4) { /* 044 */ project_isNull_3 = false; // resultCode could change nullability. /* 045 */ project_value_3 = project_value_4 == 0L; /* 046 */ /* 047 */ } /* 048 */ boolean project_isNull_2 = false; /* 049 */ int project_value_2 = -1; /* 050 */ if (!project_isNull_3 && project_value_3) { /* 051 */ project_isNull_2 = false; /* 052 */ project_value_2 = 1; /* 053 */ } else { /* 054 */ project_isNull_2 = false; /* 055 */ project_value_2 = 0; /* 056 */ } ... ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
