maropu commented on issue #26294: [SPARK-28477] [SQL] Rewrite CaseWhen with 
single branch to If
URL: https://github.com/apache/spark/pull/26294#issuecomment-548188835
 
 
   I tried this on the current master, but I couldn't see much difference... (am 
I missing something?):
   ```
   $ java -version
   java version "1.8.0_181"
   Java(TM) SE Runtime Environment (build 1.8.0_181-b13)
   Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)
   $ ./bin/spark-shell --master=local[1] --conf spark.driver.memory=8g --conf 
spark.sql.shuffle.partitions=1 -v
   
   scala> val df = spark.range(10000000000L)
   scala> val whenVer = df.withColumn("r", when($"id" % lit(2) === lit(0), 
lit(1)).otherwise(lit(0))).agg(sum($"r"))
   scala> val ifVer = df.withColumn("r", expr("if(id % 2 = 0, 1, 
0)")).agg(sum($"r"))
   
   scala> timer { whenVer.write.format("noop").mode("overwrite").save() }
   Elapsed time: 15.130525497s                                                  
   
   
   scala> timer { ifVer.write.format("noop").mode("overwrite").save() }
   Elapsed time: 13.953640182s  
   ```
   ```
   // Benchmarks w/o aggregates
   scala> val df = spark.range(400000000L)
   scala> val whenVer = df.withColumn("r", when($"id" % lit(2) === lit(0), 
lit(1)).otherwise(lit(0)))
   scala> val ifVer = df.withColumn("r", expr("if(id % 2 = 0, 1, 0)"))
   
   scala> timer { whenVer.write.format("noop").mode("overwrite").save() }
   Elapsed time: 8.498007927s                                                   
   
   
   scala> timer { ifVer.write.format("noop").mode("overwrite").save() }
   Elapsed time: 8.375774959s                                                   
   
   ```
   FYI: the generated code for both versions:
   ```
   // whenVer
   ...
   /* 035 */     byte project_caseWhenResultState_0 = -1;
   /* 036 */     do {
   /* 037 */       boolean project_isNull_3 = true;
   /* 038 */       boolean project_value_3 = false;
   /* 039 */       boolean project_isNull_4 = false;
   /* 040 */       long project_value_4 = -1L;
   /* 041 */       if (2L == 0) {
   /* 042 */         project_isNull_4 = true;
   /* 043 */       } else {
   /* 044 */         project_value_4 = (long)(project_expr_0_0 % 2L);
   /* 045 */       }
   /* 046 */       if (!project_isNull_4) {
   /* 047 */         project_isNull_3 = false; // resultCode could change 
nullability.
   /* 048 */         project_value_3 = project_value_4 == 0L;
   /* 049 */
   /* 050 */       }
   /* 051 */       if (!project_isNull_3 && project_value_3) {
   /* 052 */         project_caseWhenResultState_0 = (byte)(false ? 1 : 0);
   /* 053 */         project_project_value_2_0 = 1;
   /* 054 */         continue;
   /* 055 */       }
   /* 056 */
   /* 057 */       project_caseWhenResultState_0 = (byte)(false ? 1 : 0);
   /* 058 */       project_project_value_2_0 = 0;
   /* 059 */
   /* 060 */     } while (false);
   /* 061 */     // TRUE if any condition is met and the result is null, or no 
any condition is met.
   /* 062 */     final boolean project_isNull_2 = 
(project_caseWhenResultState_0 != 0);
   ...
   
   // ifVer
   ...
   /* 034 */     boolean project_isNull_3 = true;
   /* 035 */     boolean project_value_3 = false;
   /* 036 */     boolean project_isNull_4 = false;
   /* 037 */     long project_value_4 = -1L;
   /* 038 */     if (2L == 0) {
   /* 039 */       project_isNull_4 = true;
   /* 040 */     } else {
   /* 041 */       project_value_4 = (long)(project_expr_0_0 % 2L);
   /* 042 */     }
   /* 043 */     if (!project_isNull_4) {
   /* 044 */       project_isNull_3 = false; // resultCode could change 
nullability.
   /* 045 */       project_value_3 = project_value_4 == 0L;
   /* 046 */
   /* 047 */     }
   /* 048 */     boolean project_isNull_2 = false;
   /* 049 */     int project_value_2 = -1;
   /* 050 */     if (!project_isNull_3 && project_value_3) {
   /* 051 */       project_isNull_2 = false;
   /* 052 */       project_value_2 = 1;
   /* 053 */     } else {
   /* 054 */       project_isNull_2 = false;
   /* 055 */       project_value_2 = 0;
   /* 056 */     }
   ...
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to