GitHub user dbtsai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21850#discussion_r204933164
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala ---
    @@ -414,6 +414,9 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
             // these branches can be pruned away
             val (h, t) = branches.span(_._1 != TrueLiteral)
             CaseWhen( h :+ t.head, None)
    +
    +      case CaseWhen((cond, branchValue) :: Nil, elseValue) =>
    +        If(cond, branchValue, elseValue.getOrElse(Literal(null, branchValue.dataType)))
    --- End diff ---
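
    To make the new case easier to follow outside of Catalyst, here is a minimal
    self-contained sketch of the same rewrite; `Expr`, `Literal`, `If`, and
    `CaseWhen` below are simplified stand-ins for the Catalyst classes, not
    Spark's actual API:

    ```scala
    // Simplified stand-ins for Catalyst expressions (illustration only).
    sealed trait Expr { def dataType: String }
    case class Literal(value: Any, dataType: String) extends Expr
    case class If(cond: Expr, trueValue: Expr, falseValue: Expr) extends Expr {
      def dataType: String = trueValue.dataType
    }
    case class CaseWhen(branches: List[(Expr, Expr)], elseValue: Option[Expr]) extends Expr {
      def dataType: String = branches.head._2.dataType
    }

    // The new case from the diff: a CASE WHEN with exactly one branch becomes an If,
    // falling back to a typed null literal when no ELSE clause is present.
    def simplify(e: Expr): Expr = e match {
      case CaseWhen((cond, branchValue) :: Nil, elseValue) =>
        If(cond, branchValue, elseValue.getOrElse(Literal(null, branchValue.dataType)))
      case other => other
    }

    // simplify(CaseWhen(List((Literal(true, "boolean"), Literal(1, "int"))), None))
    //   ==> If(Literal(true,boolean), Literal(1,int), Literal(null,int))
    ```

    The `getOrElse` arm is what preserves semantics: a `CASE WHEN` with no `ELSE`
    evaluates to NULL, so the rewrite must supply a null literal of the branch
    value's data type.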
    
    Before:
    
    ```
    == Parsed Logical Plan ==
    'Project [CASE WHEN isnull('a) THEN 1 END AS col1#181]
    +- 'UnresolvedRelation
    
    == Optimized Logical Plan ==
    Project [CASE WHEN isnull(a#182) THEN 1 END AS col1#181]
    +- Relation[a#182] parquet
    ```
    Generated Java code:
    
    ```java
    /* 043 */   protected void processNext() throws java.io.IOException {
    /* 044 */     if (scan_mutableStateArray_1[0] == null) {
    /* 045 */       scan_nextBatch_0();
    /* 046 */     }
    /* 047 */     while (scan_mutableStateArray_1[0] != null) {
    /* 048 */       int scan_numRows_0 = scan_mutableStateArray_1[0].numRows();
    /* 049 */       int scan_localEnd_0 = scan_numRows_0 - scan_batchIdx_0;
    /* 050 */         for (int scan_localIdx_0 = 0; scan_localIdx_0 < scan_localEnd_0; scan_localIdx_0++) {
    /* 051 */         int scan_rowIdx_0 = scan_batchIdx_0 + scan_localIdx_0;
    /* 052 */         byte project_caseWhenResultState_0 = -1;
    /* 053 */         do {
    /* 054 */           boolean scan_isNull_0 = scan_mutableStateArray_2[0].isNullAt(scan_rowIdx_0);
    /* 055 */           int scan_value_0 = scan_isNull_0 ? -1 : (scan_mutableStateArray_2[0].getInt(scan_rowIdx_0));
    /* 056 */           if (!false && scan_isNull_0) {
    /* 057 */             project_caseWhenResultState_0 = (byte)(false ? 1 : 0);
    /* 058 */             project_project_value_0_0 = 1;
    /* 059 */             continue;
    /* 060 */           }
    /* 061 */
    /* 062 */         } while (false);
    /* 063 */         // TRUE if any condition is met and the result is null, or no any condition is met.
    /* 064 */         final boolean project_isNull_0 = (project_caseWhenResultState_0 != 0);
    /* 065 */         scan_mutableStateArray_3[1].reset();
    /* 066 */
    /* 067 */         scan_mutableStateArray_3[1].zeroOutNullBytes();
    /* 068 */
    /* 069 */         if (project_isNull_0) {
    /* 070 */           scan_mutableStateArray_3[1].setNullAt(0);
    /* 071 */         } else {
    /* 072 */           scan_mutableStateArray_3[1].write(0, project_project_value_0_0);
    /* 073 */         }
    /* 074 */         append((scan_mutableStateArray_3[1].getRow()));
    /* 075 */         if (shouldStop()) { scan_batchIdx_0 = scan_rowIdx_0 + 1; return; }
    /* 076 */       }
    /* 077 */       scan_batchIdx_0 = scan_numRows_0;
    /* 078 */       scan_mutableStateArray_1[0] = null;
    /* 079 */       scan_nextBatch_0();
    /* 080 */     }
    /* 081 */     ((org.apache.spark.sql.execution.metric.SQLMetric) references[1] /* scanTime */).add(scan_scanTime_0 / (1000 * 1000));
    /* 082 */     scan_scanTime_0 = 0;
    /* 083 */   }
    ```
    
    After (the single-branch `CASE WHEN` is now an `If`, so codegen emits a plain if/else with no case-when result-state byte or do/while loop):
    
    ```
    == Parsed Logical Plan ==
    'Project [CASE WHEN isnull('a) THEN 1 END AS b#186]
    +- 'UnresolvedRelation `tddddd`
    
    == Optimized Logical Plan ==
    Project [if (isnull(a#187)) 1 else null AS b#186]
    +- Relation[a#187,b#188] parquet
    ```
    
    Generated Java code:
    
    ```java
    /* 042 */   protected void processNext() throws java.io.IOException {
    /* 043 */     if (scan_mutableStateArray_1[0] == null) {
    /* 044 */       scan_nextBatch_0();
    /* 045 */     }
    /* 046 */     while (scan_mutableStateArray_1[0] != null) {
    /* 047 */       int scan_numRows_0 = scan_mutableStateArray_1[0].numRows();
    /* 048 */       int scan_localEnd_0 = scan_numRows_0 - scan_batchIdx_0;
    /* 049 */       for (int scan_localIdx_0 = 0; scan_localIdx_0 < scan_localEnd_0; scan_localIdx_0++) {
    /* 050 */         int scan_rowIdx_0 = scan_batchIdx_0 + scan_localIdx_0;
    /* 051 */         boolean scan_isNull_0 = scan_mutableStateArray_2[0].isNullAt(scan_rowIdx_0);
    /* 052 */         int scan_value_0 = scan_isNull_0 ? -1 : (scan_mutableStateArray_2[0].getInt(scan_rowIdx_0));
    /* 053 */         boolean project_isNull_0 = false;
    /* 054 */         int project_value_0 = -1;
    /* 055 */         if (!false && scan_isNull_0) {
    /* 056 */           project_isNull_0 = false;
    /* 057 */           project_value_0 = 1;
    /* 058 */         } else {
    /* 059 */           project_isNull_0 = true;
    /* 060 */           project_value_0 = -1;
    /* 061 */         }
    /* 062 */         scan_mutableStateArray_3[1].reset();
    /* 063 */
    /* 064 */         scan_mutableStateArray_3[1].zeroOutNullBytes();
    /* 065 */
    /* 066 */         if (project_isNull_0) {
    /* 067 */           scan_mutableStateArray_3[1].setNullAt(0);
    /* 068 */         } else {
    /* 069 */           scan_mutableStateArray_3[1].write(0, project_value_0);
    /* 070 */         }
    /* 071 */         append((scan_mutableStateArray_3[1].getRow()));
    /* 072 */         if (shouldStop()) { scan_batchIdx_0 = scan_rowIdx_0 + 1; return; }
    /* 073 */       }
    /* 074 */       scan_batchIdx_0 = scan_numRows_0;
    /* 075 */       scan_mutableStateArray_1[0] = null;
    /* 076 */       scan_nextBatch_0();
    /* 077 */     }
    /* 078 */     ((org.apache.spark.sql.execution.metric.SQLMetric) references[1] /* scanTime */).add(scan_scanTime_0 / (1000 * 1000));
    /* 079 */     scan_scanTime_0 = 0;
    /* 080 */   }
    ```
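
    For reference, plans and codegen like the above can be reproduced in
    spark-shell along these lines (the parquet path and the column name `a`
    are placeholders, not the actual test setup):

    ```scala
    import org.apache.spark.sql.execution.debug._   // provides debugCodegen()

    // Placeholder input: any parquet table with a nullable int column `a`.
    val df = spark.read.parquet("/tmp/t")
    val q  = df.selectExpr("CASE WHEN a IS NULL THEN 1 END AS col1")

    q.explain(true)   // prints the parsed, analyzed, and optimized logical plans
    q.debugCodegen()  // dumps the generated Java, as quoted above
    ```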

