This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 9d0d9eb  [SPARK-33948][SQL] Fix CodeGen error of MapObjects.doGenCode 
method in Scala 2.13
9d0d9eb is described below

commit 9d0d9eb7d89e21f7c7480a3096ebd92b2ae20802
Author: yangjie01 <[email protected]>
AuthorDate: Tue Jan 5 23:11:23 2021 -0800

    [SPARK-33948][SQL] Fix CodeGen error of MapObjects.doGenCode method in 
Scala 2.13
    
    ### What changes were proposed in this pull request?
    `MapObjects.doGenCode` method will generate wrong code when `inputDataType` 
is `ArrayBuffer`.
    
    For example `encode/decode for Tuple2: (ArrayBuffer[(String, 
String)],ArrayBuffer((a,b))) (codegen path)` in `ExpressionEncoderSuite`, the 
error generated code part as follow:
    
    ```
    /* 126 */   private scala.collection.mutable.ArrayBuffer 
MapObjects_0(InternalRow i) {
    /* 127 */     boolean isNull_4 = i.isNullAt(1);
    /* 128 */     ArrayData value_4 = isNull_4 ?
    /* 129 */     null : (i.getArray(1));
    /* 130 */     scala.collection.mutable.ArrayBuffer value_3 = null;
    /* 131 */
    /* 132 */     if (!isNull_4) {
    /* 133 */
    /* 134 */       int dataLength_0 = value_4.numElements();
    /* 135 */
    /* 136 */       scala.Tuple2[] convertedArray_0 = null;
    /* 137 */       convertedArray_0 = new scala.Tuple2[dataLength_0];
    /* 138 */
    /* 139 */
    /* 140 */       int loopIndex_0 = 0;
    /* 141 */
    /* 142 */       while (loopIndex_0 < dataLength_0) {
    /* 143 */         value_MapObject_lambda_variable_1 = (InternalRow) 
(value_4.getStruct(loopIndex_0, 2));
    /* 144 */         isNull_MapObject_lambda_variable_1 = 
value_4.isNullAt(loopIndex_0);
    /* 145 */
    /* 146 */         boolean isNull_5 = false;
    /* 147 */         scala.Tuple2 value_5 = null;
    /* 148 */         if (!false && isNull_MapObject_lambda_variable_1) {
    /* 149 */
    /* 150 */           isNull_5 = true;
    /* 151 */           value_5 = ((scala.Tuple2)null);
    /* 152 */         } else {
    /* 153 */           scala.Tuple2 value_13 = NewInstance_0(i);
    /* 154 */           isNull_5 = false;
    /* 155 */           value_5 = value_13;
    /* 156 */         }
    /* 157 */         if (isNull_5) {
    /* 158 */           convertedArray_0[loopIndex_0] = null;
    /* 159 */         } else {
    /* 160 */           convertedArray_0[loopIndex_0] = value_5;
    /* 161 */         }
    /* 162 */
    /* 163 */         loopIndex_0 += 1;
    /* 164 */       }
    /* 165 */
    /* 166 */       value_3 = new 
org.apache.spark.sql.catalyst.util.GenericArrayData(convertedArray_0);
    /* 167 */     }
    /* 168 */     globalIsNull_0 = isNull_4;
    /* 169 */     return value_3;
    /* 170 */   }
    
    ```
    
    Line 166 in generated code try to assign `GenericArrayData`  to 
`value_3(ArrayBuffer)` because `ArrayBuffer` type can't match `s.c.i.Seq` 
branch in Scala 2.13 in `MapObjects.doGenCode` method now.
    
    So this pr change to use `s.c.Seq` instead of `Seq` alias to let 
`ArrayBuffer` type can enter  the same branch as Scala 2.12.
    
    After this pr the generate code when `inputDataType` is `ArrayBuffer` as 
follow:
    
    ```
    /* 126 */   private scala.collection.mutable.ArrayBuffer 
MapObjects_0(InternalRow i) {
    /* 127 */     boolean isNull_4 = i.isNullAt(1);
    /* 128 */     ArrayData value_4 = isNull_4 ?
    /* 129 */     null : (i.getArray(1));
    /* 130 */     scala.collection.mutable.ArrayBuffer value_3 = null;
    /* 131 */
    /* 132 */     if (!isNull_4) {
    /* 133 */
    /* 134 */       int dataLength_0 = value_4.numElements();
    /* 135 */
    /* 136 */       scala.collection.mutable.Builder collectionBuilder_0 = 
scala.collection.mutable.ArrayBuffer$.MODULE$.newBuilder();
    /* 137 */       collectionBuilder_0.sizeHint(dataLength_0);
    /* 138 */
    /* 139 */
    /* 140 */       int loopIndex_0 = 0;
    /* 141 */
    /* 142 */       while (loopIndex_0 < dataLength_0) {
    /* 143 */         value_MapObject_lambda_variable_1 = (InternalRow) 
(value_4.getStruct(loopIndex_0, 2));
    /* 144 */         isNull_MapObject_lambda_variable_1 = 
value_4.isNullAt(loopIndex_0);
    /* 145 */
    /* 146 */         boolean isNull_5 = false;
    /* 147 */         scala.Tuple2 value_5 = null;
    /* 148 */         if (!false && isNull_MapObject_lambda_variable_1) {
    /* 149 */
    /* 150 */           isNull_5 = true;
    /* 151 */           value_5 = ((scala.Tuple2)null);
    /* 152 */         } else {
    /* 153 */           scala.Tuple2 value_13 = NewInstance_0(i);
    /* 154 */           isNull_5 = false;
    /* 155 */           value_5 = value_13;
    /* 156 */         }
    /* 157 */         if (isNull_5) {
    /* 158 */           collectionBuilder_0.$plus$eq(null);
    /* 159 */         } else {
    /* 160 */           collectionBuilder_0.$plus$eq(value_5);
    /* 161 */         }
    /* 162 */
    /* 163 */         loopIndex_0 += 1;
    /* 164 */       }
    /* 165 */
    /* 166 */       value_3 = (scala.collection.mutable.ArrayBuffer) 
collectionBuilder_0.result();
    /* 167 */     }
    /* 168 */     globalIsNull_0 = isNull_4;
    /* 169 */     return value_3;
    /* 170 */   }
    ```
    
    ### Why are the changes needed?
    Bug fix in Scala 2.13
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    
    - Pass the Jenkins or GitHub Action
    - Manual test `sql/catalyst` and `sql/core` in Scala 2.13 passed
    
    ```
    mvn clean test -pl sql/catalyst -Pscala-2.13
    
    Run completed in 11 minutes, 23 seconds.
    Total number of tests run: 4711
    Suites: completed 261, aborted 0
    Tests: succeeded 4711, failed 0, canceled 0, ignored 5, pending 0
    All tests passed.
    ```
    
    - Manual cherry-pick this pr to branch 3.1 and  test`sql/catalyst`  in 
Scala 2.13 passed
    
    ```
    mvn clean test -pl sql/catalyst -Pscala-2.13
    
    Run completed in 11 minutes, 18 seconds.
    Total number of tests run: 4655
    Suites: completed 256, aborted 0
    Tests: succeeded 4655, failed 0, canceled 0, ignored 5, pending 0
    ```
    
    Closes #31055 from LuciferYang/SPARK-33948.
    
    Authored-by: yangjie01 <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 45a4ff8e5472ed724b1bba40ce4ee5d314bf72c2)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../org/apache/spark/sql/catalyst/expressions/objects/objects.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 9303df7..f391b31 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -954,7 +954,7 @@ case class MapObjects private(
           } else {
             doCodeGenForScala213
           }
-        case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) ||
+        case Some(cls) if 
classOf[scala.collection.Seq[_]].isAssignableFrom(cls) ||
           classOf[scala.collection.Set[_]].isAssignableFrom(cls) =>
           // Scala sequence or set
           val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to