GitHub user kiszk opened a pull request:

    https://github.com/apache/spark/pull/15745

    [SPARK-18207][SQL] Fix a compilation error due to HashExpression.doGenCode

    ## What changes were proposed in this pull request?
    
This PR avoids a compilation error caused by Java bytecode that exceeds the 
64KB method size limit. This error occurs because the Java code generated for 
computing a hash value for a row is too big. This PR fixes the compilation 
error by splitting the big code chunk into multiple methods by calling 
`CodegenContext.splitExpression` in `HashExpression.doGenCode`.
    
The test case requires the calculation of a hash code for a row that includes 
1000 String fields. `HashExpression.doGenCode` generates a lot of Java code for 
this computation in a single function. As a result, the size of the corresponding 
Java bytecode exceeds 64 KB.
    
    Generated code without this PR
    ````java
    /* 027 */   public UnsafeRow apply(InternalRow i) {
    /* 028 */     boolean isNull = false;
    /* 029 */
    /* 030 */     int value1 = 42;
    /* 031 */
    /* 032 */     boolean isNull2 = i.isNullAt(0);
    /* 033 */     UTF8String value2 = isNull2 ? null : (i.getUTF8String(0));
    /* 034 */     if (!isNull2) {
    /* 035 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value2.getBaseObject(),
 value2.getBaseOffset(), value2.numBytes(), value1);
    /* 036 */     }
    /* 037 */
    /* 038 */
    /* 039 */     boolean isNull3 = i.isNullAt(1);
    /* 040 */     UTF8String value3 = isNull3 ? null : (i.getUTF8String(1));
    /* 041 */     if (!isNull3) {
    /* 042 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value3.getBaseObject(),
 value3.getBaseOffset(), value3.numBytes(), value1);
    /* 043 */     }
    /* 044 */
    /* 045 */
    ...
    /* 7024 */
    /* 7025 */     boolean isNull1001 = i.isNullAt(999);
    /* 7026 */     UTF8String value1001 = isNull1001 ? null : 
(i.getUTF8String(999));
    /* 7027 */     if (!isNull1001) {
    /* 7028 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1001.getBaseObject(),
 value1001.getBaseOffset(), value1001.numBytes(), value1);
    /* 7029 */     }
    /* 7030 */
    /* 7031 */
    /* 7032 */     boolean isNull1002 = i.isNullAt(1000);
    /* 7033 */     UTF8String value1002 = isNull1002 ? null : 
(i.getUTF8String(1000));
    /* 7034 */     if (!isNull1002) {
    /* 7035 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1002.getBaseObject(),
 value1002.getBaseOffset(), value1002.numBytes(), value1);
    /* 7036 */     }
    ````
    
    Generated code with this PR
    ````java
    /* 3807 */   private void apply_249(InternalRow i) {
    /* 3808 */
    /* 3809 */     boolean isNull998 = i.isNullAt(996);
    /* 3810 */     UTF8String value998 = isNull998 ? null : 
(i.getUTF8String(996));
    /* 3811 */     if (!isNull998) {
    /* 3812 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value998.getBaseObject(),
 value998.getBaseOffset(), value998.numBytes(), value1);
    /* 3813 */     }
    /* 3814 */
    /* 3815 */     boolean isNull999 = i.isNullAt(997);
    /* 3816 */     UTF8String value999 = isNull999 ? null : 
(i.getUTF8String(997));
    /* 3817 */     if (!isNull999) {
    /* 3818 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value999.getBaseObject(),
 value999.getBaseOffset(), value999.numBytes(), value1);
    /* 3819 */     }
    /* 3820 */
    /* 3821 */     boolean isNull1000 = i.isNullAt(998);
    /* 3822 */     UTF8String value1000 = isNull1000 ? null : 
(i.getUTF8String(998));
    /* 3823 */     if (!isNull1000) {
    /* 3824 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1000.getBaseObject(),
 value1000.getBaseOffset(), value1000.numBytes(), value1);
    /* 3825 */     }
    /* 3826 */
    /* 3827 */     boolean isNull1001 = i.isNullAt(999);
    /* 3828 */     UTF8String value1001 = isNull1001 ? null : 
(i.getUTF8String(999));
    /* 3829 */     if (!isNull1001) {
    /* 3830 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1001.getBaseObject(),
 value1001.getBaseOffset(), value1001.numBytes(), value1);
    /* 3831 */     }
    /* 3832 */
    /* 3833 */   }
    /* 3834 */
    ...
    /* 4532 */   private void apply_0(InternalRow i) {
    /* 4533 */
    /* 4534 */     boolean isNull2 = i.isNullAt(0);
    /* 4535 */     UTF8String value2 = isNull2 ? null : (i.getUTF8String(0));
    /* 4536 */     if (!isNull2) {
    /* 4537 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value2.getBaseObject(),
 value2.getBaseOffset(), value2.numBytes(), value1);
    /* 4538 */     }
    /* 4539 */
    /* 4540 */     boolean isNull3 = i.isNullAt(1);
    /* 4541 */     UTF8String value3 = isNull3 ? null : (i.getUTF8String(1));
    /* 4542 */     if (!isNull3) {
    /* 4543 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value3.getBaseObject(),
 value3.getBaseOffset(), value3.numBytes(), value1);
    /* 4544 */     }
    /* 4545 */
    /* 4546 */     boolean isNull4 = i.isNullAt(2);
    /* 4547 */     UTF8String value4 = isNull4 ? null : (i.getUTF8String(2));
    /* 4548 */     if (!isNull4) {
    /* 4549 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value4.getBaseObject(),
 value4.getBaseOffset(), value4.numBytes(), value1);
    /* 4550 */     }
    /* 4551 */
    /* 4552 */     boolean isNull5 = i.isNullAt(3);
    /* 4553 */     UTF8String value5 = isNull5 ? null : (i.getUTF8String(3));
    /* 4554 */     if (!isNull5) {
    /* 4555 */       value1 = 
org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value5.getBaseObject(),
 value5.getBaseOffset(), value5.numBytes(), value1);
    /* 4556 */     }
    /* 4557 */
    /* 4558 */   }
    ...
    /* 7344 */   public UnsafeRow apply(InternalRow i) {
    /* 7345 */     boolean isNull = false;
    /* 7346 */
    /* 7347 */     value1 = 42;
    /* 7348 */     apply_0(i);
    /* 7349 */     apply_1(i);
    ...
    /* 7596 */     apply_248(i);
    /* 7597 */     apply_249(i);
    /* 7598 */     apply_250(i);
    /* 7599 */     apply_251(i);
    ...
    ````
    
    
    ## How was this patch tested?
    
    Added a new test in `DataFrameSuite`.


You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/kiszk/spark SPARK-18207

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/spark/pull/15745.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #15745
    
----
commit d61b735bd62ca8cbbdd3a383297d954ce5f72641
Author: Kazuaki Ishizaki <[email protected]>
Date:   2016-11-03T02:16:56Z

    initial commit

----


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to