[GitHub] spark pull request #22976: [SPARK-25974][SQL]Optimizes Generates bytecode fo...

heary-cao Thu, 08 Nov 2018 02:40:38 -0800

GitHub user heary-cao opened a pull request:

    https://github.com/apache/spark/pull/22976


    [SPARK-25974][SQL]Optimizes Generates bytecode for ordering based on the 
given order

    ## What changes were proposed in this pull request?
    
    Currently, when generating the code for ordering based on the given order,
    too many variables and assignment statements are generated, which is
    unnecessary. This PR eliminates the redundant variables, optimizing the
    generated bytecode for ordering based on the given order.
    For example, consider the code generated for the following query:
    
    spark.range(1).selectExpr(
         "id as key",
         "(id & 1023) as value1",
    "cast(id & 1023 as double) as value2",
    "cast(id & 1023 as int) as value3"
    ).select("value1", "value2", "value3").orderBy("value1", "value2").collect()
    
    
    Before PR (codegen size: 178)
    
    Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, 
double, false] ASC NULLS FIRST:
    /* 001 */ public SpecificOrdering generate(Object[] references) {
    /* 002 */   return new SpecificOrdering(references);
    /* 003 */ }
    /* 004 */
    /* 005 */ class SpecificOrdering extends 
org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
    /* 006 */
    /* 007 */   private Object[] references;
    /* 008 */
    /* 009 */
    /* 010 */   public SpecificOrdering(Object[] references) {
    /* 011 */     this.references = references;
    /* 012 */
    /* 013 */   }
    /* 014 */
    /* 015 */   public int compare(InternalRow a, InternalRow b) {
    /* 016 */
    /* 017 */     InternalRow i = null;
    /* 018 */
    /* 019 */     i = a;
    /* 020 */     boolean isNullA_0;
    /* 021 */     long primitiveA_0;
    /* 022 */     {
    /* 023 */       long value_0 = i.getLong(0);
    /* 024 */       isNullA_0 = false;
    /* 025 */       primitiveA_0 = value_0;
    /* 026 */     }
    /* 027 */     i = b;
    /* 028 */     boolean isNullB_0;
    /* 029 */     long primitiveB_0;
    /* 030 */     {
    /* 031 */       long value_0 = i.getLong(0);
    /* 032 */       isNullB_0 = false;
    /* 033 */       primitiveB_0 = value_0;
    /* 034 */     }
    /* 035 */     if (isNullA_0 && isNullB_0) {
    /* 036 */       // Nothing
    /* 037 */     } else if (isNullA_0) {
    /* 038 */       return -1;
    /* 039 */     } else if (isNullB_0) {
    /* 040 */       return 1;
    /* 041 */     } else {
    /* 042 */       int comp = (primitiveA_0 > primitiveB_0 ? 1 : primitiveA_0 
< primitiveB_0 ? -1 : 0);
    /* 043 */       if (comp != 0) {
    /* 044 */         return comp;
    /* 045 */       }
    /* 046 */     }
    /* 047 */
    /* 048 */     i = a;
    /* 049 */     boolean isNullA_1;
    /* 050 */     double primitiveA_1;
    /* 051 */     {
    /* 052 */       double value_1 = i.getDouble(1);
    /* 053 */       isNullA_1 = false;
    /* 054 */       primitiveA_1 = value_1;
    /* 055 */     }
    /* 056 */     i = b;
    /* 057 */     boolean isNullB_1;
    /* 058 */     double primitiveB_1;
    /* 059 */     {
    /* 060 */       double value_1 = i.getDouble(1);
    /* 061 */       isNullB_1 = false;
    /* 062 */       primitiveB_1 = value_1;
    /* 063 */     }
    /* 064 */     if (isNullA_1 && isNullB_1) {
    /* 065 */       // Nothing
    /* 066 */     } else if (isNullA_1) {
    /* 067 */       return -1;
    /* 068 */     } else if (isNullB_1) {
    /* 069 */       return 1;
    /* 070 */     } else {
    /* 071 */       int comp = 
org.apache.spark.util.Utils.nanSafeCompareDoubles(primitiveA_1, primitiveB_1);
    /* 072 */       if (comp != 0) {
    /* 073 */         return comp;
    /* 074 */       }
    /* 075 */     }
    /* 076 */
    /* 077 */
    /* 078 */     return 0;
    /* 079 */   }
    /* 080 */
    /* 081 */
    /* 082 */ }
    
    After PR (codegen size: 89)
    Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1, 
double, false] ASC NULLS FIRST:
    /* 001 */ public SpecificOrdering generate(Object[] references) {
    /* 002 */   return new SpecificOrdering(references);
    /* 003 */ }
    /* 004 */
    /* 005 */ class SpecificOrdering extends 
org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
    /* 006 */
    /* 007 */   private Object[] references;
    /* 008 */
    /* 009 */
    /* 010 */   public SpecificOrdering(Object[] references) {
    /* 011 */     this.references = references;
    /* 012 */
    /* 013 */   }
    /* 014 */
    /* 015 */   public int compare(InternalRow a, InternalRow b) {
    /* 016 */
    /* 017 */
    /* 018 */     long value_0 = a.getLong(0);
    /* 019 */     long value_2 = b.getLong(0);
    /* 020 */     if (false && false) {
    /* 021 */       // Nothing
    /* 022 */     } else if (false) {
    /* 023 */       return -1;
    /* 024 */     } else if (false) {
    /* 025 */       return 1;
    /* 026 */     } else {
    /* 027 */       int comp = (value_0 > value_2 ? 1 : value_0 < value_2 ? -1 
: 0);
    /* 028 */       if (comp != 0) {
    /* 029 */         return comp;
    /* 030 */       }
    /* 031 */     }
    /* 032 */
    /* 033 */     double value_1 = a.getDouble(1);
    /* 034 */     double value_3 = b.getDouble(1);
    /* 035 */     if (false && false) {
    /* 036 */       // Nothing
    /* 037 */     } else if (false) {
    /* 038 */       return -1;
    /* 039 */     } else if (false) {
    /* 040 */       return 1;
    /* 041 */     } else {
    /* 042 */       int comp = 
org.apache.spark.util.Utils.nanSafeCompareDoubles(value_1, value_3);
    /* 043 */       if (comp != 0) {
    /* 044 */         return comp;
    /* 045 */       }
    /* 046 */     }
    /* 047 */
    /* 048 */
    /* 049 */     return 0;
    /* 050 */   }
    /* 051 */
    /* 052 */
    /* 053 */ }
    
    ## How was this patch tested?
    
    Existing test cases.


You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/heary-cao/spark GenArrayData

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/spark/pull/22976.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #22976
    
----
commit a42c8a70c68abed0a553b1e1232add4bbae079bb
Author: caoxuewen <cao.xuewen@...>
Date:   2018-11-08T10:24:17Z

    Optimizes Generates bytecode for ordering based on the given order

----


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #22976: [SPARK-25974][SQL]Optimizes Generates bytecode fo...

Reply via email to