[
https://issues.apache.org/jira/browse/SPARK-25974?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16679581#comment-16679581
]
Apache Spark commented on SPARK-25974:
--------------------------------------
User 'heary-cao' has created a pull request for this issue:
https://github.com/apache/spark/pull/22976
> Optimizes Generates bytecode for ordering based on the given order
> ------------------------------------------------------------------
>
> Key: SPARK-25974
> URL: https://issues.apache.org/jira/browse/SPARK-25974
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 2.4.1
> Reporter: caoxuewen
> Priority: Major
>
> Currently, when the code for ordering based on the given order is generated,
> too many variables and assignment statements are produced, which is
> unnecessary. This PR eliminates the redundant variables, optimizing the
> bytecode generated for ordering based on the given order.
> The generated code looks like:
> spark.range(1).selectExpr(
> "id as key",
> "(id & 1023) as value1",
> "cast(id & 1023 as double) as value2",
> "cast(id & 1023 as int) as value3"
> ).select("value1", "value2", "value3").orderBy("value1", "value2").collect()
> Before PR (codegen size: 178)
> Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1,
> double, false] ASC NULLS FIRST:
> /* 001 */ public SpecificOrdering generate(Object[] references) {
> /* 002 */ return new SpecificOrdering(references);
> /* 003 */ }
> /* 004 */
> /* 005 */ class SpecificOrdering extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
> /* 006 */
> /* 007 */ private Object[] references;
> /* 008 */
> /* 009 */
> /* 010 */ public SpecificOrdering(Object[] references) {
> /* 011 */ this.references = references;
> /* 012 */
> /* 013 */ }
> /* 014 */
> /* 015 */ public int compare(InternalRow a, InternalRow b) {
> /* 016 */
> /* 017 */ InternalRow i = null;
> /* 018 */
> /* 019 */ i = a;
> /* 020 */ boolean isNullA_0;
> /* 021 */ long primitiveA_0;
> /* 022 */ {
> /* 023 */ long value_0 = i.getLong(0);
> /* 024 */ isNullA_0 = false;
> /* 025 */ primitiveA_0 = value_0;
> /* 026 */ }
> /* 027 */ i = b;
> /* 028 */ boolean isNullB_0;
> /* 029 */ long primitiveB_0;
> /* 030 */ {
> /* 031 */ long value_0 = i.getLong(0);
> /* 032 */ isNullB_0 = false;
> /* 033 */ primitiveB_0 = value_0;
> /* 034 */ }
> /* 035 */ if (isNullA_0 && isNullB_0) {
> /* 036 */ // Nothing
> /* 037 */ } else if (isNullA_0) {
> /* 038 */ return -1;
> /* 039 */ } else if (isNullB_0) {
> /* 040 */ return 1;
> /* 041 */ } else {
> /* 042 */ int comp = (primitiveA_0 > primitiveB_0 ? 1 : primitiveA_0 <
> primitiveB_0 ? -1 : 0);
> /* 043 */ if (comp != 0) {
> /* 044 */ return comp;
> /* 045 */ }
> /* 046 */ }
> /* 047 */
> /* 048 */ i = a;
> /* 049 */ boolean isNullA_1;
> /* 050 */ double primitiveA_1;
> /* 051 */ {
> /* 052 */ double value_1 = i.getDouble(1);
> /* 053 */ isNullA_1 = false;
> /* 054 */ primitiveA_1 = value_1;
> /* 055 */ }
> /* 056 */ i = b;
> /* 057 */ boolean isNullB_1;
> /* 058 */ double primitiveB_1;
> /* 059 */ {
> /* 060 */ double value_1 = i.getDouble(1);
> /* 061 */ isNullB_1 = false;
> /* 062 */ primitiveB_1 = value_1;
> /* 063 */ }
> /* 064 */ if (isNullA_1 && isNullB_1) {
> /* 065 */ // Nothing
> /* 066 */ } else if (isNullA_1) {
> /* 067 */ return -1;
> /* 068 */ } else if (isNullB_1) {
> /* 069 */ return 1;
> /* 070 */ } else {
> /* 071 */ int comp =
> org.apache.spark.util.Utils.nanSafeCompareDoubles(primitiveA_1, primitiveB_1);
> /* 072 */ if (comp != 0) {
> /* 073 */ return comp;
> /* 074 */ }
> /* 075 */ }
> /* 076 */
> /* 077 */
> /* 078 */ return 0;
> /* 079 */ }
> /* 080 */
> /* 081 */
> /* 082 */ }
> After PR (codegen size: 89)
> Generated Ordering by input[0, bigint, false] ASC NULLS FIRST,input[1,
> double, false] ASC NULLS FIRST:
> /* 001 */ public SpecificOrdering generate(Object[] references) {
> /* 002 */ return new SpecificOrdering(references);
> /* 003 */ }
> /* 004 */
> /* 005 */ class SpecificOrdering extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering {
> /* 006 */
> /* 007 */ private Object[] references;
> /* 008 */
> /* 009 */
> /* 010 */ public SpecificOrdering(Object[] references) {
> /* 011 */ this.references = references;
> /* 012 */
> /* 013 */ }
> /* 014 */
> /* 015 */ public int compare(InternalRow a, InternalRow b) {
> /* 016 */
> /* 017 */
> /* 018 */ long value_0 = a.getLong(0);
> /* 019 */ long value_2 = b.getLong(0);
> /* 020 */ if (false && false) {
> /* 021 */ // Nothing
> /* 022 */ } else if (false) {
> /* 023 */ return -1;
> /* 024 */ } else if (false) {
> /* 025 */ return 1;
> /* 026 */ } else {
> /* 027 */ int comp = (value_0 > value_2 ? 1 : value_0 < value_2 ? -1 :
> 0);
> /* 028 */ if (comp != 0) {
> /* 029 */ return comp;
> /* 030 */ }
> /* 031 */ }
> /* 032 */
> /* 033 */ double value_1 = a.getDouble(1);
> /* 034 */ double value_3 = b.getDouble(1);
> /* 035 */ if (false && false) {
> /* 036 */ // Nothing
> /* 037 */ } else if (false) {
> /* 038 */ return -1;
> /* 039 */ } else if (false) {
> /* 040 */ return 1;
> /* 041 */ } else {
> /* 042 */ int comp =
> org.apache.spark.util.Utils.nanSafeCompareDoubles(value_1, value_3);
> /* 043 */ if (comp != 0) {
> /* 044 */ return comp;
> /* 045 */ }
> /* 046 */ }
> /* 047 */
> /* 048 */
> /* 049 */ return 0;
> /* 050 */ }
> /* 051 */
> /* 052 */
> /* 053 */ }
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]