[jira] [Updated] (SPARK-33853) [SPARK-33853][SQL] EXPLAIN CODEGEN doesn't show subquery code

Kousuke Saruta (Jira) Sun, 20 Dec 2020 10:00:35 -0800


     [ 
https://issues.apache.org/jira/browse/SPARK-33853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


Kousuke Saruta updated SPARK-33853:
-----------------------------------
    Description: 
{{EXPLAIN CODEGEN}} and {{BenchmarkQueryTest}} don't show the corresponding 
code for subqueries.

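The following example reproduces the problem with {{EXPLAIN CODEGEN}}. Only
one WholeStageCodegen subtree (the outer {{Project}}) is reported, and no
generated code is shown for the codegen stages inside the scalar subquery.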
{code:java}
spark.conf.set("spark.sql.adaptive.enabled", "false")
val df = spark.range(1, 100)
df.createTempView("df")
spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")

scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")
Found 1 WholeStageCodegen subtrees.
== Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); 
numInnerClasses:0) ==
*(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L]
:  +- Subquery scalar-subquery#3, [id=#24]
:     +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L])
:        +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20]
:           +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], 
output=[min#8L])
:              +- *(1) Range (1, 100, step=1, splits=12)
+- *(1) Scan OneRowRelation[]

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends 
org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */   private Object[] references;
/* 008 */   private scala.collection.Iterator[] inputs;
/* 009 */   private scala.collection.Iterator rdd_input_0;
/* 010 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] 
project_mutableStateArray_0 = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
/* 011 */
/* 012 */   public GeneratedIteratorForCodegenStage1(Object[] references) {
/* 013 */     this.references = references;
/* 014 */   }
/* 015 */
/* 016 */   public void init(int index, scala.collection.Iterator[] inputs) {
/* 017 */     partitionIndex = index;
/* 018 */     this.inputs = inputs;
/* 019 */     rdd_input_0 = inputs[0];
/* 020 */     project_mutableStateArray_0[0] = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 021 */
/* 022 */   }
/* 023 */
/* 024 */   private void project_doConsume_0() throws java.io.IOException {
/* 025 */     // common sub-expressions
/* 026 */
/* 027 */     project_mutableStateArray_0[0].reset();
/* 028 */
/* 029 */     if (false) {
/* 030 */       project_mutableStateArray_0[0].setNullAt(0);
/* 031 */     } else {
/* 032 */       project_mutableStateArray_0[0].write(0, 1L);
/* 033 */     }
/* 034 */     append((project_mutableStateArray_0[0].getRow()));
/* 035 */
/* 036 */   }
/* 037 */
/* 038 */   protected void processNext() throws java.io.IOException {
/* 039 */     while ( rdd_input_0.hasNext()) {
/* 040 */       InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next();
/* 041 */       ((org.apache.spark.sql.execution.metric.SQLMetric) 
references[0] /* numOutputRows */).add(1);
/* 042 */       project_doConsume_0();
/* 043 */       if (shouldStop()) return;
/* 044 */     }
/* 045 */   }
/* 046 */
/* 047 */ }
{code}

  was:
EXPLAIN CODEGEN doesn't show the corresponding code for subqueries.

{code}
spark.conf.set("spark.sql.adaptive.enabled", "false")
val df = spark.range(1, 100)
df.createTempView("df")
spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")

scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")
Found 1 WholeStageCodegen subtrees.
== Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); 
numInnerClasses:0) ==
*(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L]
:  +- Subquery scalar-subquery#3, [id=#24]
:     +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L])
:        +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20]
:           +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], 
output=[min#8L])
:              +- *(1) Range (1, 100, step=1, splits=12)
+- *(1) Scan OneRowRelation[]

Generated code:
/* 001 */ public Object generate(Object[] references) {
/* 002 */   return new GeneratedIteratorForCodegenStage1(references);
/* 003 */ }
/* 004 */
/* 005 */ // codegenStageId=1
/* 006 */ final class GeneratedIteratorForCodegenStage1 extends 
org.apache.spark.sql.execution.BufferedRowIterator {
/* 007 */   private Object[] references;
/* 008 */   private scala.collection.Iterator[] inputs;
/* 009 */   private scala.collection.Iterator rdd_input_0;
/* 010 */   private 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] 
project_mutableStateArray_0 = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
/* 011 */
/* 012 */   public GeneratedIteratorForCodegenStage1(Object[] references) {
/* 013 */     this.references = references;
/* 014 */   }
/* 015 */
/* 016 */   public void init(int index, scala.collection.Iterator[] inputs) {
/* 017 */     partitionIndex = index;
/* 018 */     this.inputs = inputs;
/* 019 */     rdd_input_0 = inputs[0];
/* 020 */     project_mutableStateArray_0[0] = new 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
/* 021 */
/* 022 */   }
/* 023 */
/* 024 */   private void project_doConsume_0() throws java.io.IOException {
/* 025 */     // common sub-expressions
/* 026 */
/* 027 */     project_mutableStateArray_0[0].reset();
/* 028 */
/* 029 */     if (false) {
/* 030 */       project_mutableStateArray_0[0].setNullAt(0);
/* 031 */     } else {
/* 032 */       project_mutableStateArray_0[0].write(0, 1L);
/* 033 */     }
/* 034 */     append((project_mutableStateArray_0[0].getRow()));
/* 035 */
/* 036 */   }
/* 037 */
/* 038 */   protected void processNext() throws java.io.IOException {
/* 039 */     while ( rdd_input_0.hasNext()) {
/* 040 */       InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next();
/* 041 */       ((org.apache.spark.sql.execution.metric.SQLMetric) 
references[0] /* numOutputRows */).add(1);
/* 042 */       project_doConsume_0();
/* 043 */       if (shouldStop()) return;
/* 044 */     }
/* 045 */   }
/* 046 */
/* 047 */ }
{code}


> [SPARK-33853][SQL] EXPLAIN CODEGEN doesn't show subquery code
> -------------------------------------------------------------
>
>                 Key: SPARK-33853
>                 URL: https://issues.apache.org/jira/browse/SPARK-33853
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.0.0, 3.0.1, 3.1.0, 3.2.0
>            Reporter: Kousuke Saruta
>            Assignee: Kousuke Saruta
>            Priority: Major
>
> {{EXPLAIN CODEGEN}} and {{BenchmarkQueryTest}} don't show the corresponding 
> code for subqueries.
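> The following example reproduces the problem with {{EXPLAIN CODEGEN}}. Only
> one WholeStageCodegen subtree (the outer {{Project}}) is reported, and no
> generated code is shown for the codegen stages inside the scalar subquery.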
> {code:java}
> spark.conf.set("spark.sql.adaptive.enabled", "false")
> val df = spark.range(1, 100)
> df.createTempView("df")
> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")
> scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN")
> Found 1 WholeStageCodegen subtrees.
> == Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); 
> numInnerClasses:0) ==
> *(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L]
> :  +- Subquery scalar-subquery#3, [id=#24]
> :     +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L])
> :        +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20]
> :           +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], 
> output=[min#8L])
> :              +- *(1) Range (1, 100, step=1, splits=12)
> +- *(1) Scan OneRowRelation[]
> Generated code:
> /* 001 */ public Object generate(Object[] references) {
> /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
> /* 003 */ }
> /* 004 */
> /* 005 */ // codegenStageId=1
> /* 006 */ final class GeneratedIteratorForCodegenStage1 extends 
> org.apache.spark.sql.execution.BufferedRowIterator {
> /* 007 */   private Object[] references;
> /* 008 */   private scala.collection.Iterator[] inputs;
> /* 009 */   private scala.collection.Iterator rdd_input_0;
> /* 010 */   private 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] 
> project_mutableStateArray_0 = new 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1];
> /* 011 */
> /* 012 */   public GeneratedIteratorForCodegenStage1(Object[] references) {
> /* 013 */     this.references = references;
> /* 014 */   }
> /* 015 */
> /* 016 */   public void init(int index, scala.collection.Iterator[] inputs) {
> /* 017 */     partitionIndex = index;
> /* 018 */     this.inputs = inputs;
> /* 019 */     rdd_input_0 = inputs[0];
> /* 020 */     project_mutableStateArray_0[0] = new 
> org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0);
> /* 021 */
> /* 022 */   }
> /* 023 */
> /* 024 */   private void project_doConsume_0() throws java.io.IOException {
> /* 025 */     // common sub-expressions
> /* 026 */
> /* 027 */     project_mutableStateArray_0[0].reset();
> /* 028 */
> /* 029 */     if (false) {
> /* 030 */       project_mutableStateArray_0[0].setNullAt(0);
> /* 031 */     } else {
> /* 032 */       project_mutableStateArray_0[0].write(0, 1L);
> /* 033 */     }
> /* 034 */     append((project_mutableStateArray_0[0].getRow()));
> /* 035 */
> /* 036 */   }
> /* 037 */
> /* 038 */   protected void processNext() throws java.io.IOException {
> /* 039 */     while ( rdd_input_0.hasNext()) {
> /* 040 */       InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next();
> /* 041 */       ((org.apache.spark.sql.execution.metric.SQLMetric) 
> references[0] /* numOutputRows */).add(1);
> /* 042 */       project_doConsume_0();
> /* 043 */       if (shouldStop()) return;
> /* 044 */     }
> /* 045 */   }
> /* 046 */
> /* 047 */ }
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[jira] [Updated] (SPARK-33853) [SPARK-33853][SQL] EXPLAIN CODEGEN doesn't show subquery code

Reply via email to