[
https://issues.apache.org/jira/browse/SPARK-15205?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Kousuke Saruta updated SPARK-15205:
-----------------------------------
Description:
Sometimes we generate pieces of code that are identical except for their comments.
One example is here.
{code}
val df = sc.parallelize(1 to 10).toDF
df.selectExpr("value + 1").show // query1
df.selectExpr("value + 2").show // query2
{code}
The following is one of the pieces of code generated when query1 above is executed.
{code}
/* 001 */
/* 002 */ public java.lang.Object generate(Object[] references) {
/* 003 */ return new SpecificSafeProjection(references);
/* 004 */ }
/* 005 */
/* 006 */ class SpecificSafeProjection extends
org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 007 */
/* 008 */ private Object[] references;
/* 009 */ private MutableRow mutableRow;
/* 010 */ private Object[] values;
/* 011 */ private org.apache.spark.sql.types.StructType schema;
/* 012 */
/* 013 */
/* 014 */ public SpecificSafeProjection(Object[] references) {
/* 015 */ this.references = references;
/* 016 */ mutableRow = (MutableRow) references[references.length - 1];
/* 017 */
/* 018 */ this.schema = (org.apache.spark.sql.types.StructType)
references[0];
/* 019 */ }
/* 020 */
/* 021 */ public java.lang.Object apply(java.lang.Object _i) {
/* 022 */ InternalRow i = (InternalRow) _i;
/* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else
input[0, int], StructField((value + 1),IntegerType,false)) */
/* 024 */ values = new Object[1];
/* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */
/* 026 */ /* isnull(input[0, int]) */
/* 027 */ /* input[0, int] */
/* 028 */ int value3 = i.getInt(0);
/* 029 */ boolean isNull1 = false;
/* 030 */ int value1 = -1;
/* 031 */ if (!false && false) {
/* 032 */ /* null */
/* 033 */ final int value4 = -1;
/* 034 */ isNull1 = true;
/* 035 */ value1 = value4;
/* 036 */ } else {
/* 037 */ /* input[0, int] */
/* 038 */ int value5 = i.getInt(0);
/* 039 */ isNull1 = false;
/* 040 */ value1 = value5;
/* 041 */ }
/* 042 */ if (isNull1) {
/* 043 */ values[0] = null;
/* 044 */ } else {
/* 045 */ values[0] = value1;
/* 046 */ }
/* 047 */
/* 048 */ final org.apache.spark.sql.Row value = new
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values,
this.schema);
/* 049 */ if (false) {
/* 050 */ mutableRow.setNullAt(0);
/* 051 */ } else {
/* 052 */
/* 053 */ mutableRow.update(0, value);
/* 054 */ }
/* 055 */
/* 056 */ return mutableRow;
/* 057 */ }
/* 058 */ }
/* 059 */
{code}
On the other hand, the following code is for query2.
{code}
/* 001 */
/* 002 */ public java.lang.Object generate(Object[] references) {
/* 003 */ return new SpecificSafeProjection(references);
/* 004 */ }
/* 005 */
/* 006 */ class SpecificSafeProjection extends
org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 007 */
/* 008 */ private Object[] references;
/* 009 */ private MutableRow mutableRow;
/* 010 */ private Object[] values;
/* 011 */ private org.apache.spark.sql.types.StructType schema;
/* 012 */
/* 013 */
/* 014 */ public SpecificSafeProjection(Object[] references) {
/* 015 */ this.references = references;
/* 016 */ mutableRow = (MutableRow) references[references.length - 1];
/* 017 */
/* 018 */ this.schema = (org.apache.spark.sql.types.StructType)
references[0];
/* 019 */ }
/* 020 */
/* 021 */ public java.lang.Object apply(java.lang.Object _i) {
/* 022 */ InternalRow i = (InternalRow) _i;
/* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else
input[0, int], StructField((value + 2),IntegerType,false)) */
/* 024 */ values = new Object[1];
/* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */
/* 026 */ /* isnull(input[0, int]) */
/* 027 */ /* input[0, int] */
/* 028 */ int value3 = i.getInt(0);
/* 029 */ boolean isNull1 = false;
/* 030 */ int value1 = -1;
/* 031 */ if (!false && false) {
/* 032 */ /* null */
/* 033 */ final int value4 = -1;
/* 034 */ isNull1 = true;
/* 035 */ value1 = value4;
/* 036 */ } else {
/* 037 */ /* input[0, int] */
/* 038 */ int value5 = i.getInt(0);
/* 039 */ isNull1 = false;
/* 040 */ value1 = value5;
/* 041 */ }
/* 042 */ if (isNull1) {
/* 043 */ values[0] = null;
/* 044 */ } else {
/* 045 */ values[0] = value1;
/* 046 */ }
/* 047 */
/* 048 */ final org.apache.spark.sql.Row value = new
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values,
this.schema);
/* 049 */ if (false) {
/* 050 */ mutableRow.setNullAt(0);
/* 051 */ } else {
/* 052 */
/* 053 */ mutableRow.update(0, value);
/* 054 */ }
/* 055 */
/* 056 */ return mutableRow;
/* 057 */ }
/* 058 */ }
/* 059 */
{code}
As you can see, the two pieces of generated code are essentially identical
(they differ only in the comment on line 023), but they are not equal as
String objects, so each of them is compiled separately.
was:
Sometimes, we have generated codes they are equal except for comments.
One example is here.
{code}
val df = sc.parallelize(1 to 10).toDF
df.selectExpr("value + 1").show // query1
df.selectExpr("value + 2").show // query2
{code}
The following code is one of generated code when query1 above is executed.
{code}
{code}
On the other hand, the following code is for query2.
{code}
{code}
As you can notice, those two generated codes are equal but
> Codegen can compile more than twice for the same source code
> ------------------------------------------------------------
>
> Key: SPARK-15205
> URL: https://issues.apache.org/jira/browse/SPARK-15205
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.0.0
> Reporter: Kousuke Saruta
>
> Sometimes we generate pieces of code that are identical except for their comments.
> One example is here.
> {code}
> val df = sc.parallelize(1 to 10).toDF
> df.selectExpr("value + 1").show // query1
> df.selectExpr("value + 2").show // query2
> {code}
> The following is one of the pieces of code generated when query1 above is executed.
> {code}
> /* 001 */
> /* 002 */ public java.lang.Object generate(Object[] references) {
> /* 003 */ return new SpecificSafeProjection(references);
> /* 004 */ }
> /* 005 */
> /* 006 */ class SpecificSafeProjection extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
> /* 007 */
> /* 008 */ private Object[] references;
> /* 009 */ private MutableRow mutableRow;
> /* 010 */ private Object[] values;
> /* 011 */ private org.apache.spark.sql.types.StructType schema;
> /* 012 */
> /* 013 */
> /* 014 */ public SpecificSafeProjection(Object[] references) {
> /* 015 */ this.references = references;
> /* 016 */ mutableRow = (MutableRow) references[references.length - 1];
> /* 017 */
> /* 018 */ this.schema = (org.apache.spark.sql.types.StructType)
> references[0];
> /* 019 */ }
> /* 020 */
> /* 021 */ public java.lang.Object apply(java.lang.Object _i) {
> /* 022 */ InternalRow i = (InternalRow) _i;
> /* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else
> input[0, int], StructField((value + 1),IntegerType,false)) */
> /* 024 */ values = new Object[1];
> /* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */
> /* 026 */ /* isnull(input[0, int]) */
> /* 027 */ /* input[0, int] */
> /* 028 */ int value3 = i.getInt(0);
> /* 029 */ boolean isNull1 = false;
> /* 030 */ int value1 = -1;
> /* 031 */ if (!false && false) {
> /* 032 */ /* null */
> /* 033 */ final int value4 = -1;
> /* 034 */ isNull1 = true;
> /* 035 */ value1 = value4;
> /* 036 */ } else {
> /* 037 */ /* input[0, int] */
> /* 038 */ int value5 = i.getInt(0);
> /* 039 */ isNull1 = false;
> /* 040 */ value1 = value5;
> /* 041 */ }
> /* 042 */ if (isNull1) {
> /* 043 */ values[0] = null;
> /* 044 */ } else {
> /* 045 */ values[0] = value1;
> /* 046 */ }
> /* 047 */
> /* 048 */ final org.apache.spark.sql.Row value = new
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values,
> this.schema);
> /* 049 */ if (false) {
> /* 050 */ mutableRow.setNullAt(0);
> /* 051 */ } else {
> /* 052 */
> /* 053 */ mutableRow.update(0, value);
> /* 054 */ }
> /* 055 */
> /* 056 */ return mutableRow;
> /* 057 */ }
> /* 058 */ }
> /* 059 */
> {code}
> On the other hand, the following code is for query2.
> {code}
> /* 001 */
> /* 002 */ public java.lang.Object generate(Object[] references) {
> /* 003 */ return new SpecificSafeProjection(references);
> /* 004 */ }
> /* 005 */
> /* 006 */ class SpecificSafeProjection extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
> /* 007 */
> /* 008 */ private Object[] references;
> /* 009 */ private MutableRow mutableRow;
> /* 010 */ private Object[] values;
> /* 011 */ private org.apache.spark.sql.types.StructType schema;
> /* 012 */
> /* 013 */
> /* 014 */ public SpecificSafeProjection(Object[] references) {
> /* 015 */ this.references = references;
> /* 016 */ mutableRow = (MutableRow) references[references.length - 1];
> /* 017 */
> /* 018 */ this.schema = (org.apache.spark.sql.types.StructType)
> references[0];
> /* 019 */ }
> /* 020 */
> /* 021 */ public java.lang.Object apply(java.lang.Object _i) {
> /* 022 */ InternalRow i = (InternalRow) _i;
> /* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else
> input[0, int], StructField((value + 2),IntegerType,false)) */
> /* 024 */ values = new Object[1];
> /* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */
> /* 026 */ /* isnull(input[0, int]) */
> /* 027 */ /* input[0, int] */
> /* 028 */ int value3 = i.getInt(0);
> /* 029 */ boolean isNull1 = false;
> /* 030 */ int value1 = -1;
> /* 031 */ if (!false && false) {
> /* 032 */ /* null */
> /* 033 */ final int value4 = -1;
> /* 034 */ isNull1 = true;
> /* 035 */ value1 = value4;
> /* 036 */ } else {
> /* 037 */ /* input[0, int] */
> /* 038 */ int value5 = i.getInt(0);
> /* 039 */ isNull1 = false;
> /* 040 */ value1 = value5;
> /* 041 */ }
> /* 042 */ if (isNull1) {
> /* 043 */ values[0] = null;
> /* 044 */ } else {
> /* 045 */ values[0] = value1;
> /* 046 */ }
> /* 047 */
> /* 048 */ final org.apache.spark.sql.Row value = new
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values,
> this.schema);
> /* 049 */ if (false) {
> /* 050 */ mutableRow.setNullAt(0);
> /* 051 */ } else {
> /* 052 */
> /* 053 */ mutableRow.update(0, value);
> /* 054 */ }
> /* 055 */
> /* 056 */ return mutableRow;
> /* 057 */ }
> /* 058 */ }
> /* 059 */
> {code}
> As you can see, the two pieces of generated code are essentially identical
> (they differ only in the comment on line 023), but they are not equal as
> String objects, so each of them is compiled separately.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]