[
https://issues.apache.org/jira/browse/SPARK-15205?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15288484#comment-15288484
]
Reynold Xin commented on SPARK-15205:
-------------------------------------
This is not a bug per se. I've changed it to improvement and targeted it for
2.1.
> Codegen can compile the same source code more than twice
> --------------------------------------------------------
>
> Key: SPARK-15205
> URL: https://issues.apache.org/jira/browse/SPARK-15205
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 2.0.0
> Reporter: Kousuke Saruta
>
> Sometimes we generate pieces of code that are identical except for their comments.
> Here is one example.
> {code}
> val df = sc.parallelize(1 to 10).toDF
> df.selectExpr("value + 1").show // query1
> df.selectExpr("value + 2").show // query2
> {code}
> The following is one of the pieces of code generated when query1 above is executed.
> {code}
> /* 001 */
> /* 002 */ public java.lang.Object generate(Object[] references) {
> /* 003 */ return new SpecificSafeProjection(references);
> /* 004 */ }
> /* 005 */
> /* 006 */ class SpecificSafeProjection extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
> /* 007 */
> /* 008 */ private Object[] references;
> /* 009 */ private MutableRow mutableRow;
> /* 010 */ private Object[] values;
> /* 011 */ private org.apache.spark.sql.types.StructType schema;
> /* 012 */
> /* 013 */
> /* 014 */ public SpecificSafeProjection(Object[] references) {
> /* 015 */ this.references = references;
> /* 016 */ mutableRow = (MutableRow) references[references.length - 1];
> /* 017 */
> /* 018 */ this.schema = (org.apache.spark.sql.types.StructType)
> references[0];
> /* 019 */ }
> /* 020 */
> /* 021 */ public java.lang.Object apply(java.lang.Object _i) {
> /* 022 */ InternalRow i = (InternalRow) _i;
> /* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else
> input[0, int], StructField((value + 1),IntegerType,false)) */
> /* 024 */ values = new Object[1];
> /* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */
> /* 026 */ /* isnull(input[0, int]) */
> /* 027 */ /* input[0, int] */
> /* 028 */ int value3 = i.getInt(0);
> /* 029 */ boolean isNull1 = false;
> /* 030 */ int value1 = -1;
> /* 031 */ if (!false && false) {
> /* 032 */ /* null */
> /* 033 */ final int value4 = -1;
> /* 034 */ isNull1 = true;
> /* 035 */ value1 = value4;
> /* 036 */ } else {
> /* 037 */ /* input[0, int] */
> /* 038 */ int value5 = i.getInt(0);
> /* 039 */ isNull1 = false;
> /* 040 */ value1 = value5;
> /* 041 */ }
> /* 042 */ if (isNull1) {
> /* 043 */ values[0] = null;
> /* 044 */ } else {
> /* 045 */ values[0] = value1;
> /* 046 */ }
> /* 047 */
> /* 048 */ final org.apache.spark.sql.Row value = new
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values,
> this.schema);
> /* 049 */ if (false) {
> /* 050 */ mutableRow.setNullAt(0);
> /* 051 */ } else {
> /* 052 */
> /* 053 */ mutableRow.update(0, value);
> /* 054 */ }
> /* 055 */
> /* 056 */ return mutableRow;
> /* 057 */ }
> /* 058 */ }
> /* 059 */
> {code}
> On the other hand, the following code is for query2.
> {code}
> /* 001 */
> /* 002 */ public java.lang.Object generate(Object[] references) {
> /* 003 */ return new SpecificSafeProjection(references);
> /* 004 */ }
> /* 005 */
> /* 006 */ class SpecificSafeProjection extends
> org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
> /* 007 */
> /* 008 */ private Object[] references;
> /* 009 */ private MutableRow mutableRow;
> /* 010 */ private Object[] values;
> /* 011 */ private org.apache.spark.sql.types.StructType schema;
> /* 012 */
> /* 013 */
> /* 014 */ public SpecificSafeProjection(Object[] references) {
> /* 015 */ this.references = references;
> /* 016 */ mutableRow = (MutableRow) references[references.length - 1];
> /* 017 */
> /* 018 */ this.schema = (org.apache.spark.sql.types.StructType)
> references[0];
> /* 019 */ }
> /* 020 */
> /* 021 */ public java.lang.Object apply(java.lang.Object _i) {
> /* 022 */ InternalRow i = (InternalRow) _i;
> /* 023 */ /* createexternalrow(if (isnull(input[0, int])) null else
> input[0, int], StructField((value + 2),IntegerType,false)) */
> /* 024 */ values = new Object[1];
> /* 025 */ /* if (isnull(input[0, int])) null else input[0, int] */
> /* 026 */ /* isnull(input[0, int]) */
> /* 027 */ /* input[0, int] */
> /* 028 */ int value3 = i.getInt(0);
> /* 029 */ boolean isNull1 = false;
> /* 030 */ int value1 = -1;
> /* 031 */ if (!false && false) {
> /* 032 */ /* null */
> /* 033 */ final int value4 = -1;
> /* 034 */ isNull1 = true;
> /* 035 */ value1 = value4;
> /* 036 */ } else {
> /* 037 */ /* input[0, int] */
> /* 038 */ int value5 = i.getInt(0);
> /* 039 */ isNull1 = false;
> /* 040 */ value1 = value5;
> /* 041 */ }
> /* 042 */ if (isNull1) {
> /* 043 */ values[0] = null;
> /* 044 */ } else {
> /* 045 */ values[0] = value1;
> /* 046 */ }
> /* 047 */
> /* 048 */ final org.apache.spark.sql.Row value = new
> org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values,
> this.schema);
> /* 049 */ if (false) {
> /* 050 */ mutableRow.setNullAt(0);
> /* 051 */ } else {
> /* 052 */
> /* 053 */ mutableRow.update(0, value);
> /* 054 */ }
> /* 055 */
> /* 056 */ return mutableRow;
> /* 057 */ }
> /* 058 */ }
> /* 059 */
> {code}
> As you can see, the two pieces of generated code are essentially identical (they
> differ only in the comment on line 023), but they are not equal as String objects,
> so each one is compiled separately.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]