koert kuipers created SPARK-18147:
-------------------------------------

             Summary: Broken Spark SQL Codegen
                 Key: SPARK-18147
                 URL: https://issues.apache.org/jira/browse/SPARK-18147
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.0.1
            Reporter: koert kuipers
            Priority: Minor


this is me deliberately trying to break spark sql codegen to uncover potential issues, by building arbitrarily complex data structures out of primitives, strings, basic collections, tuples, and case classes.

first example: nested case classes
code:
{noformat}
// assumes a spark-shell or test session with a SparkSession named spark
import scala.reflect.runtime.universe.TypeTag

import org.apache.spark.sql.{Encoder, Row}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.expressions.Aggregator

import spark.implicits._

// trivial aggregator: it ignores its input and always returns the fixed
// result, so any failure comes from the encoders, not the aggregation logic
class ComplexResultAgg[B: TypeTag, C: TypeTag](val zero: B, result: C) extends Aggregator[Row, B, C] {
  override def reduce(b: B, input: Row): B = b

  override def merge(b1: B, b2: B): B = b1

  override def finish(reduction: B): C = result

  override def bufferEncoder: Encoder[B] = ExpressionEncoder[B]()
  override def outputEncoder: Encoder[C] = ExpressionEncoder[C]()
}

case class Struct2(d: Double = 0.0, s1: Seq[Double] = Seq.empty, s2: Seq[Long] = Seq.empty)

case class Struct3(a: Struct2 = Struct2(), b: Struct2 = Struct2())

val df1 = Seq(("a", "aa"), ("a", "aa"), ("b", "b"), ("b", null)).toDF("x", "y")
  .groupBy("x")
  .agg(new ComplexResultAgg("boo", Struct3()).toColumn)
df1.printSchema
df1.show
{noformat}
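
for reference, here is the same aggregator with the flat Struct2 as the result type instead of the nested Struct3. this is only a bisecting sketch (i am not including its output here), but it should exercise a simpler codegen path and helps narrow the failure down to the nesting:

{noformat}
// comparison sketch (not the failing case): same aggregator, but with a
// flat result type, to check whether the nesting is what breaks codegen
val dfFlat = Seq(("a", "aa"), ("a", "aa"), ("b", "b"), ("b", null)).toDF("x", "y")
  .groupBy("x")
  .agg(new ComplexResultAgg("boo", Struct2()).toColumn)
dfFlat.printSchema
{noformat}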

the result for the nested Struct3 version is a failure to compile the generated code:
{noformat}
[info]   Cause: java.util.concurrent.ExecutionException: java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 33, Column 12: Expression "isNull1" is not an rvalue
[info] /* 001 */ public java.lang.Object generate(Object[] references) {
[info] /* 002 */   return new SpecificMutableProjection(references);
[info] /* 003 */ }
[info] /* 004 */
[info] /* 005 */ class SpecificMutableProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection {
[info] /* 006 */
[info] /* 007 */   private Object[] references;
[info] /* 008 */   private MutableRow mutableRow;
[info] /* 009 */   private Object[] values;
[info] /* 010 */   private java.lang.String errMsg;
[info] /* 011 */   private Object[] values1;
[info] /* 012 */   private java.lang.String errMsg1;
[info] /* 013 */   private boolean[] argIsNulls;
[info] /* 014 */   private scala.collection.Seq argValue;
[info] /* 015 */   private java.lang.String errMsg2;
[info] /* 016 */   private boolean[] argIsNulls1;
[info] /* 017 */   private scala.collection.Seq argValue1;
[info] /* 018 */   private java.lang.String errMsg3;
[info] /* 019 */   private java.lang.String errMsg4;
[info] /* 020 */   private Object[] values2;
[info] /* 021 */   private java.lang.String errMsg5;
[info] /* 022 */   private boolean[] argIsNulls2;
[info] /* 023 */   private scala.collection.Seq argValue2;
[info] /* 024 */   private java.lang.String errMsg6;
[info] /* 025 */   private boolean[] argIsNulls3;
[info] /* 026 */   private scala.collection.Seq argValue3;
[info] /* 027 */   private java.lang.String errMsg7;
[info] /* 028 */   private boolean isNull_0;
[info] /* 029 */   private InternalRow value_0;
[info] /* 030 */
[info] /* 031 */   private void apply_1(InternalRow i) {
[info] /* 032 */
[info] /* 033 */     if (isNull1) {
[info] /* 034 */       throw new RuntimeException(errMsg3);
[info] /* 035 */     }
[info] /* 036 */
[info] /* 037 */     boolean isNull24 = false;
[info] /* 038 */     final com.tresata.spark.sql.Struct2 value24 = isNull24 ? null : (com.tresata.spark.sql.Struct2) value1.a();
[info] /* 039 */     isNull24 = value24 == null;
[info] /* 040 */
[info] /* 041 */     boolean isNull23 = isNull24;
[info] /* 042 */     final scala.collection.Seq value23 = isNull23 ? null : (scala.collection.Seq) value24.s2();
[info] /* 043 */     isNull23 = value23 == null;
[info] /* 044 */     argIsNulls1[0] = isNull23;
[info] /* 045 */     argValue1 = value23;
[info] /* 046 */
[info] /* 047 */
[info] /* 048 */
[info] /* 049 */     boolean isNull22 = false;
[info] /* 050 */     for (int idx = 0; idx < 1; idx++) {
[info] /* 051 */       if (argIsNulls1[idx]) { isNull22 = true; break; }
[info] /* 052 */     }
[info] /* 053 */
[info] /* 054 */     final ArrayData value22 = isNull22 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue1);
[info] /* 055 */     if (isNull22) {
[info] /* 056 */       values1[2] = null;
[info] /* 057 */     } else {
[info] /* 058 */       values1[2] = value22;
[info] /* 059 */     }
[info] /* 060 */   }
[info] /* 061 */
[info] /* 062 */
[info] /* 063 */   private void apply1_1(InternalRow i) {
[info] /* 064 */
[info] /* 065 */     if (isNull1) {
[info] /* 066 */       throw new RuntimeException(errMsg7);
[info] /* 067 */     }
[info] /* 068 */
[info] /* 069 */     boolean isNull41 = false;
[info] /* 070 */     final com.tresata.spark.sql.Struct2 value41 = isNull41 ? null : (com.tresata.spark.sql.Struct2) value1.b();
[info] /* 071 */     isNull41 = value41 == null;
[info] /* 072 */
[info] /* 073 */     boolean isNull40 = isNull41;
[info] /* 074 */     final scala.collection.Seq value40 = isNull40 ? null : (scala.collection.Seq) value41.s2();
[info] /* 075 */     isNull40 = value40 == null;
[info] /* 076 */     argIsNulls3[0] = isNull40;
[info] /* 077 */     argValue3 = value40;
[info] /* 078 */
[info] /* 079 */
[info] /* 080 */
[info] /* 081 */     boolean isNull39 = false;
[info] /* 082 */     for (int idx = 0; idx < 1; idx++) {
[info] /* 083 */       if (argIsNulls3[idx]) { isNull39 = true; break; }
[info] /* 084 */     }
[info] /* 085 */
[info] /* 086 */     final ArrayData value39 = isNull39 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue3);
[info] /* 087 */     if (isNull39) {
[info] /* 088 */       values2[2] = null;
[info] /* 089 */     } else {
[info] /* 090 */       values2[2] = value39;
[info] /* 091 */     }
[info] /* 092 */   }
[info] /* 093 */
[info] /* 094 */
[info] /* 095 */   private void apply_0(InternalRow i) {
[info] /* 096 */
[info] /* 097 */     if (isNull1) {
[info] /* 098 */       throw new RuntimeException(errMsg1);
[info] /* 099 */     }
[info] /* 100 */
[info] /* 101 */     boolean isNull16 = false;
[info] /* 102 */     final com.tresata.spark.sql.Struct2 value16 = isNull16 ? null : (com.tresata.spark.sql.Struct2) value1.a();
[info] /* 103 */     isNull16 = value16 == null;
[info] /* 104 */
[info] /* 105 */     boolean isNull15 = isNull16;
[info] /* 106 */     final double value15 = isNull15 ? -1.0 : value16.d();
[info] /* 107 */     if (isNull15) {
[info] /* 108 */       values1[0] = null;
[info] /* 109 */     } else {
[info] /* 110 */       values1[0] = value15;
[info] /* 111 */     }
[info] /* 112 */     if (isNull1) {
[info] /* 113 */       throw new RuntimeException(errMsg2);
[info] /* 114 */     }
[info] /* 115 */
[info] /* 116 */     boolean isNull20 = false;
[info] /* 117 */     final com.tresata.spark.sql.Struct2 value20 = isNull20 ? null : (com.tresata.spark.sql.Struct2) value1.a();
[info] /* 118 */     isNull20 = value20 == null;
[info] /* 119 */
[info] /* 120 */     boolean isNull19 = isNull20;
[info] /* 121 */     final scala.collection.Seq value19 = isNull19 ? null : (scala.collection.Seq) value20.s1();
[info] /* 122 */     isNull19 = value19 == null;
[info] /* 123 */     argIsNulls[0] = isNull19;
[info] /* 124 */     argValue = value19;
[info] /* 125 */
[info] /* 126 */
[info] /* 127 */
[info] /* 128 */     boolean isNull18 = false;
[info] /* 129 */     for (int idx = 0; idx < 1; idx++) {
[info] /* 130 */       if (argIsNulls[idx]) { isNull18 = true; break; }
[info] /* 131 */     }
[info] /* 132 */
[info] /* 133 */     final ArrayData value18 = isNull18 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue);
[info] /* 134 */     if (isNull18) {
[info] /* 135 */       values1[1] = null;
[info] /* 136 */     } else {
[info] /* 137 */       values1[1] = value18;
[info] /* 138 */     }
[info] /* 139 */   }
[info] /* 140 */
[info] /* 141 */
[info] /* 142 */   private void apply1_0(InternalRow i) {
[info] /* 143 */
[info] /* 144 */     if (isNull1) {
[info] /* 145 */       throw new RuntimeException(errMsg5);
[info] /* 146 */     }
[info] /* 147 */
[info] /* 148 */     boolean isNull33 = false;
[info] /* 149 */     final com.tresata.spark.sql.Struct2 value33 = isNull33 ? null : (com.tresata.spark.sql.Struct2) value1.b();
[info] /* 150 */     isNull33 = value33 == null;
[info] /* 151 */
[info] /* 152 */     boolean isNull32 = isNull33;
[info] /* 153 */     final double value32 = isNull32 ? -1.0 : value33.d();
[info] /* 154 */     if (isNull32) {
[info] /* 155 */       values2[0] = null;
[info] /* 156 */     } else {
[info] /* 157 */       values2[0] = value32;
[info] /* 158 */     }
[info] /* 159 */     if (isNull1) {
[info] /* 160 */       throw new RuntimeException(errMsg6);
[info] /* 161 */     }
[info] /* 162 */
[info] /* 163 */     boolean isNull37 = false;
[info] /* 164 */     final com.tresata.spark.sql.Struct2 value37 = isNull37 ? null : (com.tresata.spark.sql.Struct2) value1.b();
[info] /* 165 */     isNull37 = value37 == null;
[info] /* 166 */
[info] /* 167 */     boolean isNull36 = isNull37;
[info] /* 168 */     final scala.collection.Seq value36 = isNull36 ? null : (scala.collection.Seq) value37.s1();
[info] /* 169 */     isNull36 = value36 == null;
[info] /* 170 */     argIsNulls2[0] = isNull36;
[info] /* 171 */     argValue2 = value36;
[info] /* 172 */
[info] /* 173 */
[info] /* 174 */
[info] /* 175 */     boolean isNull35 = false;
[info] /* 176 */     for (int idx = 0; idx < 1; idx++) {
[info] /* 177 */       if (argIsNulls2[idx]) { isNull35 = true; break; }
[info] /* 178 */     }
[info] /* 179 */
[info] /* 180 */     final ArrayData value35 = isNull35 ? null : new org.apache.spark.sql.catalyst.util.GenericArrayData(argValue2);
[info] /* 181 */     if (isNull35) {
[info] /* 182 */       values2[1] = null;
[info] /* 183 */     } else {
[info] /* 184 */       values2[1] = value35;
[info] /* 185 */     }
[info] /* 186 */   }
[info] /* 187 */
[info] /* 188 */
[info] /* 189 */   public SpecificMutableProjection(Object[] references) {
[info] /* 190 */     this.references = references;
[info] /* 191 */     mutableRow = new org.apache.spark.sql.catalyst.expressions.GenericMutableRow(1);
[info] /* 192 */     this.values = null;
[info] /* 193 */     this.errMsg = (java.lang.String) references[1];
[info] /* 194 */     this.values1 = null;
[info] /* 195 */     this.errMsg1 = (java.lang.String) references[2];
[info] /* 196 */     argIsNulls = new boolean[1];
[info] /* 197 */
[info] /* 198 */     this.errMsg2 = (java.lang.String) references[3];
[info] /* 199 */     argIsNulls1 = new boolean[1];
[info] /* 200 */
[info] /* 201 */     this.errMsg3 = (java.lang.String) references[4];
[info] /* 202 */     this.errMsg4 = (java.lang.String) references[5];
[info] /* 203 */     this.values2 = null;
[info] /* 204 */     this.errMsg5 = (java.lang.String) references[6];
[info] /* 205 */     argIsNulls2 = new boolean[1];
[info] /* 206 */
[info] /* 207 */     this.errMsg6 = (java.lang.String) references[7];
[info] /* 208 */     argIsNulls3 = new boolean[1];
[info] /* 209 */
[info] /* 210 */     this.errMsg7 = (java.lang.String) references[8];
[info] /* 211 */     this.isNull_0 = true;
[info] /* 212 */     this.value_0 = null;
[info] /* 213 */   }
[info] /* 214 */
[info] /* 215 */   public org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection target(MutableRow row) {
[info] /* 216 */     mutableRow = row;
[info] /* 217 */     return this;
[info] /* 218 */   }
[info] /* 219 */
[info] /* 220 */   /* Provide immutable access to the last projected row. */
[info] /* 221 */   public InternalRow currentValue() {
[info] /* 222 */     return (InternalRow) mutableRow;
[info] /* 223 */   }
[info] /* 224 */
[info] /* 225 */   public java.lang.Object apply(java.lang.Object _i) {
[info] /* 226 */     InternalRow i = (InternalRow) _i;
[info] /* 227 */
[info] /* 228 */
[info] /* 229 */
[info] /* 230 */     Object obj = ((Expression) references[0]).eval(null);
[info] /* 231 */     org.apache.spark.sql.expressions.Aggregator value2 = (org.apache.spark.sql.expressions.Aggregator) obj;
[info] /* 232 */
[info] /* 233 */     boolean isNull4 = i.isNullAt(0);
[info] /* 234 */     UTF8String value4 = isNull4 ? null : (i.getUTF8String(0));
[info] /* 235 */
[info] /* 236 */     boolean isNull3 = isNull4;
[info] /* 237 */     final java.lang.String value3 = isNull3 ? null : (java.lang.String) value4.toString();
[info] /* 238 */     isNull3 = value3 == null;
[info] /* 239 */     boolean isNull1 = false || isNull3;
[info] /* 240 */     final com.tresata.spark.sql.Struct3 value1 = isNull1 ? null : (com.tresata.spark.sql.Struct3) value2.finish(value3);
[info] /* 241 */     isNull1 = value1 == null;
[info] /* 242 */
[info] /* 243 */     boolean isNull5 = false;
[info] /* 244 */     InternalRow value5 = null;
[info] /* 245 */     if (!false && isNull1) {
[info] /* 246 */
[info] /* 247 */       final InternalRow value7 = null;
[info] /* 248 */       isNull5 = true;
[info] /* 249 */       value5 = value7;
[info] /* 250 */     } else {
[info] /* 251 */
[info] /* 252 */       boolean isNull8 = false;
[info] /* 253 */       this.values = new Object[2];
[info] /* 254 */       if (isNull1) {
[info] /* 255 */         throw new RuntimeException(errMsg);
[info] /* 256 */       }
[info] /* 257 */
[info] /* 258 */       boolean isNull11 = false;
[info] /* 259 */       final com.tresata.spark.sql.Struct2 value11 = isNull11 ? null : (com.tresata.spark.sql.Struct2) value1.a();
[info] /* 260 */       isNull11 = value11 == null;
[info] /* 261 */       boolean isNull9 = false;
[info] /* 262 */       InternalRow value9 = null;
[info] /* 263 */       if (!false && isNull11) {
[info] /* 264 */
[info] /* 265 */         final InternalRow value13 = null;
[info] /* 266 */         isNull9 = true;
[info] /* 267 */         value9 = value13;
[info] /* 268 */       } else {
[info] /* 269 */
[info] /* 270 */         boolean isNull14 = false;
[info] /* 271 */         values1 = new Object[3];apply_0(i);
[info] /* 272 */         apply_1(i);
[info] /* 273 */         final InternalRow value14 = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(values1);
[info] /* 274 */         this.values1 = null;
[info] /* 275 */         isNull9 = isNull14;
[info] /* 276 */         value9 = value14;
[info] /* 277 */       }
[info] /* 278 */       if (isNull9) {
[info] /* 279 */         values[0] = null;
[info] /* 280 */       } else {
[info] /* 281 */         values[0] = value9;
[info] /* 282 */       }
[info] /* 283 */       if (isNull1) {
[info] /* 284 */         throw new RuntimeException(errMsg4);
[info] /* 285 */       }
[info] /* 286 */
[info] /* 287 */       boolean isNull28 = false;
[info] /* 288 */       final com.tresata.spark.sql.Struct2 value28 = isNull28 ? null : (com.tresata.spark.sql.Struct2) value1.b();
[info] /* 289 */       isNull28 = value28 == null;
[info] /* 290 */       boolean isNull26 = false;
[info] /* 291 */       InternalRow value26 = null;
[info] /* 292 */       if (!false && isNull28) {
[info] /* 293 */
[info] /* 294 */         final InternalRow value30 = null;
[info] /* 295 */         isNull26 = true;
[info] /* 296 */         value26 = value30;
[info] /* 297 */       } else {
[info] /* 298 */
[info] /* 299 */         boolean isNull31 = false;
[info] /* 300 */         values2 = new Object[3];apply1_0(i);
[info] /* 301 */         apply1_1(i);
[info] /* 302 */         final InternalRow value31 = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(values2);
[info] /* 303 */         this.values2 = null;
[info] /* 304 */         isNull26 = isNull31;
[info] /* 305 */         value26 = value31;
[info] /* 306 */       }
[info] /* 307 */       if (isNull26) {
[info] /* 308 */         values[1] = null;
[info] /* 309 */       } else {
[info] /* 310 */         values[1] = value26;
[info] /* 311 */       }
[info] /* 312 */       final InternalRow value8 = new org.apache.spark.sql.catalyst.expressions.GenericInternalRow(values);
[info] /* 313 */       this.values = null;
[info] /* 314 */       isNull5 = isNull8;
[info] /* 315 */       value5 = value8;
[info] /* 316 */     }
[info] /* 317 */     this.isNull_0 = isNull5;
[info] /* 318 */     this.value_0 = value5;
[info] /* 319 */
[info] /* 320 */     // copy all the results into MutableRow
[info] /* 321 */
[info] /* 322 */     if (!this.isNull_0) {
[info] /* 323 */       mutableRow.update(0, this.value_0);
[info] /* 324 */     } else {
[info] /* 325 */       mutableRow.setNullAt(0);
[info] /* 326 */     }
[info] /* 327 */
[info] /* 328 */     return mutableRow;
[info] /* 329 */   }
[info] /* 330 */ }
{noformat}
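
my reading of the failure, with the caveat that i have not traced the codegen internals: the compile error points at generated line 033, the "if (isNull1)" check inside the split-out helper apply_1(InternalRow i). isNull1 is declared as a local variable inside apply() (generated line 239), not as a field, so when the projection code gets split into helper methods the helpers end up referencing a local they cannot see, which is presumably why janino complains that "isNull1" is not an rvalue.

as a possible workaround sketch (untested on 2.0.1), the nested-struct expression encoder can be sidestepped by serializing the result with kryo, at the cost of the result coming back as an opaque binary column instead of a struct:

{noformat}
// untested workaround sketch: serialize the nested result with kryo so
// codegen never has to construct the nested struct. the output column is
// then binary rather than struct<a: ..., b: ...>.
import org.apache.spark.sql.Encoders

val kryoAgg = new ComplexResultAgg[String, Struct3]("boo", Struct3()) {
  override def outputEncoder: Encoder[Struct3] = Encoders.kryo[Struct3]
}

val df2 = Seq(("a", "aa"), ("a", "aa"), ("b", "b"), ("b", null)).toDF("x", "y")
  .groupBy("x")
  .agg(kryoAgg.toColumn)
{noformat}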


