Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/19937#discussion_r156007853 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala --- @@ -513,26 +513,28 @@ case class SortMergeJoinExec( * the variables should be declared separately from accessing the columns, we can't use the * codegen of BoundReference here. */ - private def createLeftVars(ctx: CodegenContext, leftRow: String): Seq[ExprCode] = { + private def createLeftVars(ctx: CodegenContext, leftRow: String): (Seq[ExprCode], Seq[String]) = { ctx.INPUT_ROW = leftRow left.output.zipWithIndex.map { case (a, i) => val value = ctx.freshName("value") val valueCode = ctx.getValue(leftRow, a.dataType, i.toString) - // declare it as class member, so we can access the column before or in the loop. - ctx.addMutableState(ctx.javaType(a.dataType), value) if (a.nullable) { val isNull = ctx.freshName("isNull") - ctx.addMutableState(ctx.JAVA_BOOLEAN, isNull) val code = s""" |$isNull = $leftRow.isNullAt($i); |$value = $isNull ? ${ctx.defaultValue(a.dataType)} : ($valueCode); """.stripMargin - ExprCode(code, isNull, value) + (ExprCode(code, isNull, value), --- End diff -- Good question. If we declare variable types only at `code`, it may lead to compilation error in`janino`. The `code` that you pointed out may go to `$leftAfter` in `$condCheck` that exists in a `if-then` block at the inner most of the loop in the generated code, by using `leftVar`. The variable in `leftVars` is also referred to at `${consume(ctx, leftVars ++ rightVars)}`. In the following example, If we declare variable types only at `code`, we will drop lines 145 and will declare `int` for `smj_value8` at lines 162. Since `smj_value8` is refered at line 169, the generated code would cause compilation error. WDYT? ``` /* 143 */ protected void processNext() throws java.io.IOException { /* 144 */ while (findNextInnerJoinRows(smj_leftInput, smj_rightInput)) { /* 145 */ int smj_value8 = -1; /* 146 */ boolean smj_isNull6 = false; /* 147 */ int smj_value9 = -1; /* 148 */ boolean smj_loaded = false; /* 149 */ smj_isNull6 = smj_leftRow.isNullAt(1); /* 150 */ smj_value9 = smj_isNull6 ? -1 : (smj_leftRow.getInt(1)); /* 151 */ scala.collection.Iterator<UnsafeRow> smj_iterator = smj_matches.generateIterator(); /* 152 */ while (smj_iterator.hasNext()) { ... /* 160 */ if (!smj_loaded) { /* 161 */ smj_loaded = true; /* 162 */ smj_value8 = smj_leftRow.getInt(0); /* 163 */ } /* 164 */ int smj_value10 = smj_rightRow1.getInt(0); /* 165 */ smj_numOutputRows.add(1); /* 166 */ /* 167 */ smj_rowWriter.zeroOutNullBytes(); /* 168 */ /* 169 */ smj_rowWriter.write(0, smj_value8); ... /* 185 */ /* 186 */ } /* 187 */ if (shouldStop()) return; /* 188 */ } /* 189 */ } ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org