Repository: spark Updated Branches: refs/heads/master 0f1413e32 -> acc645257
[SPARK-25444][SQL] Refactor GenArrayData.genCodeToCreateArrayData method ## What changes were proposed in this pull request? This PR makes `GenArrayData.genCodeToCreateArrayData` method simple by using `ArrayData.createArrayData` method. Before this PR, `genCodeToCreateArrayData` method was complicated * Generated a temporary Java array to create `ArrayData` * Had separate code generation path to assign values for `GenericArrayData` and `UnsafeArrayData` After this PR, the method * Directly generates `GenericArrayData` or `UnsafeArrayData` without a temporary array * Has only code generation path to assign values ## How was this patch tested? Existing UTs Closes #22439 from kiszk/SPARK-25444. Authored-by: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Signed-off-by: Takuya UESHIN <ues...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acc64525 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acc64525 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acc64525 Branch: refs/heads/master Commit: acc6452579aca99aae9f9787ddbe5c4aeb170e58 Parents: 0f1413e Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Authored: Tue Sep 18 12:44:54 2018 +0900 Committer: Takuya UESHIN <ues...@databricks.com> Committed: Tue Sep 18 12:44:54 2018 +0900 ---------------------------------------------------------------------- .../expressions/complexTypeCreator.scala | 122 +++++++------------ 1 file changed, 45 insertions(+), 77 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/acc64525/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index fd8b5e9..0361372 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -61,11 +61,10 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val et = dataType.elementType - val evals = children.map(e => e.genCode(ctx)) - val (preprocess, assigns, postprocess, arrayData) = - GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false) + val (allocation, assigns, arrayData) = + GenArrayData.genCodeToCreateArrayData(ctx, et, children, false, "createArray") ev.copy( - code = code"${preprocess}${assigns}${postprocess}", + code = code"${allocation}${assigns}", value = JavaCode.variable(arrayData, dataType), isNull = FalseLiteral) } @@ -75,87 +74,60 @@ case class CreateArray(children: Seq[Expression]) extends Expression { private [sql] object GenArrayData { /** - * Return Java code pieces based on DataType and isPrimitive to allocate ArrayData class + * Return Java code pieces based on DataType and array size to allocate ArrayData class * * @param ctx a [[CodegenContext]] * @param elementType data type of underlying array elements - * @param elementsCode concatenated set of [[ExprCode]] for each element of an underlying array + * @param elementsExpr concatenated set of [[Expression]] for each element of an underlying array * @param isMapKey if true, throw an exception when the element is null - * @return (code pre-assignments, concatenated assignments to each array elements, - * code post-assignments, arrayData name) + * @param functionName string to include in the error message + * @return (array allocation, concatenated assignments to each array elements, arrayData name) */ def genCodeToCreateArrayData( ctx: CodegenContext, elementType: DataType, - elementsCode: Seq[ExprCode], - isMapKey: Boolean): (String, String, String, String) = { + elementsExpr: Seq[Expression], + isMapKey: Boolean, + functionName: String): (String, String, String) = { val arrayDataName = ctx.freshName("arrayData") - val numElements = elementsCode.length + val numElements = s"${elementsExpr.length}L" - if (!CodeGenerator.isPrimitiveType(elementType)) { - val arrayName = ctx.freshName("arrayObject") - val genericArrayClass = classOf[GenericArrayData].getName + val initialization = CodeGenerator.createArrayData( + arrayDataName, elementType, numElements, s" $functionName failed.") - val assignments = elementsCode.zipWithIndex.map { case (eval, i) => - val isNullAssignment = if (!isMapKey) { - s"$arrayName[$i] = null;" - } else { - "throw new RuntimeException(\"Cannot use null as map key!\");" - } - eval.code + s""" - if (${eval.isNull}) { - $isNullAssignment - } else { - $arrayName[$i] = ${eval.value}; - } - """ - } - val assignmentString = ctx.splitExpressionsWithCurrentInputs( - expressions = assignments, - funcName = "apply", - extraArguments = ("Object[]", arrayName) :: Nil) - - (s"Object[] $arrayName = new Object[$numElements];", - assignmentString, - s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);", - arrayDataName) - } else { - val arrayName = ctx.freshName("array") - val unsafeArraySizeInBytes = - UnsafeArrayData.calculateHeaderPortionInBytes(numElements) + - ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements) - val baseOffset = Platform.BYTE_ARRAY_OFFSET - - val primitiveValueTypeName = CodeGenerator.primitiveTypeName(elementType) - val assignments = elementsCode.zipWithIndex.map { case (eval, i) => + val assignments = elementsExpr.zipWithIndex.map { case (expr, i) => + val eval = expr.genCode(ctx) + val setArrayElement = CodeGenerator.setArrayElement( + arrayDataName, elementType, i.toString, eval.value) + + val assignment = if (!expr.nullable) { + setArrayElement + } else { val isNullAssignment = if (!isMapKey) { s"$arrayDataName.setNullAt($i);" } else { "throw new RuntimeException(\"Cannot use null as map key!\");" } - eval.code + s""" - if (${eval.isNull}) { - $isNullAssignment - } else { - $arrayDataName.set$primitiveValueTypeName($i, ${eval.value}); - } - """ + + s""" + |if (${eval.isNull}) { + | $isNullAssignment + |} else { + | $setArrayElement + |} + """.stripMargin } - val assignmentString = ctx.splitExpressionsWithCurrentInputs( - expressions = assignments, - funcName = "apply", - extraArguments = ("UnsafeArrayData", arrayDataName) :: Nil) - - (s""" - byte[] $arrayName = new byte[$unsafeArraySizeInBytes]; - UnsafeArrayData $arrayDataName = new UnsafeArrayData(); - Platform.putLong($arrayName, $baseOffset, $numElements); - $arrayDataName.pointTo($arrayName, $baseOffset, $unsafeArraySizeInBytes); - """, - assignmentString, - "", - arrayDataName) + s""" + |${eval.code} + |$assignment + """.stripMargin } + val assignmentString = ctx.splitExpressionsWithCurrentInputs( + expressions = assignments, + funcName = "apply", + extraArguments = ("ArrayData", arrayDataName) :: Nil) + + (initialization, assignmentString, arrayDataName) } } @@ -216,21 +188,17 @@ case class CreateMap(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val mapClass = classOf[ArrayBasedMapData].getName val MapType(keyDt, valueDt, _) = dataType - val evalKeys = keys.map(e => e.genCode(ctx)) - val evalValues = values.map(e => e.genCode(ctx)) - val (preprocessKeyData, assignKeys, postprocessKeyData, keyArrayData) = - GenArrayData.genCodeToCreateArrayData(ctx, keyDt, evalKeys, true) - val (preprocessValueData, assignValues, postprocessValueData, valueArrayData) = - GenArrayData.genCodeToCreateArrayData(ctx, valueDt, evalValues, false) + val (allocationKeyData, assignKeys, keyArrayData) = + GenArrayData.genCodeToCreateArrayData(ctx, keyDt, keys, true, "createMap") + val (allocationValueData, assignValues, valueArrayData) = + GenArrayData.genCodeToCreateArrayData(ctx, valueDt, values, false, "createMap") val code = code""" final boolean ${ev.isNull} = false; - $preprocessKeyData + $allocationKeyData $assignKeys - $postprocessKeyData - $preprocessValueData + $allocationValueData $assignValues - $postprocessValueData final MapData ${ev.value} = new $mapClass($keyArrayData, $valueArrayData); """ ev.copy(code = code) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org