Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7752#discussion_r36034431
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
 ---
    @@ -250,6 +258,252 @@ object GenerateUnsafeProjection extends 
CodeGenerator[Seq[Expression], UnsafePro
         GeneratedExpressionCode(code, isNull, primitive)
       }
     
    +  /**
    +   * Generates the Java code to convert a struct (backed by InternalRow) 
to UnsafeRow.
    +   *
    +   * @param ctx code generation context
    +   * @param inputs could be the codes for expressions or input struct 
fields.
    +   * @param inputTypes types of the inputs
    +   */
    +  private def createCodeForStruct2(
    +      ctx: CodeGenContext,
    +      inputs: Seq[GeneratedExpressionCode],
    +      inputTypes: Seq[DataType]): GeneratedExpressionCode = {
    +
    +    val output = ctx.freshName("convertedStruct")
    +    ctx.addMutableState("UnsafeRow", output, s"$output = new UnsafeRow();")
    +    val buffer = ctx.freshName("buffer")
    +    ctx.addMutableState("byte[]", buffer, s"$buffer = new byte[64];")
    +    val numBytes = ctx.freshName("numBytes")
    +    val cursor = ctx.freshName("cursor")
    +
    +    val convertedFields = inputTypes.zip(inputs).map { case (dt, input) =>
    +      createConvertCode(ctx, input, dt)
    +    }
    +
    +    val fixedSize = 8 * inputTypes.length + 
UnsafeRow.calculateBitSetWidthInBytes(inputTypes.length)
    +    val additionalSize = inputTypes.zip(convertedFields).map { case (dt, 
ev) =>
    +      genAdditionalSize(dt, ev)
    +    }.mkString("")
    +
    +    val fieldWriters = inputTypes.zip(convertedFields).zipWithIndex.map { 
case ((dt, ev), i) =>
    +      val update = genFieldWriter(ctx, dt, ev, output, i, cursor)
    +      s"""
    +        if (${ev.isNull}) {
    +          $output.setNullAt($i);
    +        } else {
    +          $update;
    +        }
    +      """
    +    }.mkString("\n")
    +
    +    val code = s"""
    +      ${convertedFields.map(_.code).mkString("\n")}
    +
    +      final int $numBytes = $fixedSize $additionalSize;
    +      if ($numBytes > $buffer.length) {
    +        $buffer = new byte[$numBytes];
    +      }
    +
    +      $output.pointTo(
    +        $buffer,
    +        $PlatformDependent.BYTE_ARRAY_OFFSET,
    +        ${inputTypes.length},
    +        $numBytes);
    +
    +      int $cursor = $fixedSize;
    +
    +      $fieldWriters
    +      """
    +    GeneratedExpressionCode(code, "false", output)
    +  }
    +
    +  private def getWriter(dt: DataType) = dt match {
    +    case StringType => classOf[UnsafeWriters.UTF8StringWriter].getName
    +    case BinaryType => classOf[UnsafeWriters.BinaryWriter].getName
    +    case CalendarIntervalType => 
classOf[UnsafeWriters.IntervalWriter].getName
    +    case _: StructType => classOf[UnsafeWriters.StructWriter].getName
    +    case _: ArrayType => classOf[UnsafeWriters.ArrayWriter].getName
    +    case _: DecimalType => classOf[UnsafeWriters.DecimalWriter].getName
    +  }
    +
    +  private def createCodeForArray(
    --- End diff --
    
    This part may be hard to review; the overall process is:
    
    1. Iterate over the input array to calculate how many bytes we need. An array is 
different from a row: we can't know its length in advance.
    2. While calculating numBytes in step 1, if the element type is not 
primitive, store the converted-to-unsafe data in a temporary array. For example, 
store a UTF8String[] for string type and an UnsafeRow[] for struct type.
    3. Start writing values at `cursor`, advancing the `cursor` until the end of the data.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to