[GitHub] spark pull request #21208: [SPARK-23925][SQL] Add array_repeat collection fu...

pepinoflo Sun, 06 May 2018 07:13:10 -0700

Github user pepinoflo commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21208#discussion_r186292037
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
    @@ -1229,3 +1229,140 @@ case class Flatten(child: Expression) extends 
UnaryExpression {
     
       override def prettyName: String = "flatten"
     }
    +
    +/**
    + * Returns the array containing the given input value (left) count (right) 
times.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(element, count) - Returns the array containing element 
count times.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_('123', 2);
    +       ['123', '123']
    +  """)
    +case class ArrayRepeat(left: Expression, right: Expression)
    +  extends BinaryExpression with ExpectsInputTypes {
    +
    +  override def dataType: ArrayType = ArrayType(left.dataType, 
left.nullable)
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, 
IntegerType)
    +
    +  override def nullable: Boolean = right.nullable
    +
    +  override def eval(input: InternalRow): Any = {
    +    val count = right.eval(input)
    +    if (count == null) {
    +      null
    +    } else {
    +      new 
GenericArrayData(List.fill(count.asInstanceOf[Int])(left.eval(input)))
    +    }
    +  }
    +
    +  override def prettyName: String = "array_repeat"
    +
    +  override def nullSafeCodeGen(ctx: CodegenContext,
    +                               ev: ExprCode,
    +                               f: (String, String) => String): ExprCode = {
    +    val leftGen = left.genCode(ctx)
    +    val rightGen = right.genCode(ctx)
    +    val resultCode = f(leftGen.value, rightGen.value)
    +
    +    if (nullable) {
    +      val nullSafeEval =
    +        leftGen.code +
    +          rightGen.code + ctx.nullSafeExec(right.nullable, 
rightGen.isNull) {
    +            s"""
    +              ${ev.isNull} = false;
    +              $resultCode
    +            """
    +          }
    +
    +      ev.copy(code =
    +        s"""
    +           | boolean ${ev.isNull} = true;
    +           | ${CodeGenerator.javaType(dataType)} ${ev.value} =
    +           |   ${CodeGenerator.defaultValue(dataType)};
    +           | $nullSafeEval
    +         """.stripMargin
    +      )
    +    } else {
    +      ev.copy(code =
    +        s"""
    +           | boolean ${ev.isNull} = false;
    +           | ${leftGen.code}
    +           | ${rightGen.code}
    +           | ${CodeGenerator.javaType(dataType)} ${ev.value} =
    +           |   ${CodeGenerator.defaultValue(dataType)};
    +           | $resultCode
    +         """.stripMargin
    +        , isNull = FalseLiteral)
    +    }
    +
    +  }
    +
    +  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    +
    +    nullSafeCodeGen(ctx, ev, (l, r) => {
    +      val et = dataType.elementType
    +      val isPrimitive = CodeGenerator.isPrimitiveType(et)
    +
    +      val arrayDataName = ctx.freshName("arrayData")
    +      val arrayName = ctx.freshName("arrayObject")
    +      val numElements = ctx.freshName("numElements")
    +
    +      val genNumElements =
    +        s"""
    +           | int $numElements = 0;
    +           | if ($r > 0) {
    +           |   $numElements = $r;
    +           | }
    +         """.stripMargin
    +
    +      val initialization = if (isPrimitive) {
    +        val arrayName = ctx.freshName("array")
    +        val baseOffset = Platform.BYTE_ARRAY_OFFSET
    +        s"""
    +           | int numBytes = ${et.defaultSize} * $numElements;
    +           | int unsafeArraySizeInBytes =
    +           |   UnsafeArrayData.calculateHeaderPortionInBytes($numElements)
    +           |     + org.apache.spark.unsafe.array.ByteArrayMethods
    +           |       .roundNumberOfBytesToNearestWord(numBytes);
    +           | byte[] $arrayName = new byte[unsafeArraySizeInBytes];
    --- End diff --
    
    So you mean we should do a size check to make sure it fits in the array, 
and if it doesn't, we should fall back to boxing and initialize a 
`GenericArrayData` with the given size instead?
    Also, when you say `0x7000_0000`, do you mean the hexadecimal value?



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #21208: [SPARK-23925][SQL] Add array_repeat collection fu...

Reply via email to