Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21031#discussion_r180804842 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -24,6 +25,47 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData} import org.apache.spark.sql.types._ +/** + * Common base class for [[Size]] and [[Cardinality]]. + */ +abstract class SizeUtil extends UnaryExpression with ExpectsInputTypes { + override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(ArrayType, MapType)) + override def nullable: Boolean = false + + def sizeEval(child: Expression, input: InternalRow, resultTypeBigInt: Boolean): Any = { + val value = child.eval(input) + val result = if (value == null) { + -1 + } else child.dataType match { + case _: ArrayType => value.asInstanceOf[ArrayData].numElements() + case _: MapType => value.asInstanceOf[MapData].numElements() + } + if (resultTypeBigInt) { + new Decimal().setOrNull(result.asInstanceOf[Int].toLong, DecimalType.MAX_PRECISION, 0) + } else { + result + } + } + + def doSizeGenCode(ctx: CodegenContext, ev: ExprCode, resultTypeBigInt: Boolean): ExprCode = { --- End diff -- minor: what about adding a `def resultTypeBigInt` (or something similar) which is overridden by the subclasses instead of having it as an argument?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org