Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/21031#discussion_r180992933
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -24,6 +25,47 @@ import
org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData,
MapData}
import org.apache.spark.sql.types._
+/**
+ * Common base class for [[Size]] and [[Cardinality]].
+ */
+abstract class SizeUtil extends UnaryExpression with ExpectsInputTypes {
+ override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(ArrayType, MapType))
+ override def nullable: Boolean = false
+
+ def sizeEval(child: Expression, input: InternalRow, resultTypeBigInt:
Boolean): Any = {
+ val value = child.eval(input)
+ val result = if (value == null) {
+ -1
+ } else child.dataType match {
+ case _: ArrayType => value.asInstanceOf[ArrayData].numElements()
+ case _: MapType => value.asInstanceOf[MapData].numElements()
+ }
+ if (resultTypeBigInt) {
+ new Decimal().setOrNull(result.asInstanceOf[Int].toLong,
DecimalType.MAX_PRECISION, 0)
+ } else {
+ result
+ }
+ }
+
+ def doSizeGenCode(ctx: CodegenContext, ev: ExprCode, resultTypeBigInt:
Boolean): ExprCode = {
--- End diff --
In this way, you can write the `eval` and `doGenCode` methods directly, and
in the `Size` and `Cardinality` classes we just need to override `def
resultTypeBigInt`, setting it to `true` or `false`. I think it is cleaner, but
it is not a big deal.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]