Github user chenghao-intel commented on a diff in the pull request:
https://github.com/apache/spark/pull/7581#discussion_r35845965
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -35,3 +36,70 @@ case class Size(child: Expression) extends
UnaryExpression with ExpectsInputType
nullSafeCodeGen(ctx, ev, c => s"${ev.primitive} = ($c).size();")
}
}
+
+/**
+ * Sorts the input array in ascending / descending order according to the
natural ordering of
+ * the array elements and returns it.
+ */
+case class SortArray(base: Expression, ascendingOrder: Expression)
+ extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
+
+ def this(e: Expression) = this(e, Literal(true))
+
+ override def left: Expression = base
+ override def right: Expression = ascendingOrder
+ override def dataType: DataType = base.dataType
+ override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType,
BooleanType)
+
+ override def checkInputDataTypes(): TypeCheckResult = base.dataType
match {
+ case _ @ ArrayType(n: AtomicType, _) =>
TypeCheckResult.TypeCheckSuccess
+ case _ @ ArrayType(n, _) => TypeCheckResult.TypeCheckFailure(
+ s"Type $n is not the AtomicType, we can not perform
the ordering operations")
+ case other =>
+ TypeCheckResult.TypeCheckFailure(s"ArrayType(AtomicType) is
expected, but we got $other")
+ }
+
+ @transient
+ private lazy val lt: (Any, Any) => Boolean = {
+ val ordering = base.dataType match {
+ case _ @ ArrayType(n: AtomicType, _) =>
n.ordering.asInstanceOf[Ordering[Any]]
+ }
+
+ (left, right) => {
+ if (left == null && right == null) {
+ false
+ } else if (left == null) {
+ true
+ } else if (right == null) {
+ false
+ } else {
+ ordering.compare(left, right) < 0
+ }
+ }
+ }
+
+ @transient
+ private lazy val gt: (Any, Any) => Boolean = {
+ val ordering = base.dataType match {
+ case _ @ ArrayType(n: AtomicType, _) =>
n.ordering.asInstanceOf[Ordering[Any]]
+ }
+
+ (left, right) => {
+ if (left == null && right == null) {
+ true
+ } else if (left == null) {
+ false
+ } else if (right == null) {
+ true
+ } else {
+ ordering.compare(left, right) > 0
+ }
+ }
+ }
+
+ override def nullSafeEval(array: Any, ascending: Any): Seq[Any] = {
+ array.asInstanceOf[Seq[Any]].sortWith(if
(ascending.asInstanceOf[Boolean]) lt else gt)
--- End diff --
Hmm, but we cannot assume the underlying data type of the `ArrayType` is
`Array[T]`; it's supposed to be a `Seq`, I think, and `Seq` has many concrete
subclasses, like `ArraySeq`, `Buffer`, `WrappedArray`, etc., and we cannot
even tell whether it's mutable or immutable.
The only way we can sort the seq is with the `sortWith` method.
I agree that if we made the Catalyst data type a single concrete type instead
of the abstract Scala type, and even a mutable one, that would be a great
improvement for performance, as we could reuse existing objects in expression
evaluation.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]