[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/21021 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186076760 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = $base.toObjectArray($elementTypeTerm); --- End diff -- Yes, probably the performance gain would not be worth this effort. I agree going on with the current implementation. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r18607 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = $base.toObjectArray($elementTypeTerm); --- End diff -- In my opinion, main issue is to find or prepare sort implementations to handle required specification (ascending, descending, or ordering of null, with various types, in specialized implementation). If we can use existing implementation for now, we can handle them in this PR. Otherwise, we would like to handle such a case in another PR since it requires to implement sort algorithms. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186071434 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = $base.toObjectArray($elementTypeTerm); --- End diff -- Now I added the handling of primitives in the PR you mentioned, so it handles that now. My sentence meant that probably the main issue is to avoid problems with nulls for primitive types, since in that case the returned array doesn't contain null but the default value for null items IIUC. So probably it is some extra effort (we should also check if it is worth). I am fine having this done in a followup PR. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186068983 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); --- End diff -- Now I see, thanks. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186061995 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = $base.toObjectArray($elementTypeTerm); --- End diff -- I am sorry that I cannot understand this phase `Probably it is quite hard for null handling`. Would it be possible to elaborate on your thought? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186060660 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); --- End diff -- IIUC, this is because `compare()` in `java.util.Arrays.sort` accepts two `Object` arguments. If we could use existing type specialized sort algorithm (i.e. compare(int, int), compare(float, float), and so one), we are happy to avoid boxing. Do you have any suggestion for such an existing sorting implementation? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186059899 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = $base.toObjectArray($elementTypeTerm); --- End diff -- Since `java.util.Arrays.sort` accepts `Object[]`, this PR always uses `Object[]`. Like [this thread](https://github.com/apache/spark/pull/21040#discussion_r180770274), another PR can avoid boxing by using type-specialized sorting algorithm. WDYT? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186040751 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); --- End diff -- why do we need to enforce the boxing? An why do we need to cast to the java type in the non primitive scenario? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186042414 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = $base.copy();" +} else { + val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = $base.toObjectArray($elementTypeTerm); --- End diff -- can we avoid boxing? Probably it is quite hard for null handling, so we can ignore this. Did you try? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186037573 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -117,47 +118,16 @@ case class MapValues(child: Expression) } /** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. + * Common base class for [[SortArray]] and [[ArraySort]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", - examples = """ -Examples: - > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); - ["a","b","c","d"] - """) -// scalastyle:on line.size.limit -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) +trait ArraySortUtil extends ExpectsInputTypes { --- End diff -- Sure, thank you for your review while it is a long-holiday week in Japan. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186037483 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = (($arrayData) $base).copy();" +} else { + val dataTypes = ctx.addReferenceObj("dataType", elementType) --- End diff -- I see --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186037447 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = (($arrayData) $base).copy();" --- End diff -- Good catch, done --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186002801 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = (($arrayData) $base).copy();" --- End diff -- nit: do we need cast `base` to `ArrayData`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186003960 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -117,47 +118,16 @@ case class MapValues(child: Expression) } /** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. + * Common base class for [[SortArray]] and [[ArraySort]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", - examples = """ -Examples: - > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); - ["a","b","c","d"] - """) -// scalastyle:on line.size.limit -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) +trait ArraySortUtil extends ExpectsInputTypes { --- End diff -- How about `ArraySortLike`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186003272 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = (($arrayData) $base).copy();" +} else { + val dataTypes = ctx.addReferenceObj("dataType", elementType) --- End diff -- How about `elementTypeTerm` or something? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r186002826 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,191 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +if (elementType == NullType) { + s"${ev.value} = (($arrayData) $base).copy();" +} else { + val dataTypes = ctx.addReferenceObj("dataType", elementType) + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |Object[] $array = (Object[]) (($arrayData) $base).toObjectArray($dataTypes); --- End diff -- ditto. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185378543 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,202 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val dataTypes = elementType match { + case DecimalType.Fixed(p, s) => +s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)" + case ArrayType(et, cn) => +val dt = s"org.apache.spark.sql.types.$et$$.MODULE$$" +s"org.apache.spark.sql.types.DataTypes.createArrayType($dt, $cn)" + case StructType(f) => +"org.apache.spark.sql.types.StructType$.MODULE$." + + s"apply(new java.util.ArrayList(${f.length}))" + case _ => +s"org.apache.spark.sql.types.$elementType$$.MODULE$$" +} --- End diff -- Definitely, I added some complex test cases with nests. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185197041 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,202 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val dataTypes = elementType match { + case DecimalType.Fixed(p, s) => +s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)" + case ArrayType(et, cn) => +val dt = s"org.apache.spark.sql.types.$et$$.MODULE$$" +s"org.apache.spark.sql.types.DataTypes.createArrayType($dt, $cn)" + case StructType(f) => +"org.apache.spark.sql.types.StructType$.MODULE$." + + s"apply(new java.util.ArrayList(${f.length}))" + case _ => +s"org.apache.spark.sql.types.$elementType$$.MODULE$$" +} --- End diff -- Could you also add some tests using `ArrayType` and `StructType` for `elementType`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185196808 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,202 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val dataTypes = elementType match { + case DecimalType.Fixed(p, s) => +s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)" + case ArrayType(et, cn) => +val dt = s"org.apache.spark.sql.types.$et$$.MODULE$$" +s"org.apache.spark.sql.types.DataTypes.createArrayType($dt, $cn)" + case StructType(f) => +"org.apache.spark.sql.types.StructType$.MODULE$." + + s"apply(new java.util.ArrayList(${f.length}))" + case _ => +s"org.apache.spark.sql.types.$elementType$$.MODULE$$" +} --- End diff -- I'm still wondering whether this will work or not. What if `elementType` is `ArrayType(ArrayType(IntegerType))`? Can't we use a reference object of `elementType`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185163397 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,205 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val sort = if (elementType == NullType) "" else { + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |final int $sortOrder = $order ? 1 : -1; + |java.util.Arrays.sort($array, new java.util.Comparator() { + | @Override public int compare(Object $o1, Object $o2) { + |if ($o1 == null && $o2 == null) { + | return 0; + |} else if ($o1 == null) { + | return $sortOrder * $nullOrder; + |} else if ($o2 == null) { + | return -$sortOrder * $nullOrder; + |} + |$comp + |return $sortOrder * $c; + | } + |}); + """.stripMargin +} +val dataTypes = elementType match { + case DecimalType.Fixed(p, s) => +s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)" + case ArrayType(et, cn) => +s"org.apache.spark.sql.types.DataTypes.createArrayType($et, $cn)" + case MapType(kt, vt, cn) => +s"org.apache.spark.sql.types.DataTypes.createMapType($kt, $vt, $cn)" + case StructType(f) => +"org.apache.spark.sql.types.StructType$.MODULE$." + + s"apply(new java.util.ArrayList(${f.length}))" + case _ => +s"org.apache.spark.sql.types.DataTypes.$elementType" +} --- End diff -- I'm wondering if this will work for all complex types, e.g. `ArrayType(ArrayType(IntegerType))`? How about using reference object of `elementType`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185166848 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,205 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val sort = if (elementType == NullType) "" else { + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |final int $sortOrder = $order ? 1 : -1; + |java.util.Arrays.sort($array, new java.util.Comparator() { + | @Override public int compare(Object $o1, Object $o2) { + |if ($o1 == null && $o2 == null) { + | return 0; + |} else if ($o1 == null) { + | return $sortOrder * $nullOrder; + |} else if ($o2 == null) { + | return -$sortOrder * $nullOrder; + |} + |$comp + |return $sortOrder * $c; + | } + |}); + """.stripMargin +} +val dataTypes = elementType match { + case DecimalType.Fixed(p, s) => +s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)" + case ArrayType(et, cn) => +s"org.apache.spark.sql.types.DataTypes.createArrayType($et, $cn)" + case MapType(kt, vt, cn) => +s"org.apache.spark.sql.types.DataTypes.createMapType($kt, $vt, $cn)" + case StructType(f) => +"org.apache.spark.sql.types.StructType$.MODULE$." + + s"apply(new java.util.ArrayList(${f.length}))" + case _ => +s"org.apache.spark.sql.types.DataTypes.$elementType" +} +s""" + |Object[] $array = (Object[]) (($arrayData) $base).toArray( --- End diff -- How about using `toObjectArray` which doesn't need ClassTag? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185163319 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,205 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val sort = if (elementType == NullType) "" else { + val sortOrder = ctx.freshName("sortOrder") + val o1 = ctx.freshName("o1") + val o2 = ctx.freshName("o2") + val jt = CodeGenerator.javaType(elementType) + val comp = if (CodeGenerator.isPrimitiveType(elementType)) { +val bt = CodeGenerator.boxedType(elementType) +val v1 = ctx.freshName("v1") +val v2 = ctx.freshName("v2") +s""" + |$jt $v1 = (($bt) $o1).${jt}Value(); + |$jt $v2 = (($bt) $o2).${jt}Value(); + |int $c = ${ctx.genComp(elementType, v1, v2)}; + """.stripMargin + } else { +s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) $o2)")};" + } + s""" + |final int $sortOrder = $order ? 1 : -1; + |java.util.Arrays.sort($array, new java.util.Comparator() { + | @Override public int compare(Object $o1, Object $o2) { + |if ($o1 == null && $o2 == null) { + | return 0; + |} else if ($o1 == null) { + | return $sortOrder * $nullOrder; + |} else if ($o2 == null) { + | return -$sortOrder * $nullOrder; + |} + |$comp + |return $sortOrder * $c; + | } + |}); + """.stripMargin +} +val dataTypes = elementType match { + case DecimalType.Fixed(p, s) => +s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)" + case ArrayType(et, cn) => +s"org.apache.spark.sql.types.DataTypes.createArrayType($et, $cn)" + case MapType(kt, vt, cn) => --- End diff -- We don't need for `MapType` because `MapType` is not orderable? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r185166899 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,28 +161,205 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + -nullOrder } else if (o2 == null) { - -1 + nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def elementType: DataType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType + + def sortEval(array: Any, ascending: Boolean): Any = { val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } + def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: String): String = { +val arrayData = classOf[ArrayData].getName +val genericArrayData = classOf[GenericArrayData].getName +val array = ctx.freshName("array") +val c = ctx.freshName("c") +val sort = if (elementType == NullType) "" else { --- End diff -- How about just copying the original array if `elementType == NullType`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184632402 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -185,6 +183,8 @@ trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback { object ArraySortUtil { type NullOrder = Int + // Least: place null element at the end of the array + // Greatest: place null element at the beginning of the array --- End diff -- Sure. I will also fix statements. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184293071 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -185,6 +183,8 @@ trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback { object ArraySortUtil { type NullOrder = Int + // Least: place null element at the end of the array + // Greatest: place null element at the beginning of the array --- End diff -- Can you add `for ascending order` or something? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184284276 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,24 +163,85 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * nullOrder } else if (o2 == null) { - -1 + -1 * nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +object ArraySortUtil { + type NullOrder = Int + object NullOrder { +val Least: NullOrder = -1 +val Greatest: NullOrder = 1 + } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order + according to the natural ordering of the array elements. Null elements will be placed + at the beginning of the returned array in ascending order or at the end of the returned + array in descending order. + """, + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true); + [null,"a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def nullOrder: Int = NullOrder.Greatest --- End diff -- Good catch, thanks --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184262552 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -300,6 +333,49 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI override def prettyName: String = "reverse" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array. + """, + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { + + override def dataType: DataType = child.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + override def arrayExpression: Expression = child + override def nullOrder: Int = NullOrder.Least --- End diff -- `nullOrder: NullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184262594 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -117,47 +118,18 @@ case class MapValues(child: Expression) } /** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. + * Common base class for [[SortArray]] and [[ArraySort]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", - examples = """ -Examples: - > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); - ["a","b","c","d"] - """) -// scalastyle:on line.size.limit -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) - - override def left: Expression = base - override def right: Expression = ascendingOrder - override def dataType: DataType = base.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) +trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback { + protected def arrayExpression: Expression - override def checkInputDataTypes(): TypeCheckResult = base.dataType match { -case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => - ascendingOrder match { -case Literal(_: Boolean, BooleanType) => - TypeCheckResult.TypeCheckSuccess -case _ => - TypeCheckResult.TypeCheckFailure( -"Sort order in second argument requires a boolean literal.") - } -case ArrayType(dt, _) => - TypeCheckResult.TypeCheckFailure( -s"$prettyName does not support sorting array of type ${dt.simpleString}") -case _ => - TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") - } + // If -1, place null element at the end of the array + // If 1, place null element at the beginning of the array + protected def nullOrder: Int --- End diff -- `nullOrder: NullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184262545 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,24 +163,85 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * nullOrder } else if (o2 == null) { - -1 + -1 * nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +object ArraySortUtil { + type NullOrder = Int + object NullOrder { +val Least: NullOrder = -1 +val Greatest: NullOrder = 1 + } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order + according to the natural ordering of the array elements. Null elements will be placed + at the beginning of the returned array in ascending order or at the end of the returned + array in descending order. + """, + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true); + [null,"a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def nullOrder: Int = NullOrder.Greatest --- End diff -- `nullOrder: NullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184261968 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -168,9 +140,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - -1 + -1 * nullOrder --- End diff -- `nullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184262013 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,24 +163,85 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * nullOrder --- End diff -- `-nullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184261902 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,24 +163,85 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * nullOrder } else if (o2 == null) { - -1 + -1 * nullOrder } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +object ArraySortUtil { + type NullOrder = Int + object NullOrder { +val Least: NullOrder = -1 +val Greatest: NullOrder = 1 + } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order + according to the natural ordering of the array elements. Null elements will be placed + at the beginning of the returned array in ascending order or at the end of the returned + array in descending order. + """, + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true); + [null,"a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def nullOrder: Int = NullOrder.Greatest --- End diff -- `Least`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184262044 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -191,24 +163,85 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * nullOrder } else if (o2 == null) { - -1 + -1 * nullOrder --- End diff -- `nullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184261996 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -168,9 +140,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - -1 + -1 * nullOrder } else if (o2 == null) { - 1 + 1 * nullOrder --- End diff -- `-nullOrder`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184262129 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -117,47 +118,18 @@ case class MapValues(child: Expression) } /** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. + * Common base class for [[SortArray]] and [[ArraySort]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", - examples = """ -Examples: - > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); - ["a","b","c","d"] - """) -// scalastyle:on line.size.limit -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) - - override def left: Expression = base - override def right: Expression = ascendingOrder - override def dataType: DataType = base.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) +trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback { + protected def arrayExpression: Expression - override def checkInputDataTypes(): TypeCheckResult = base.dataType match { -case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => - ascendingOrder match { -case Literal(_: Boolean, BooleanType) => - TypeCheckResult.TypeCheckSuccess -case _ => - TypeCheckResult.TypeCheckFailure( -"Sort order in second argument requires a boolean literal.") - } -case ArrayType(dt, _) => - TypeCheckResult.TypeCheckFailure( -s"$prettyName does not support sorting array of type ${dt.simpleString}") -case _ => - TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") - } + // If -1, place null element at the end of the array + // If 1, place null element at the beginning of the array --- End diff -- These comments are not correct now. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r184261947 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -300,6 +333,49 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI override def prettyName: String = "reverse" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array. + """, + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { + + override def dataType: DataType = child.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + override def arrayExpression: Expression = child + override def nullOrder: Int = NullOrder.Least --- End diff -- `Greatest`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r182999463 --- Diff: python/pyspark/sql/functions.py --- @@ -2168,6 +2171,23 @@ def sort_array(col, asc=True): return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) +@since(2.4) +def array_sort(col): +""" +Collection function: sorts the input array in ascending order. The elements of the input array +must be orderable. Null elements will be placed at the end of the returned array. + +:param col: name of column or expression + +>>> from pyspark.sql.functions import array_sort +>>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data']) +>>> df.select(array_sort(df.data).alias('r')).collect() +[Row(r=[1, 2, 3, None]), Row(r=[1]), Row(r=[])] + """ --- End diff -- nit: an extra space? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r182992892 --- Diff: python/pyspark/sql/functions.py --- @@ -2154,10 +2154,13 @@ def array_max(col): def sort_array(col, asc=True): """ Collection function: sorts the input array in ascending or descending order according -to the natural ordering of the array elements. +to the natural ordering of the array elements. Null elements will be placed at the beginning +of the returned array in ascending order or at the end of the returned array in descending +order. :param col: name of column or expression +>>> from pyspark.sql.functions import sort_array --- End diff -- Do we need this? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r18236 --- Diff: python/pyspark/sql/functions.py --- @@ -2154,10 +2154,13 @@ def array_max(col): def sort_array(col, asc=True): """ Collection function: sorts the input array in ascending or descending order according -to the natural ordering of the array elements. +to the natural ordering of the array elements. Null elements will be placed at the beginning +of the returned array in ascending order or at the end of the returned array in descending +order. :param col: name of column or expression +>>> from pyspark.sql.functions import sort_array >>> df = spark.createDataFrame([([2, 1, 3],),([1],),([],)], ['data']) --- End diff -- We should include `None` to show where the `None` comes? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r182998415 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -117,47 +117,18 @@ case class MapValues(child: Expression) } /** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. + * Common base class for [[SortArray]] and [[ArraySort]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", - examples = """ -Examples: - > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); - ["a","b","c","d"] - """) -// scalastyle:on line.size.limit -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) +trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback { + protected def arrayExpression: Expression - override def left: Expression = base - override def right: Expression = ascendingOrder - override def dataType: DataType = base.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) - - override def checkInputDataTypes(): TypeCheckResult = base.dataType match { -case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => - ascendingOrder match { -case Literal(_: Boolean, BooleanType) => - TypeCheckResult.TypeCheckSuccess -case _ => - TypeCheckResult.TypeCheckFailure( -"Sort order in second argument requires a boolean literal.") - } -case ArrayType(dt, _) => - TypeCheckResult.TypeCheckFailure( -s"$prettyName does not support sorting array of type ${dt.simpleString}") -case _ => - TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") - } + // If -1, place null element at the end of the array + // If 1, place null element at the beginning of the array + protected def placeNullAtEnd: Int --- End diff -- I guess the name is confusing. How about `nullOrder`? `nullOrder = -1` means `null` is the least, place at the beginning for asc and at the end for desc, whereas `1` means the greatest, place at the end for asc and at the beginning for desc. And maybe we should define the constants like: ```scala object ArraySortUtil { type NullOrder = Int object NullOrder { val Least: NullOrder = -1 val Greatest: NullOrder = 1 } } ``` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r182993016 --- Diff: python/pyspark/sql/functions.py --- @@ -2168,6 +2171,23 @@ def sort_array(col, asc=True): return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) +@since(2.4) +def array_sort(col): +""" +Collection function: sorts the input array in ascending order. The elements of the input array +must be orderable. Null elements will be placed at the end of the returned array. + +:param col: name of column or expression + +>>> from pyspark.sql.functions import array_sort --- End diff -- ditto. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180429349 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +161,118 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def placeNullAtEnd: Int = 1 + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + val dtSimple = dt.simpleString + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type $dtSimple which is not orderable") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { --- End diff -- Yeah, as you said they are doing similar things. Therefore, a new trait is not introduced to reuse as possible. When one is subset of another one (e.g. `size` v.s. `cardinality`), we could take an approach that one calls another one. What I am doing in `cardinality`. Good point about the description. I will add the description on how it works with `null`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180400407 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +161,118 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def placeNullAtEnd: Int = 1 + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + val dtSimple = dt.simpleString + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type $dtSimple which is not orderable") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { --- End diff -- I see, thanks. Then can't we add another flag for it? I just find a bit weird that we have two functions which do basically the same thing. Just one has one more flag than the other and they behave differently with nulls. As a user I would not understand when to use one or the other. What do you think? I'd be also interested in @gatorsmile's point of view, since he created the JIRA for this. Moreover, can we at least improve than the description of the `SortArray` case class in order to point out clearly how it behaves with nulls? Now there is no mention about it. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional c
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180392603 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +161,118 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def placeNullAtEnd: Int = 1 + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + val dtSimple = dt.simpleString + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type $dtSimple which is not orderable") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { --- End diff -- As you can see the result in UT, `null` handing is different. As you suggested, to reuse existing code as possible, I refactored by using `ArraySortUtil`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180384110 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +161,118 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + override def placeNullAtEnd: Int = 1 + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + val dtSimple = dt.simpleString + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type $dtSimple which is not orderable") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """, + since = "2.4.0") +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { --- End diff -- why do we need this? Can't we just reuse the previous `SortArray` fixing the `right` to be `TrueLiteral`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180319251 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """) +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { + + override def dataType: DataType = child.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + override def arrayExpression: Expression = child + override def placeNullAtEnd: Int = -1 + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + TypeCheckResult.TypeCheckSuccess +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") --- End diff -- Sure --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180318978 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """) +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { + + override def dataType: DataType = child.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + override def arrayExpression: Expression = child + override def placeNullAtEnd: Int = -1 + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + TypeCheckResult.TypeCheckSuccess +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") --- End diff -- Sounds good to me. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180317825 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """) +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { + + override def dataType: DataType = child.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + override def arrayExpression: Expression = child + override def placeNullAtEnd: Int = -1 + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + TypeCheckResult.TypeCheckSuccess +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") --- End diff -- The original message is compatible with `SortArray`. Is it better to update the message in `SortArray` with this change? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org F
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180317210 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] --- End diff -- Thanks for letting know the example. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180316279 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] + """) +// scalastyle:on line.size.limit +case class ArraySort(child: Expression) extends UnaryExpression with ArraySortUtil { + + override def dataType: DataType = child.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType) + + override def arrayExpression: Expression = child + override def placeNullAtEnd: Int = -1 + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + TypeCheckResult.TypeCheckSuccess +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") --- End diff -- `...not support sorting array of type ${dt.simpleString} which is not orderable.` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: review
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180312180 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] --- End diff -- which will be shown in SQL documentation and the extended description command in sql syntax --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180311923 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] --- End diff -- Every expression needs it .. many of functions don't have it because that field was added from 2.3.0. You could do it: https://github.com/apache/spark/blob/2ca9bb083c515511d2bfee271fc3e0269aceb9d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala#L511 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180311343 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] --- End diff -- Do we have to add `since` in this file? I cannot find `since` in `collectionOperations.scala`. Of course, `functions.scala` and `functions.py` have `since`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180310842 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala --- @@ -324,6 +324,30 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { assert(intercept[AnalysisException] { df3.selectExpr("sort_array(a)").collect() }.getMessage().contains("only supports array input")) + +checkAnswer( + df.select(array_sort($"a"), array_sort($"b")), + Seq( +Row(Seq(1, 2, 3), Seq("a", "b", "c")), +Row(Seq[Int](), Seq[String]()), +Row(null, null)) +) +checkAnswer( + df.selectExpr("array_sort(a)", "array_sort(b)"), + Seq( +Row(Seq(1, 2, 3), Seq("a", "b", "c")), +Row(Seq[Int](), Seq[String]()), --- End diff -- Maybe `Seq.empty[Int]` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180310741 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -190,28 +160,114 @@ case class SortArray(base: Expression, ascendingOrder: Expression) if (o1 == null && o2 == null) { 0 } else if (o1 == null) { - 1 + 1 * placeNullAtEnd } else if (o2 == null) { - -1 + -1 * placeNullAtEnd } else { -ordering.compare(o1, o2) } } } } - override def nullSafeEval(array: Any, ascending: Any): Any = { -val elementType = base.dataType.asInstanceOf[ArrayType].elementType + def sortEval(array: Any, ascending: Boolean): Any = { +val elementType = arrayExpression.dataType.asInstanceOf[ArrayType].elementType val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType) if (elementType != NullType) { - java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt) + java.util.Arrays.sort(data, if (ascending) lt else gt) } new GenericArrayData(data.asInstanceOf[Array[Any]]) } +} + +/** + * Sorts the input array in ascending / descending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); + ["a","b","c","d"] + """) +// scalastyle:on line.size.limit +case class SortArray(base: Expression, ascendingOrder: Expression) + extends BinaryExpression with ArraySortUtil { + + def this(e: Expression) = this(e, Literal(true)) + + override def left: Expression = base + override def right: Expression = ascendingOrder + override def dataType: DataType = base.dataType + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) + + override def arrayExpression: Expression = base + + override def checkInputDataTypes(): TypeCheckResult = base.dataType match { +case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => + ascendingOrder match { +case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess +case _ => + TypeCheckResult.TypeCheckFailure( +"Sort order in second argument requires a boolean literal.") + } +case ArrayType(dt, _) => + TypeCheckResult.TypeCheckFailure( +s"$prettyName does not support sorting array of type ${dt.simpleString}") +case _ => + TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") + } + + override def nullSafeEval(array: Any, ascending: Any): Any = { +sortEval(array, ascending.asInstanceOf[Boolean]) + } override def prettyName: String = "sort_array" } +/** + * Sorts the input array in ascending order according to the natural ordering of + * the array elements and returns it. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ +_FUNC_(array) - Sorts the input array in ascending order. The elements of the input array must + be orderable. Null elements will be placed at the end of the returned array.""", + examples = """ +Examples: + > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); + ["a","b","c","d",null] --- End diff -- +1 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21021#discussion_r180310339 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -116,47 +116,17 @@ case class MapValues(child: Expression) } /** - * Sorts the input array in ascending / descending order according to the natural ordering of - * the array elements and returns it. + * Common base class for [[SortArray]] and [[ArraySort]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending or descending order according to the natural ordering of the array elements.", - examples = """ -Examples: - > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true); - ["a","b","c","d"] - """) -// scalastyle:on line.size.limit -case class SortArray(base: Expression, ascendingOrder: Expression) - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { - - def this(e: Expression) = this(e, Literal(true)) +trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback { + protected def arrayExpression: Expression - override def left: Expression = base - override def right: Expression = ascendingOrder - override def dataType: DataType = base.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType) - - override def checkInputDataTypes(): TypeCheckResult = base.dataType match { -case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => - ascendingOrder match { -case Literal(_: Boolean, BooleanType) => - TypeCheckResult.TypeCheckSuccess -case _ => - TypeCheckResult.TypeCheckFailure( -"Sort order in second argument requires a boolean literal.") - } -case ArrayType(dt, _) => - TypeCheckResult.TypeCheckFailure( -s"$prettyName does not support sorting array of type ${dt.simpleString}") -case _ => - TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array input.") - } + // If -1, place null element at the end of the array + protected def placeNullAtEnd: Int = 1 --- End diff -- sure --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org