subject:"\[GitHub\] spark pull request #21021\: \[SPARK\-23921\]\[SQL\] Add array

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-06 Thread asfgit

Github user asfgit closed the pull request at:

https://github.com/apache/spark/pull/21021


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186076760
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = $base.toObjectArray($elementTypeTerm);
--- End diff --

Yes, probably the performance gain would not be worth this effort. I agree 
going on with the current implementation.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r18607
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = $base.toObjectArray($elementTypeTerm);
--- End diff --

In my opinion, main issue is to find or prepare sort implementations to 
handle required specification (ascending, descending, or ordering of null, with 
various types, in specialized implementation).
If we can use existing implementation for now, we can handle them in this 
PR. Otherwise, we would like to handle such a case in another PR since it 
requires to implement sort algorithms.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186071434
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = $base.toObjectArray($elementTypeTerm);
--- End diff --

Now I added the handling of primitives in the PR you mentioned, so it 
handles that now.

My sentence meant that probably the main issue is to avoid problems with 
nulls for primitive types, since in that case the returned array doesn't 
contain null but the default value for null items IIUC. So probably it is some 
extra effort (we should also check if it is worth). I am fine having this done 
in a followup PR.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186068983
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
--- End diff --

Now I see, thanks.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186061995
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = $base.toObjectArray($elementTypeTerm);
--- End diff --

I am sorry that I cannot understand this phase `Probably it is quite hard 
for null handling`. Would it be possible to elaborate on your thought?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186060660
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
--- End diff --

IIUC, this is because `compare()` in `java.util.Arrays.sort` accepts two 
`Object` arguments. If we could use existing type specialized sort algorithm 
(i.e. compare(int, int), compare(float, float), and so one), we are happy to 
avoid boxing.
Do you have any suggestion for such an existing sorting implementation?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186059899
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = $base.toObjectArray($elementTypeTerm);
--- End diff --

Since `java.util.Arrays.sort` accepts `Object[]`, this PR always uses 
`Object[]`. Like [this 
thread](https://github.com/apache/spark/pull/21040#discussion_r180770274), 
another PR can avoid boxing by using type-specialized sorting algorithm. 
WDYT?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186040751
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
--- End diff --

why do we need to enforce the boxing? An why do we need to cast to the java 
type in the non primitive scenario?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186042414
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = $base.copy();"
+} else {
+  val elementTypeTerm = ctx.addReferenceObj("elementTypeTerm", 
elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = $base.toObjectArray($elementTypeTerm);
--- End diff --

can we avoid boxing? Probably it is quite hard for null handling, so we can 
ignore this. Did you try?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186037573
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -117,47 +118,16 @@ case class MapValues(child: Expression)
 }
 
 /**
- * Sorts the input array in ascending / descending order according to the 
natural ordering of
- * the array elements and returns it.
+ * Common base class for [[SortArray]] and [[ArraySort]].
  */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
-  examples = """
-Examples:
-  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
-   ["a","b","c","d"]
-  """)
-// scalastyle:on line.size.limit
-case class SortArray(base: Expression, ascendingOrder: Expression)
-  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
-
-  def this(e: Expression) = this(e, Literal(true))
+trait ArraySortUtil extends ExpectsInputTypes {
--- End diff --

Sure, thank you for your review while it is a long-holiday week in Japan.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186037483
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = (($arrayData) $base).copy();"
+} else {
+  val dataTypes = ctx.addReferenceObj("dataType", elementType)
--- End diff --

I see


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186037447
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = (($arrayData) $base).copy();"
--- End diff --

Good catch, done


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186002801
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = (($arrayData) $base).copy();"
--- End diff --

nit: do we need cast `base` to `ArrayData`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186003960
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -117,47 +118,16 @@ case class MapValues(child: Expression)
 }
 
 /**
- * Sorts the input array in ascending / descending order according to the 
natural ordering of
- * the array elements and returns it.
+ * Common base class for [[SortArray]] and [[ArraySort]].
  */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
-  examples = """
-Examples:
-  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
-   ["a","b","c","d"]
-  """)
-// scalastyle:on line.size.limit
-case class SortArray(base: Expression, ascendingOrder: Expression)
-  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
-
-  def this(e: Expression) = this(e, Literal(true))
+trait ArraySortUtil extends ExpectsInputTypes {
--- End diff --

How about `ArraySortLike`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186003272
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = (($arrayData) $base).copy();"
+} else {
+  val dataTypes = ctx.addReferenceObj("dataType", elementType)
--- End diff --

How about `elementTypeTerm` or something?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-04 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r186002826
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,191 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+if (elementType == NullType) {
+  s"${ev.value} = (($arrayData) $base).copy();"
+} else {
+  val dataTypes = ctx.addReferenceObj("dataType", elementType)
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |Object[] $array = (Object[]) (($arrayData) 
$base).toObjectArray($dataTypes);
--- End diff --

ditto.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-01 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185378543
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,202 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val dataTypes = elementType match {
+  case DecimalType.Fixed(p, s) =>
+s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)"
+  case ArrayType(et, cn) =>
+val dt = s"org.apache.spark.sql.types.$et$$.MODULE$$"
+s"org.apache.spark.sql.types.DataTypes.createArrayType($dt, $cn)"
+  case StructType(f) =>
+"org.apache.spark.sql.types.StructType$.MODULE$." +
+  s"apply(new java.util.ArrayList(${f.length}))"
+  case _ =>
+s"org.apache.spark.sql.types.$elementType$$.MODULE$$"
+}
--- End diff --

Definitely, I added some complex test cases with nests.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-01 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185197041
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,202 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val dataTypes = elementType match {
+  case DecimalType.Fixed(p, s) =>
+s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)"
+  case ArrayType(et, cn) =>
+val dt = s"org.apache.spark.sql.types.$et$$.MODULE$$"
+s"org.apache.spark.sql.types.DataTypes.createArrayType($dt, $cn)"
+  case StructType(f) =>
+"org.apache.spark.sql.types.StructType$.MODULE$." +
+  s"apply(new java.util.ArrayList(${f.length}))"
+  case _ =>
+s"org.apache.spark.sql.types.$elementType$$.MODULE$$"
+}
--- End diff --

Could you also add some tests using `ArrayType` and `StructType` for 
`elementType`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-05-01 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185196808
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,202 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val dataTypes = elementType match {
+  case DecimalType.Fixed(p, s) =>
+s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)"
+  case ArrayType(et, cn) =>
+val dt = s"org.apache.spark.sql.types.$et$$.MODULE$$"
+s"org.apache.spark.sql.types.DataTypes.createArrayType($dt, $cn)"
+  case StructType(f) =>
+"org.apache.spark.sql.types.StructType$.MODULE$." +
+  s"apply(new java.util.ArrayList(${f.length}))"
+  case _ =>
+s"org.apache.spark.sql.types.$elementType$$.MODULE$$"
+}
--- End diff --

I'm still wondering whether this will work or not. What if `elementType` is 
`ArrayType(ArrayType(IntegerType))`?
Can't we use a reference object of `elementType`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-30 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185163397
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,205 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val sort = if (elementType == NullType) "" else {
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |final int $sortOrder = $order ? 1 : -1;
+ |java.util.Arrays.sort($array, new java.util.Comparator() {
+ |  @Override public int compare(Object $o1, Object $o2) {
+ |if ($o1 == null && $o2 == null) {
+ |  return 0;
+ |} else if ($o1 == null) {
+ |  return $sortOrder * $nullOrder;
+ |} else if ($o2 == null) {
+ |  return -$sortOrder * $nullOrder;
+ |}
+ |$comp
+ |return $sortOrder * $c;
+ |  }
+ |});
+   """.stripMargin
+}
+val dataTypes = elementType match {
+  case DecimalType.Fixed(p, s) =>
+s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)"
+  case ArrayType(et, cn) =>
+s"org.apache.spark.sql.types.DataTypes.createArrayType($et, $cn)"
+  case MapType(kt, vt, cn) =>
+s"org.apache.spark.sql.types.DataTypes.createMapType($kt, $vt, 
$cn)"
+  case StructType(f) =>
+"org.apache.spark.sql.types.StructType$.MODULE$." +
+  s"apply(new java.util.ArrayList(${f.length}))"
+  case _ =>
+s"org.apache.spark.sql.types.DataTypes.$elementType"
+}
--- End diff --

I'm wondering if this will work for all complex types, e.g. 
`ArrayType(ArrayType(IntegerType))`?
How about using reference object of `elementType`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-30 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185166848
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,205 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val sort = if (elementType == NullType) "" else {
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |final int $sortOrder = $order ? 1 : -1;
+ |java.util.Arrays.sort($array, new java.util.Comparator() {
+ |  @Override public int compare(Object $o1, Object $o2) {
+ |if ($o1 == null && $o2 == null) {
+ |  return 0;
+ |} else if ($o1 == null) {
+ |  return $sortOrder * $nullOrder;
+ |} else if ($o2 == null) {
+ |  return -$sortOrder * $nullOrder;
+ |}
+ |$comp
+ |return $sortOrder * $c;
+ |  }
+ |});
+   """.stripMargin
+}
+val dataTypes = elementType match {
+  case DecimalType.Fixed(p, s) =>
+s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)"
+  case ArrayType(et, cn) =>
+s"org.apache.spark.sql.types.DataTypes.createArrayType($et, $cn)"
+  case MapType(kt, vt, cn) =>
+s"org.apache.spark.sql.types.DataTypes.createMapType($kt, $vt, 
$cn)"
+  case StructType(f) =>
+"org.apache.spark.sql.types.StructType$.MODULE$." +
+  s"apply(new java.util.ArrayList(${f.length}))"
+  case _ =>
+s"org.apache.spark.sql.types.DataTypes.$elementType"
+}
+s"""
+   |Object[] $array = (Object[]) (($arrayData) $base).toArray(
--- End diff --

How about using `toObjectArray` which doesn't need ClassTag?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-30 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185163319
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,205 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val sort = if (elementType == NullType) "" else {
+  val sortOrder = ctx.freshName("sortOrder")
+  val o1 = ctx.freshName("o1")
+  val o2 = ctx.freshName("o2")
+  val jt = CodeGenerator.javaType(elementType)
+  val comp = if (CodeGenerator.isPrimitiveType(elementType)) {
+val bt = CodeGenerator.boxedType(elementType)
+val v1 = ctx.freshName("v1")
+val v2 = ctx.freshName("v2")
+s"""
+   |$jt $v1 = (($bt) $o1).${jt}Value();
+   |$jt $v2 = (($bt) $o2).${jt}Value();
+   |int $c = ${ctx.genComp(elementType, v1, v2)};
+ """.stripMargin
+  } else {
+s"int $c = ${ctx.genComp(elementType, s"(($jt) $o1)", s"(($jt) 
$o2)")};"
+  }
+  s"""
+ |final int $sortOrder = $order ? 1 : -1;
+ |java.util.Arrays.sort($array, new java.util.Comparator() {
+ |  @Override public int compare(Object $o1, Object $o2) {
+ |if ($o1 == null && $o2 == null) {
+ |  return 0;
+ |} else if ($o1 == null) {
+ |  return $sortOrder * $nullOrder;
+ |} else if ($o2 == null) {
+ |  return -$sortOrder * $nullOrder;
+ |}
+ |$comp
+ |return $sortOrder * $c;
+ |  }
+ |});
+   """.stripMargin
+}
+val dataTypes = elementType match {
+  case DecimalType.Fixed(p, s) =>
+s"org.apache.spark.sql.types.DataTypes.createDecimalType($p, $s)"
+  case ArrayType(et, cn) =>
+s"org.apache.spark.sql.types.DataTypes.createArrayType($et, $cn)"
+  case MapType(kt, vt, cn) =>
--- End diff --

We don't need for `MapType` because `MapType` is not orderable?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-30 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r185166899
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,28 +161,205 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  -nullOrder
 } else if (o2 == null) {
-  -1
+  nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def elementType: DataType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
+
+  def sortEval(array: Any, ascending: Boolean): Any = {
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
 
+  def sortCodegen(ctx: CodegenContext, ev: ExprCode, base: String, order: 
String): String = {
+val arrayData = classOf[ArrayData].getName
+val genericArrayData = classOf[GenericArrayData].getName
+val array = ctx.freshName("array")
+val c = ctx.freshName("c")
+val sort = if (elementType == NullType) "" else {
--- End diff --

How about just copying the original array if `elementType == NullType`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-27 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184632402
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -185,6 +183,8 @@ trait ArraySortUtil extends ExpectsInputTypes with 
CodegenFallback {
 
 object ArraySortUtil {
   type NullOrder = Int
+  // Least: place null element at the end of the array
+  // Greatest: place null element at the beginning of the array
--- End diff --

Sure. I will also fix statements.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-26 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184293071
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -185,6 +183,8 @@ trait ArraySortUtil extends ExpectsInputTypes with 
CodegenFallback {
 
 object ArraySortUtil {
   type NullOrder = Int
+  // Least: place null element at the end of the array
+  // Greatest: place null element at the beginning of the array
--- End diff --

Can you add `for ascending order` or something?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184284276
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,24 +163,85 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * nullOrder
 } else if (o2 == null) {
-  -1
+  -1 * nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+object ArraySortUtil {
+  type NullOrder = Int
+  object NullOrder {
+val Least: NullOrder = -1
+val Greatest: NullOrder = 1
+  }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending 
or descending order
+  according to the natural ordering of the array elements. Null 
elements will be placed
+  at the beginning of the returned array in ascending order or at the 
end of the returned
+  array in descending order.
+  """,
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true);
+   [null,"a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def nullOrder: Int = NullOrder.Greatest
--- End diff --

Good catch, thanks


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184262552
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -300,6 +333,49 @@ case class Reverse(child: Expression) extends 
UnaryExpression with ImplicitCastI
   override def prettyName: String = "reverse"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.
+  """,
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
+
+  override def dataType: DataType = child.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+
+  override def arrayExpression: Expression = child
+  override def nullOrder: Int = NullOrder.Least
--- End diff --

`nullOrder: NullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184262594
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -117,47 +118,18 @@ case class MapValues(child: Expression)
 }
 
 /**
- * Sorts the input array in ascending / descending order according to the 
natural ordering of
- * the array elements and returns it.
+ * Common base class for [[SortArray]] and [[ArraySort]].
  */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
-  examples = """
-Examples:
-  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
-   ["a","b","c","d"]
-  """)
-// scalastyle:on line.size.limit
-case class SortArray(base: Expression, ascendingOrder: Expression)
-  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
-
-  def this(e: Expression) = this(e, Literal(true))
-
-  override def left: Expression = base
-  override def right: Expression = ascendingOrder
-  override def dataType: DataType = base.dataType
-  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback {
+  protected def arrayExpression: Expression
 
-  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
-case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
-  ascendingOrder match {
-case Literal(_: Boolean, BooleanType) =>
-  TypeCheckResult.TypeCheckSuccess
-case _ =>
-  TypeCheckResult.TypeCheckFailure(
-"Sort order in second argument requires a boolean literal.")
-  }
-case ArrayType(dt, _) =>
-  TypeCheckResult.TypeCheckFailure(
-s"$prettyName does not support sorting array of type 
${dt.simpleString}")
-case _ =>
-  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
-  }
+  // If -1, place null element at the end of the array
+  // If 1, place null element at the beginning of the array
+  protected def nullOrder: Int
--- End diff --

`nullOrder: NullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184262545
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,24 +163,85 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * nullOrder
 } else if (o2 == null) {
-  -1
+  -1 * nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+object ArraySortUtil {
+  type NullOrder = Int
+  object NullOrder {
+val Least: NullOrder = -1
+val Greatest: NullOrder = 1
+  }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending 
or descending order
+  according to the natural ordering of the array elements. Null 
elements will be placed
+  at the beginning of the returned array in ascending order or at the 
end of the returned
+  array in descending order.
+  """,
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true);
+   [null,"a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def nullOrder: Int = NullOrder.Greatest
--- End diff --

`nullOrder: NullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184261968
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -168,9 +140,9 @@ case class SortArray(base: Expression, ascendingOrder: 
Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  -1
+  -1 * nullOrder
--- End diff --

`nullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184262013
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,24 +163,85 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * nullOrder
--- End diff --

`-nullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184261902
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,24 +163,85 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * nullOrder
 } else if (o2 == null) {
-  -1
+  -1 * nullOrder
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+object ArraySortUtil {
+  type NullOrder = Int
+  object NullOrder {
+val Least: NullOrder = -1
+val Greatest: NullOrder = 1
+  }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array[, ascendingOrder]) - Sorts the input array in ascending 
or descending order
+  according to the natural ordering of the array elements. Null 
elements will be placed
+  at the beginning of the returned array in ascending order or at the 
end of the returned
+  array in descending order.
+  """,
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true);
+   [null,"a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def nullOrder: Int = NullOrder.Greatest
--- End diff --

`Least`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184262044
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -191,24 +163,85 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * nullOrder
 } else if (o2 == null) {
-  -1
+  -1 * nullOrder
--- End diff --

`nullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184261996
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -168,9 +140,9 @@ case class SortArray(base: Expression, ascendingOrder: 
Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  -1
+  -1 * nullOrder
 } else if (o2 == null) {
-  1
+  1 * nullOrder
--- End diff --

`-nullOrder`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184262129
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -117,47 +118,18 @@ case class MapValues(child: Expression)
 }
 
 /**
- * Sorts the input array in ascending / descending order according to the 
natural ordering of
- * the array elements and returns it.
+ * Common base class for [[SortArray]] and [[ArraySort]].
  */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
-  examples = """
-Examples:
-  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
-   ["a","b","c","d"]
-  """)
-// scalastyle:on line.size.limit
-case class SortArray(base: Expression, ascendingOrder: Expression)
-  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
-
-  def this(e: Expression) = this(e, Literal(true))
-
-  override def left: Expression = base
-  override def right: Expression = ascendingOrder
-  override def dataType: DataType = base.dataType
-  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback {
+  protected def arrayExpression: Expression
 
-  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
-case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
-  ascendingOrder match {
-case Literal(_: Boolean, BooleanType) =>
-  TypeCheckResult.TypeCheckSuccess
-case _ =>
-  TypeCheckResult.TypeCheckFailure(
-"Sort order in second argument requires a boolean literal.")
-  }
-case ArrayType(dt, _) =>
-  TypeCheckResult.TypeCheckFailure(
-s"$prettyName does not support sorting array of type 
${dt.simpleString}")
-case _ =>
-  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
-  }
+  // If -1, place null element at the end of the array
+  // If 1, place null element at the beginning of the array
--- End diff --

These comments are not correct now.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-25 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r184261947
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -300,6 +333,49 @@ case class Reverse(child: Expression) extends 
UnaryExpression with ImplicitCastI
   override def prettyName: String = "reverse"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.
+  """,
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
+
+  override def dataType: DataType = child.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+
+  override def arrayExpression: Expression = child
+  override def nullOrder: Int = NullOrder.Least
--- End diff --

`Greatest`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-20 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r182999463
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -2168,6 +2171,23 @@ def sort_array(col, asc=True):
 return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc))
 
 
+@since(2.4)
+def array_sort(col):
+"""
+Collection function: sorts the input array in ascending order. The 
elements of the input array
+must be orderable. Null elements will be placed at the end of the 
returned array.
+
+:param col: name of column or expression
+
+>>> from pyspark.sql.functions import array_sort
+>>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], 
['data'])
+>>> df.select(array_sort(df.data).alias('r')).collect()
+[Row(r=[1, 2, 3, None]), Row(r=[1]), Row(r=[])]
+ """
--- End diff --

nit: an extra space?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-20 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r182992892
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -2154,10 +2154,13 @@ def array_max(col):
 def sort_array(col, asc=True):
 """
 Collection function: sorts the input array in ascending or descending 
order according
-to the natural ordering of the array elements.
+to the natural ordering of the array elements. Null elements will be 
placed at the beginning
+of the returned array in ascending order or at the end of the returned 
array in descending
+order.
 
 :param col: name of column or expression
 
+>>> from pyspark.sql.functions import sort_array
--- End diff --

Do we need this?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-20 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r18236
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -2154,10 +2154,13 @@ def array_max(col):
 def sort_array(col, asc=True):
 """
 Collection function: sorts the input array in ascending or descending 
order according
-to the natural ordering of the array elements.
+to the natural ordering of the array elements. Null elements will be 
placed at the beginning
+of the returned array in ascending order or at the end of the returned 
array in descending
+order.
 
 :param col: name of column or expression
 
+>>> from pyspark.sql.functions import sort_array
 >>> df = spark.createDataFrame([([2, 1, 3],),([1],),([],)], ['data'])
--- End diff --

We should include `None` to show where the `None` comes?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-20 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r182998415
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -117,47 +117,18 @@ case class MapValues(child: Expression)
 }
 
 /**
- * Sorts the input array in ascending / descending order according to the 
natural ordering of
- * the array elements and returns it.
+ * Common base class for [[SortArray]] and [[ArraySort]].
  */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
-  examples = """
-Examples:
-  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
-   ["a","b","c","d"]
-  """)
-// scalastyle:on line.size.limit
-case class SortArray(base: Expression, ascendingOrder: Expression)
-  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
-
-  def this(e: Expression) = this(e, Literal(true))
+trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback {
+  protected def arrayExpression: Expression
 
-  override def left: Expression = base
-  override def right: Expression = ascendingOrder
-  override def dataType: DataType = base.dataType
-  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
-
-  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
-case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
-  ascendingOrder match {
-case Literal(_: Boolean, BooleanType) =>
-  TypeCheckResult.TypeCheckSuccess
-case _ =>
-  TypeCheckResult.TypeCheckFailure(
-"Sort order in second argument requires a boolean literal.")
-  }
-case ArrayType(dt, _) =>
-  TypeCheckResult.TypeCheckFailure(
-s"$prettyName does not support sorting array of type 
${dt.simpleString}")
-case _ =>
-  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
-  }
+  // If -1, place null element at the end of the array
+  // If 1, place null element at the beginning of the array
+  protected def placeNullAtEnd: Int
--- End diff --

I guess the name is confusing.

How about `nullOrder`?
`nullOrder = -1` means `null` is the least, place at the beginning for asc 
and at the end for desc, whereas `1` means the greatest, place at the end for 
asc and at the beginning for desc.
And maybe we should define the constants like:

```scala
object ArraySortUtil {
  type NullOrder = Int
  object NullOrder {
val Least: NullOrder = -1
val Greatest: NullOrder = 1
  }
}
```


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-20 Thread ueshin

Github user ueshin commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r182993016
  
--- Diff: python/pyspark/sql/functions.py ---
@@ -2168,6 +2171,23 @@ def sort_array(col, asc=True):
 return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc))
 
 
+@since(2.4)
+def array_sort(col):
+"""
+Collection function: sorts the input array in ascending order. The 
elements of the input array
+must be orderable. Null elements will be placed at the end of the 
returned array.
+
+:param col: name of column or expression
+
+>>> from pyspark.sql.functions import array_sort
--- End diff --

ditto.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180429349
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +161,118 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def placeNullAtEnd: Int = 1
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  val dtSimple = dt.simpleString
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type $dtSimple 
which is not orderable")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
--- End diff --

Yeah, as you said they are doing similar things. Therefore, a new trait is 
not introduced to reuse as possible.   
When one is subset of another one (e.g. `size` v.s. `cardinality`), we 
could take an approach that one calls another one. What I am doing in 
`cardinality`.

Good point about the description. I will add the description on how it 
works with `null`.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180400407
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +161,118 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def placeNullAtEnd: Int = 1
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  val dtSimple = dt.simpleString
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type $dtSimple 
which is not orderable")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
--- End diff --

I see, thanks. Then can't we add another flag for it? I just find a bit 
weird that we have two functions which do basically the same thing. Just one 
has one more flag than the other and they behave differently with nulls. As a 
user I would not understand when to use one or the other. What do you think? 
I'd be also interested in @gatorsmile's point of view, since he created the 
JIRA for this.

Moreover, can we at least improve than the description of the `SortArray` 
case class in order to point out clearly how it behaves with nulls? Now there 
is no mention about it.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional c

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180392603
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +161,118 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def placeNullAtEnd: Int = 1
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  val dtSimple = dt.simpleString
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type $dtSimple 
which is not orderable")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
--- End diff --

As you can see the result in UT, `null` handing is different. As you 
suggested, to reuse existing code as possible, I refactored by using 
`ArraySortUtil`.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread mgaido91

Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180384110
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +161,118 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+  override def placeNullAtEnd: Int = 1
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  val dtSimple = dt.simpleString
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type $dtSimple 
which is not orderable")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """,
+  since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
--- End diff --

why do we need this? Can't we just reuse the previous `SortArray` fixing 
the `right` to be `TrueLiteral`?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180319251
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """)
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
+
+  override def dataType: DataType = child.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+
+  override def arrayExpression: Expression = child
+  override def placeNullAtEnd: Int = -1
+
+  override def checkInputDataTypes(): TypeCheckResult = child.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  TypeCheckResult.TypeCheckSuccess
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
--- End diff --

Sure


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread viirya

Github user viirya commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180318978
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """)
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
+
+  override def dataType: DataType = child.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+
+  override def arrayExpression: Expression = child
+  override def placeNullAtEnd: Int = -1
+
+  override def checkInputDataTypes(): TypeCheckResult = child.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  TypeCheckResult.TypeCheckSuccess
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
--- End diff --

Sounds good to me.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180317825
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """)
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
+
+  override def dataType: DataType = child.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+
+  override def arrayExpression: Expression = child
+  override def placeNullAtEnd: Int = -1
+
+  override def checkInputDataTypes(): TypeCheckResult = child.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  TypeCheckResult.TypeCheckSuccess
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
--- End diff --

The original message is compatible with `SortArray`. Is it better to update 
the message in `SortArray` with this change?


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
F

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180317210
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
--- End diff --

Thanks for letting know the example.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-10 Thread viirya

Github user viirya commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180316279
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
+  """)
+// scalastyle:on line.size.limit
+case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
+
+  override def dataType: DataType = child.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+
+  override def arrayExpression: Expression = child
+  override def placeNullAtEnd: Int = -1
+
+  override def checkInputDataTypes(): TypeCheckResult = child.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  TypeCheckResult.TypeCheckSuccess
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
--- End diff --

`...not support sorting array of type ${dt.simpleString} which is not 
orderable.`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: review

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-09 Thread HyukjinKwon

Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180312180
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
--- End diff --

which will be shown in SQL documentation and the extended description 
command in sql syntax


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-09 Thread HyukjinKwon

Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180311923
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
--- End diff --

Every expression needs it .. many of functions don't have it because that 
field was added from 2.3.0. You could do it:


https://github.com/apache/spark/blob/2ca9bb083c515511d2bfee271fc3e0269aceb9d5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala#L511


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-09 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180311343
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
--- End diff --

Do we have to add `since` in this file? I cannot find `since`  in 
`collectionOperations.scala`. Of course, `functions.scala` and `functions.py` 
have `since`.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-09 Thread HyukjinKwon

Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180310842
  
--- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -324,6 +324,30 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSQLContext {
 assert(intercept[AnalysisException] {
   df3.selectExpr("sort_array(a)").collect()
 }.getMessage().contains("only supports array input"))
+
+checkAnswer(
+  df.select(array_sort($"a"), array_sort($"b")),
+  Seq(
+Row(Seq(1, 2, 3), Seq("a", "b", "c")),
+Row(Seq[Int](), Seq[String]()),
+Row(null, null))
+)
+checkAnswer(
+  df.selectExpr("array_sort(a)", "array_sort(b)"),
+  Seq(
+Row(Seq(1, 2, 3), Seq("a", "b", "c")),
+Row(Seq[Int](), Seq[String]()),
--- End diff --

Maybe `Seq.empty[Int]`


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-09 Thread HyukjinKwon

Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180310741
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -190,28 +160,114 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
 if (o1 == null && o2 == null) {
   0
 } else if (o1 == null) {
-  1
+  1 * placeNullAtEnd
 } else if (o2 == null) {
-  -1
+  -1 * placeNullAtEnd
 } else {
   -ordering.compare(o1, o2)
 }
   }
 }
   }
 
-  override def nullSafeEval(array: Any, ascending: Any): Any = {
-val elementType = base.dataType.asInstanceOf[ArrayType].elementType
+  def sortEval(array: Any, ascending: Boolean): Any = {
+val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
 val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
 if (elementType != NullType) {
-  java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
+  java.util.Arrays.sort(data, if (ascending) lt else gt)
 }
 new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
+}
+
+/**
+ * Sorts the input array in ascending / descending order according to the 
natural ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
+   ["a","b","c","d"]
+  """)
+// scalastyle:on line.size.limit
+case class SortArray(base: Expression, ascendingOrder: Expression)
+  extends BinaryExpression with ArraySortUtil {
+
+  def this(e: Expression) = this(e, Literal(true))
+
+  override def left: Expression = base
+  override def right: Expression = ascendingOrder
+  override def dataType: DataType = base.dataType
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
+
+  override def arrayExpression: Expression = base
+
+  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
+case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
+  ascendingOrder match {
+case Literal(_: Boolean, BooleanType) =>
+  TypeCheckResult.TypeCheckSuccess
+case _ =>
+  TypeCheckResult.TypeCheckFailure(
+"Sort order in second argument requires a boolean literal.")
+  }
+case ArrayType(dt, _) =>
+  TypeCheckResult.TypeCheckFailure(
+s"$prettyName does not support sorting array of type 
${dt.simpleString}")
+case _ =>
+  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
+  }
+
+  override def nullSafeEval(array: Any, ascending: Any): Any = {
+sortEval(array, ascending.asInstanceOf[Boolean])
+  }
 
   override def prettyName: String = "sort_array"
 }
 
+/**
+ * Sorts the input array in ascending order according to the natural 
ordering of
+ * the array elements and returns it.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+_FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
+  be orderable. Null elements will be placed at the end of the 
returned array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
+   ["a","b","c","d",null]
--- End diff --

+1


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

2018-04-09 Thread kiszk

Github user kiszk commented on a diff in the pull request:

https://github.com/apache/spark/pull/21021#discussion_r180310339
  
--- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
@@ -116,47 +116,17 @@ case class MapValues(child: Expression)
 }
 
 /**
- * Sorts the input array in ascending / descending order according to the 
natural ordering of
- * the array elements and returns it.
+ * Common base class for [[SortArray]] and [[ArraySort]].
  */
-// scalastyle:off line.size.limit
-@ExpressionDescription(
-  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
-  examples = """
-Examples:
-  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
-   ["a","b","c","d"]
-  """)
-// scalastyle:on line.size.limit
-case class SortArray(base: Expression, ascendingOrder: Expression)
-  extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
-
-  def this(e: Expression) = this(e, Literal(true))
+trait ArraySortUtil extends ExpectsInputTypes with CodegenFallback {
+  protected def arrayExpression: Expression
 
-  override def left: Expression = base
-  override def right: Expression = ascendingOrder
-  override def dataType: DataType = base.dataType
-  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
-
-  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
-case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
-  ascendingOrder match {
-case Literal(_: Boolean, BooleanType) =>
-  TypeCheckResult.TypeCheckSuccess
-case _ =>
-  TypeCheckResult.TypeCheckFailure(
-"Sort order in second argument requires a boolean literal.")
-  }
-case ArrayType(dt, _) =>
-  TypeCheckResult.TypeCheckFailure(
-s"$prettyName does not support sorting array of type 
${dt.simpleString}")
-case _ =>
-  TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
-  }
+  // If -1, place null element at the end of the array
+  protected def placeNullAtEnd: Int = 1
--- End diff --

sure


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

57 matches

Mail list logo