kiszk commented on a change in pull request #25728: [SPARK-29020][WIP][SQL] 
Improving array_sort behaviour
URL: https://github.com/apache/spark/pull/25728#discussion_r324499400
 
 

 ##########
 File path: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
 ##########
 @@ -311,8 +311,73 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
       Seq(Row(2))
     )
   }
+  test("array_sort with lambda functions") {
 
-  test("sort_array/array_sort functions") {
+    spark.udf.register("fAsc", (x: Int, y: Int) => {
+      if(x < y) -1
+      else if(x == y) 0
+      else 1
+    })
+
+    spark.udf.register("fDesc", (x: Int, y: Int) => {
+      if(x < y) 1
+      else if(x == y) 0
+      else -1
+    })
+
+    spark.udf.register("fString", (x: String, y: String) => {
+      if(x < y) -1
+      else if(x == y) 0
+      else 1
+    })
+
+    spark.udf.register("fStringLength", (x: String, y: String) => {
+      if(x.length < y.length) 1
+      else if(x.length == y.length) 0
+      else -1
+    })
+
+    spark.udf.register("fArraylength", (x: Int, y: Int) => {
+      if(x < y) 1
+      else if(x == y) 0
+      else -1
+    })
+
+
+    val df1 = Seq(Array[Int](3, 2, 5, 1, 2)).toDF("a")
+
+    checkAnswer(
+      df1.selectExpr("array_new_sort(a, (b, i) -> fAsc(b,i))"),
+      Seq(
+        Row(Seq(5, 3, 2, 2, 1))))
+
+    checkAnswer(
+      df1.selectExpr("array_new_sort(a, (b, i) -> fDesc(b,i))"),
+      Seq(
+        Row(Seq(1, 2, 2, 3, 5))))
+
+    val df2 = Seq(Array[String]("bc", "ab", "dc")).toDF("a")
+    checkAnswer(
+      df2.selectExpr("array_new_sort(a, (b, i) -> fString(b,i))"),
+      Seq(
+        Row(Seq("dc", "bc", "ab"))))
+
+    val df3 = Seq(Array[String]("a", "abcd", "abc")).toDF("a")
+    checkAnswer(
+      df3.selectExpr("array_new_sort(a, (b, i) -> fStringLength(b,i))"),
+      Seq(
+        Row(Seq("a", "abc", "abcd"))))
+
+
+    val df4 = Seq((Array[Array[Int]](Array(2, 3, 1), Array(4, 2, 1, 4), 
Array(1, 2)), "x")).toDF("a", "b")
+    checkAnswer(
+      df4.selectExpr("array_new_sort(a, (b, i) -> 
fArraylength(cardinality(b),cardinality(i)))"),
+      Seq(
+        Row(Seq[Seq[Int]](Seq(1, 2), Seq(2,3,1), Seq(4, 2, 1, 4)))))
+
+  }
+
+    test("sort_array/array_sort functions") {
 
 Review comment:
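   nit: fix the indentation of this line — `test("sort_array/array_sort functions") {` should keep its original two-space indent; the extra indent added here is not required.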

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to