mikhailnik-db commented on code in PR #48748:
URL: https://github.com/apache/spark/pull/48748#discussion_r1850214768


##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala:
##########
@@ -161,6 +161,267 @@ class SQLQuerySuite extends QueryTest with 
SharedSparkSession with AdaptiveSpark
     }
   }
 
+  private[this] def hexToBytes(s: String): Array[Byte] = {
+    val byteArray = BigInt(s, 16).toByteArray
+    if (byteArray.length > 1 && byteArray(0) == 0) {
+      // remove sign byte for positive numbers if exists
+      byteArray.tail
+    } else {
+      byteArray
+    }
+  }
+
+  test("listagg function") {
+    withTempView("df", "df2") {
+      Seq(("a", "b"), ("a", "c"), ("b", "c"), ("b", "d"), (null, 
null)).toDF("a", "b")
+        .createOrReplaceTempView("df")
+      checkAnswer(
+        sql("select listagg(b) from df group by a"),
+        Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+      checkAnswer(
+        sql("select string_agg(b) from df group by a"),
+        Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(b, null) from df group by a"),
+        Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(b) from df where 1 != 1"),
+        Row(null) :: Nil)
+
+      checkAnswer(
+        sql("select listagg(b, '|') from df group by a"),
+        Row("b|c") :: Row("c|d") :: Row(null) :: Nil)
+
+      checkAnswer(
+        spark.sql("select listagg(b, :param || ' ') from df group by a", 
Map("param" -> ",")),
+        Row("b, c") :: Row("c, d") :: Row(null) :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) from df"),
+        Row("aabb") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(distinct a) from df"),
+        Row("ab") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) within group (order by a) from df"),
+        Row("aabb") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) within group (order by a desc) from df"),
+        Row("bbaa") :: Nil)
+
+      checkAnswer(
+        sql("""select listagg(a) within group (order by a desc) over 
(partition by b) from df"""),
+        Row("a") :: Row("ba") :: Row("ba") :: Row("b") :: Row(null) :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) within group (order by b) from df"),
+        Row("aabb") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) within group (order by b desc) from df"),
+        Row("baba") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a, '|') within group (order by b desc) from df"),
+        Row("b|a|b|a") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) within group (order by b desc, a asc) from df"),
+        Row("baba") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(a) within group (order by b desc, a desc) from 
df"),
+        Row("bbaa") :: Nil)
+
+      checkAnswer(
+        sql("select listagg(c1)from values (X'DEAD'), (X'BEEF') as t(c1)"),
+        Row(hexToBytes("DEADBEEF")) :: Nil)
+
+      checkAnswer(
+        sql("select listagg(c1, null)from values (X'DEAD'), (X'BEEF') as 
t(c1)"),
+        Row(hexToBytes("DEADBEEF")) :: Nil)
+
+      checkAnswer(
+        sql("select listagg(c1, X'42')from values (X'DEAD'), (X'BEEF') as 
t(c1)"),
+        Row(hexToBytes("DEAD42BEEF")) :: Nil)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(c1) from values (array('a', 'b')) as t(c1)")
+        },
+        condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+        parameters = Map(
+          "sqlExpr" -> "\"listagg(c1, NULL)\"",
+          "paramIndex" -> "first",
+          "requiredType" -> "(\"STRING\" or \"BINARY\")",
+          "inputSql" -> "\"c1\"",
+          "inputType" -> "\"ARRAY<STRING>\""),
+        context = ExpectedContext(
+          fragment = "listagg(c1)",
+          start = 7,
+          stop = 17))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(c1, ', ')from values (X'DEAD'), (X'BEEF') as 
t(c1)")
+        },
+        condition = "DATATYPE_MISMATCH.DATA_DIFF_TYPES",
+        parameters = Map(
+          "sqlExpr" -> "\"listagg(c1, , )\"",
+          "functionName" -> "`listagg`",
+          "dataType" -> "(\"BINARY\" or \"STRING\")"),
+        context = ExpectedContext(
+          fragment = "listagg(c1, ', ')",
+          start = 7,
+          stop = 23))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(b, a) from df group by a")
+        },
+        condition = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+        parameters = Map(
+          "sqlExpr" -> "\"listagg(b, a)\"",
+          "inputName" -> "`delimiter`",
+          "inputType" -> "\"STRING\"",
+          "inputExpr" -> "\"a\""),
+        context = ExpectedContext(
+          fragment = "listagg(b, a)",
+          start = 7,
+          stop = 19))
+
+      checkAnswer(
+        sql("select listagg(a) over (order by a) from df"),
+        Row(null) :: Row("aa") :: Row("aa") :: Row("aabb") :: Row("aabb") :: 
Nil)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(a) within group (order by a) over (order by a) 
from df")
+        },
+        condition = "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC",
+        parameters = Map("aggFunc" -> "\"listagg(a, NULL, a)\""),
+        context = ExpectedContext(
+          fragment = "listagg(a) within group (order by a) over (order by a)",
+          start = 7,
+          stop = 60))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select string_agg(a) within group (order by a) over (order by 
a) from df")
+        },
+        condition = "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC",
+        parameters = Map("aggFunc" -> "\"listagg(a, NULL, a)\""),
+        context = ExpectedContext(
+          fragment = "string_agg(a) within group (order by a) over (order by 
a)",
+          start = 7,
+          stop = 63))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(distinct a) over (order by a) from df")
+        },
+        condition = "DISTINCT_WINDOW_FUNCTION_UNSUPPORTED",
+        parameters = Map("windowExpr" ->
+          ("\"listagg(DISTINCT a, NULL) " +
+          "OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW)\"")),
+        context = ExpectedContext(
+          fragment = "listagg(distinct a) over (order by a)",
+          start = 7,
+          stop = 43))
+
+      checkAnswer(
+        sql("select listagg(distinct a) within group (order by a DESC) from 
df"),
+        Row("ba") :: Nil)
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(distinct a) within group (order by b) from df")
+        },
+        condition = "FUNCTION_AND_ORDER_EXPRESSION_MISMATCH",
+        parameters = Map(
+          "functionName" -> "`listagg`",
+          "functionArgs" -> "\"a\"",
+          "orderExpr" -> "\"b\""),
+        context = ExpectedContext(
+          fragment = "listagg(distinct a) within group (order by b)",
+          start = 7,
+          stop = 51))
+
+      checkError(
+        exception = intercept[AnalysisException] {
+          sql("select listagg(distinct a) within group (order by a, b) from 
df")
+        },
+        condition = "FUNCTION_AND_ORDER_EXPRESSION_MISMATCH",
+        parameters = Map(
+          "functionName" -> "`listagg`",
+          "functionArgs" -> "\"a\"",
+          "orderExpr" -> "\"a\", \"b\""),
+        context = ExpectedContext(
+          fragment = "listagg(distinct a) within group (order by a, b)",
+          start = 7,
+          stop = 54))
+
+      Seq((1, true), (2, false), (3, false)).toDF("a", 
"b").createOrReplaceTempView("df2")
+
+      checkAnswer(
+        sql("select listagg(a), listagg(b, ',') from df2"),
+        Row("123", "true,false,false") :: Nil)
+    }
+  }
+
+  test("listagg collation test") {
+    checkAnswer(
+      sql("select listagg(c1) within group (order by c1 collate utf8_binary)" +
+        " from values ('a'), ('A'), ('b'), ('B') as t(c1)"),
+      Row("ABab") :: Nil)
+
+    checkAnswer(
+      sql("select listagg(c1) within group (order by c1 collate utf8_lcase)" +
+        " from values ('a'), ('A'), ('b'), ('B') as t(c1)"),
+      Row("aAbB") :: Nil)
+
+    checkAnswer(
+      sql("select listagg(DISTINCT c1 collate utf8_binary)" +
+        " from values ('a'), ('A'), ('b'), ('B') as t(c1)"),
+      Row("aAbB") :: Nil)
+
+    checkAnswer(
+      sql("select listagg(DISTINCT c1 collate utf8_lcase)" +
+        " from values ('a'), ('A'), ('b'), ('B') as t(c1)"),
+      Row("ab") :: Nil)
+
+    checkAnswer(
+      sql("select listagg(DISTINCT c1 collate utf8_lcase)" +
+        " within group (order by c1 collate utf8_lcase)" +
+        " from values ('a'), ('B'), ('b'), ('A') as t(c1)"),
+      Row("aB") :: Nil)
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql(
+          """select listagg(DISTINCT c1 collate utf8_lcase)
+            | within group (order by c1 collate utf8_binary)
+            | from values ('a'), ('b'), ('A'), ('B') as t(c1)""".stripMargin)

Review Comment:
   No, but I like to have tests that can be easily found, run, and debugged 
in an IDE, so I would leave them.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to