mikhailnik-db commented on code in PR #48748:
URL: https://github.com/apache/spark/pull/48748#discussion_r1837775883
##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala:
##########
@@ -161,6 +161,267 @@ class SQLQuerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
}
}
+ private[this] def hexToBytes(s: String): Array[Byte] = {
+ val byteArray = BigInt(s, 16).toByteArray
+ if (byteArray.length > 1 && byteArray(0) == 0) {
+ // remove sign byte for positive numbers if exists
+ byteArray.tail
+ } else {
+ byteArray
+ }
+ }
+
+ test("listagg function") {
+ withTempView("df", "df2") {
+ Seq(("a", "b"), ("a", "c"), ("b", "c"), ("b", "d"), (null,
null)).toDF("a", "b")
+ .createOrReplaceTempView("df")
+ checkAnswer(
+ sql("select listagg(b) from df group by a"),
+ Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+ checkAnswer(
+ sql("select string_agg(b) from df group by a"),
+ Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+ checkAnswer(
+ sql("select listagg(b, null) from df group by a"),
+ Row(null) :: Row("bc") :: Row("cd") :: Nil)
Review Comment:
But is it an issue? As I mentioned, there are `collect_*` functions with the
same non-deterministic behavior that are tested the same way.
I don't see any existing test utils for comparing results with "one of"
semantics, and adding one is not a trivial task. If the current implementation
doesn't lead to flakiness, I would split this into a separate ticket.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]