mikhailnik-db commented on code in PR #48748:
URL: https://github.com/apache/spark/pull/48748#discussion_r1830767800
##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala:
##########
@@ -161,6 +161,267 @@ class SQLQuerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
}
}
+ private[this] def hexToBytes(s: String): Array[Byte] = {
+ val byteArray = BigInt(s, 16).toByteArray
+ if (byteArray.length > 1 && byteArray(0) == 0) {
+ // remove sign byte for positive numbers if exists
+ byteArray.tail
+ } else {
+ byteArray
+ }
+ }
+
+ test("listagg function") {
+ withTempView("df", "df2") {
+ Seq(("a", "b"), ("a", "c"), ("b", "c"), ("b", "d"), (null,
null)).toDF("a", "b")
+ .createOrReplaceTempView("df")
+ checkAnswer(
+ sql("select listagg(b) from df group by a"),
+ Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+ checkAnswer(
+ sql("select string_agg(b) from df group by a"),
+ Row(null) :: Row("bc") :: Row("cd") :: Nil)
+
+ checkAnswer(
+ sql("select listagg(b, null) from df group by a"),
+ Row(null) :: Row("bc") :: Row("cd") :: Nil)
Review Comment:
Another thing that concerns me is that I documented `listagg` as non-deterministic
and then added tests that expect a particular order 😅. Can I assume that Spark
never shuffles in unit tests? Or have I just added dozens of flaky tests?
The `collect_list` tests are written the same way, so it's probably not an
issue. But it would be great if someone more experienced with Spark could
confirm or deny that.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]