This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new 5b41f584d55c [SPARK-52799][TESTS] Fix ThriftServerQueryTestSuite result comparison 5b41f584d55c is described below commit 5b41f584d55cd722f6a6e3768a90f1beea106c8f Author: Kent Yao <y...@apache.org> AuthorDate: Tue Jul 15 18:09:07 2025 +0800 [SPARK-52799][TESTS] Fix ThriftServerQueryTestSuite result comparison ### What changes were proposed in this pull request? This PR fixes the result comparison in ThriftServerQueryTestSuite. When re-reading the golden files, if the number of result lines exceeds the number of rows returned by the query, we assume that each row spans multiple lines. In this case, we first group the lines into rows and then sort whole rows, instead of sorting line by line. ### Why are the changes needed? When a single row spans multiple lines, sorting line by line can scramble the row, for example: ``` [info] Expected "[ <birth>2018</birth> [info] <name>[45 61 73 6F 6E]</name> [info] <org>[4B 69 6E 64 65 72 67 61 72 74 65 6E 20 43 6F 70]</org> [info] </ROW> [info] <]ROW>", but got "[<ROW> [info] <name>[45 61 73 6F 6E]</name> [info] <birth>2018</birth> [info] <org>[4B 69 6E 64 65 72 67 61 72 74 65 6E 20 43 6F 70]</org> [info] </]ROW>" ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Tested locally with https://github.com/apache/spark/pull/51470 ### Was this patch authored or co-authored using generative AI tooling? No Closes #51488 from yaooqinn/ThriftServerQueryTestSuite. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> (cherry picked from commit 9565d164f389e2e3db7ab43a873f6b473f5c9365) Signed-off-by: Kent Yao <y...@apache.org> --- .../thriftserver/ThriftServerQueryTestSuite.scala | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 3fdb8fb7ab8a..874a70aed030 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -136,11 +136,13 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ statement.execute(s"SET ${SQLConf.ANSI_ENABLED.key} = true") } + val rowCounts = new Array[Int](queries.size) // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryTestOutput] = withSQLConf(configSet: _*) { - queries.map { sql => + queries.zipWithIndex.map { case (sql, i) => val (_, output) = handleExceptions(getNormalizedResult(statement, sql)) // We might need to do some query canonicalization in the future. 
+ rowCounts(i) = output.length ExecutionOutput( sql = sql, schema = Some(""), @@ -161,11 +163,19 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ val sql = segments(i * 3 + 1).trim val schema = segments(i * 3 + 2).trim val originalOut = segments(i * 3 + 3) - val output = if (schema != emptySchema && isNeedSort(sql)) { - originalOut.split("\n").sorted.mkString("\n") - } else { - originalOut - } + val output = + if (schema != emptySchema && isNeedSort(sql)) { + val splits = originalOut.split("\n") + if (splits.length > rowCounts(i)) { + // the result is multiline + val step = splits.length / rowCounts(i) + splits.grouped(step).map(_.mkString("\n")).toSeq.sorted.mkString("\n") + } else { + splits.sorted.mkString("\n") + } + } else { + originalOut + } ExecutionOutput( sql = sql, schema = Some(""), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org