This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 92a333ada7c5 [SPARK-47085][SQL][3.5] reduce the complexity of toTRowSet from n^2 to n 92a333ada7c5 is described below commit 92a333ada7c56b6f3dacffc18010880e37e66ee2 Author: Izek Greenfield <izek.greenfi...@adenza.com> AuthorDate: Tue Feb 20 12:39:24 2024 -0800 [SPARK-47085][SQL][3.5] reduce the complexity of toTRowSet from n^2 to n ### What changes were proposed in this pull request? Reduce the complexity of `RowSetUtils.toTRowSet` from O(n^2) to O(n) by iterating over `rows` directly (`map` / `foreach`) instead of looking up each row by index with `rows(i)`. ### Why are the changes needed? The caller builds `rows` with `iter.take(maxRows).toList`, and indexed access on a `List` takes time proportional to the index, so the previous index-based `while` loops were quadratic in the number of rows. This causes performance issues. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Tests, plus manual testing on AWS EMR. ### Was this patch authored or co-authored using generative AI tooling? No Closes #45165 from igreenfield/branch-3.5. Authored-by: Izek Greenfield <izek.greenfi...@adenza.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../apache/spark/sql/hive/thriftserver/RowSetUtils.scala | 14 ++++---------- .../hive/thriftserver/SparkExecuteStatementOperation.scala | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala index 9625021f392c..047f0612898d 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala @@ -52,11 +52,7 @@ object RowSetUtils { rows: Seq[Row], schema: Array[DataType], timeFormatters: TimeFormatters): TRowSet = { - var i = 0 - val rowSize = rows.length - val tRows = new java.util.ArrayList[TRow](rowSize) - while (i < rowSize) { - val row = rows(i) + val tRows = rows.map { row => val tRow = new TRow() var j = 0 val columnSize = row.length @@ -65,9 +61,8 @@ object RowSetUtils { tRow.addToColVals(columnValue) 
j += 1 } - i += 1 - tRows.add(tRow) - } + tRow + }.asJava new TRowSet(startRowOffSet, tRows) } @@ -159,8 +154,7 @@ object RowSetUtils { val size = rows.length val ret = new java.util.ArrayList[T](size) var idx = 0 - while (idx < size) { - val row = rows(idx) + rows.foreach { row => if (row.isNullAt(ordinal)) { nulls.set(idx, true) ret.add(idx, defaultVal) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index a9b46739fa66..e6b4c70bb395 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -114,7 +114,7 @@ private[hive] class SparkExecuteStatementOperation( val offset = iter.getPosition val rows = iter.take(maxRows).toList log.debug(s"Returning result set with ${rows.length} rows from offsets " + - s"[${iter.getFetchStart}, ${offset}) with $statementId") + s"[${iter.getFetchStart}, ${iter.getPosition}) with $statementId") RowSetUtils.toTRowSet(offset, rows, dataTypes, getProtocolVersion, getTimeFormatters) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org