This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new e942df4 [KYUUBI #2071] Using while-loop instead of map/range to
improve performance in RowSet
e942df4 is described below
commit e942df4006d364b4769f5b2c95d033273ea9548a
Author: Min Zhao <[email protected]>
AuthorDate: Sat Mar 12 16:26:43 2022 +0800
[KYUUBI #2071] Using while-loop instead of map/range to improve performance
in RowSet
### _Why are the changes needed?_
Using while-loop instead of map/range to improve performance in RowSet
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run
test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests)
locally before making a pull request
Closes #2107 from zhaomin1423/improve_loop.
Closes #2071
b6d70b3a [Min Zhao] Merge branch 'improve_loop' of
github.com:zhaomin1423/kyuubi into improve_loop
43bca772 [Min Zhao] [Kyuubi #2071] Using while-loop instead of map/range to
improve performance in RowSet
ec2cb912 [Min Zhao] [Kyuubi #2071] Using while-loop instead of map/range to
improve performance in RowSet
Authored-by: Min Zhao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../apache/kyuubi/engine/spark/schema/RowSet.scala | 32 ++++++++++++++++------
1 file changed, 23 insertions(+), 9 deletions(-)
diff --git
a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala
b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala
index 8d2fe8d..a6eff9e 100644
---
a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala
+++
b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala
@@ -46,21 +46,35 @@ object RowSet {
}
def toRowBasedSet(rows: Seq[Row], schema: StructType, timeZone: ZoneId):
TRowSet = {
- val tRows = rows.map { row =>
+ val tRows = new java.util.ArrayList[TRow]()
+ var i = 0
+ val rowSize = rows.length
+ while (i < rowSize) {
+ val row = rows(i)
val tRow = new TRow()
- (0 until row.length).map(i => toTColumnValue(i, row, schema, timeZone))
- .foreach(tRow.addToColVals)
- tRow
- }.asJava
+ var j = 0
+ val columnSize = row.length
+ while (j < columnSize) {
+ val columnValue = toTColumnValue(j, row, schema, timeZone)
+ tRow.addToColVals(columnValue)
+ j += 1
+ }
+ i += 1
+ tRows.add(tRow)
+ }
new TRowSet(0, tRows)
}
def toColumnBasedSet(rows: Seq[Row], schema: StructType, timeZone: ZoneId):
TRowSet = {
- val size = rows.length
- val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](size))
- schema.zipWithIndex.foreach { case (filed, i) =>
- val tColumn = toTColumn(rows, i, filed.dataType, timeZone)
+ val rowSize = rows.length
+ val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](rowSize))
+ var i = 0
+ val columnSize = schema.length
+ while (i < columnSize) {
+ val field = schema(i)
+ val tColumn = toTColumn(rows, i, field.dataType, timeZone)
tRowSet.addToColumns(tColumn)
+ i += 1
}
tRowSet
}