This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new b91183d764b [HUDI-9309] Fix perf regression from supporting display 
dag of insert/update statement in spark ui (#13139)
b91183d764b is described below

commit b91183d764b0d326303daf7b37cf7231d649dc29
Author: wangyinsheng <[email protected]>
AuthorDate: Mon Apr 14 08:52:27 2025 +0800

    [HUDI-9309] Fix perf regression from supporting display dag of 
insert/update statement in spark ui (#13139)
    
    Co-authored-by: wangyinsheng <[email protected]>
---
 .../apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala  | 4 +---
 .../org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala  | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git 
a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
 
b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
index 3376c4ecca6..a7eafb6b331 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
+++ 
b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
@@ -104,9 +104,7 @@ object InsertIntoHoodieTableCommand extends Logging with 
ProvidesHoodieConfig wi
     }
     val config = buildHoodieInsertConfig(catalogTable, sparkSession, 
isOverWritePartition, isOverWriteTable, partitionSpec, extraOptions, 
staticOverwritePartitionPathOpt)
 
-    val sparkRowSerDe = sparkAdapter.createSparkRowSerDe(query.schema)
-    val rows = query.execute().map(sparkRowSerDe.deserializeRow)
-    val df = sparkSession.createDataFrame(rows, query.schema)
+    val df = sparkSession.internalCreateDataFrame(query.execute(), 
query.schema)
     val (success, _, _, _, _, _) = 
HoodieSparkSqlWriter.write(sparkSession.sqlContext, mode, config, df)
 
     if (!success) {
diff --git 
a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala
 
b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala
index 85045c1ebaf..044895039b0 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala
+++ 
b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala
@@ -57,9 +57,7 @@ case class UpdateHoodieTableCommand(ut: UpdateTable, query: 
LogicalPlan) extends
       buildHoodieConfig(catalogTable)
     }
 
-    val sparkRowSerDe = sparkAdapter.createSparkRowSerDe(plan.schema)
-    val rows = plan.execute().map(sparkRowSerDe.deserializeRow)
-    val df = sparkSession.createDataFrame(rows, plan.schema)
+    val df = sparkSession.internalCreateDataFrame(plan.execute(), plan.schema)
     HoodieSparkSqlWriter.write(sparkSession.sqlContext, SaveMode.Append, 
config, df)
 
     sparkSession.catalog.refreshTable(tableId)

Reply via email to