jinchengchenghh commented on code in PR #11868:
URL: https://github.com/apache/gluten/pull/11868#discussion_r3053591668
##########
backends-velox/src-iceberg/main/scala/org/apache/gluten/execution/AbstractIcebergWriteExec.scala:
##########
@@ -24,14 +24,23 @@ import org.apache.spark.sql.types.StructType
import org.apache.iceberg.spark.source.IcebergWriteUtil
import org.apache.iceberg.types.TypeUtil
+import scala.collection.JavaConverters._
+
abstract class AbstractIcebergWriteExec extends IcebergWriteExec {
// the writer factory works for both batch and streaming
private def createIcebergDataWriteFactory(schema: StructType):
IcebergDataWriteFactory = {
val writeSchema = IcebergWriteUtil.getWriteSchema(write)
val nestedField = TypeUtil.visit(writeSchema, new
IcebergNestedFieldVisitor)
+ // Filter out metadata columns from the Spark output schema and reorder to
match Iceberg schema
+ // Spark 4.0 may include metadata columns in the output schema during
UPDATE operations,
+ // but these should not be written to the Iceberg table
+ val schemaFieldMap = schema.fields.map(f => f.name -> f).toMap
Review Comment:
You could use IntelliJ to debug here and inspect the difference between
writeSchema and the Spark `schema: StructType`; you could also use `slice`
to take just the needed subset of columns.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]