This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 580e7021ab Spark 3.5: Fix clobbering of files across streaming epochs
when query ID is reused (#9255)
580e7021ab is described below
commit 580e7021ab4701764c3fcfd6c3e5e8f73f405992
Author: Amogh Jahagirdar <[email protected]>
AuthorDate: Tue Jan 2 08:41:22 2024 -0800
Spark 3.5: Fix clobbering of files across streaming epochs when query ID is
reused (#9255)
---
.../src/main/java/org/apache/iceberg/spark/source/SparkWrite.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java
index 58d77695f2..32f560a7bd 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java
@@ -673,11 +673,11 @@ abstract class SparkWrite implements Write, RequiresDistributionAndOrdering {
Table table = tableBroadcast.value();
PartitionSpec spec = table.specs().get(outputSpecId);
FileIO io = table.io();
-
+ String operationId = queryId + "-" + epochId;
OutputFileFactory fileFactory =
OutputFileFactory.builderFor(table, partitionId, taskId)
.format(format)
- .operationId(queryId)
+ .operationId(operationId)
.build();
SparkFileWriterFactory writerFactory =
SparkFileWriterFactory.builderFor(table)