This is an automated email from the ASF dual-hosted git repository.

amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 6b04a6d000 Core: Track data files by spec id instead of full PartitionSpec (#11323)
6b04a6d000 is described below

commit 6b04a6d000019a6182eb6521d1e7e4124a0cd73b
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Fri Oct 25 16:25:23 2024 +0200

    Core: Track data files by spec id instead of full PartitionSpec (#11323)
---
 .../apache/iceberg/MergingSnapshotProducer.java    | 23 +++++++++-------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
index ab55f86ebf..50885dbb06 100644
--- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
+++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -80,7 +80,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
   private final ManifestFilterManager<DeleteFile> deleteFilterManager;
 
   // update data
-  private final Map<PartitionSpec, DataFileSet> newDataFilesBySpec = Maps.newHashMap();
+  private final Map<Integer, DataFileSet> newDataFilesBySpec = Maps.newHashMap();
   private Long newDataFilesDataSequenceNumber;
   private final Map<Integer, DeleteFileSet> newDeleteFilesBySpec = Maps.newHashMap();
   private final List<ManifestFile> appendManifests = Lists.newArrayList();
@@ -138,18 +138,13 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
   }
 
   protected PartitionSpec dataSpec() {
-    Set<PartitionSpec> specs = dataSpecs();
+    Set<Integer> specIds = newDataFilesBySpec.keySet();
     Preconditions.checkState(
-        specs.size() == 1,
-        "Cannot return a single partition spec: data files with different partition specs have been added");
-    return specs.iterator().next();
-  }
-
-  protected Set<PartitionSpec> dataSpecs() {
-    Set<PartitionSpec> specs = newDataFilesBySpec.keySet();
+        !specIds.isEmpty(), "Cannot determine partition specs: no data files have been added");
     Preconditions.checkState(
-        !specs.isEmpty(), "Cannot determine partition specs: no data files have been added");
-    return ImmutableSet.copyOf(specs);
+        specIds.size() == 1,
+        "Cannot return a single partition spec: data files with different partition specs have been added");
+    return spec(Iterables.getOnlyElement(specIds));
   }
 
   protected Expression rowFilter() {
@@ -237,7 +232,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
         file.location());
 
     DataFileSet dataFiles =
-        newDataFilesBySpec.computeIfAbsent(spec, ignored -> DataFileSet.create());
+        newDataFilesBySpec.computeIfAbsent(spec.specId(), ignored -> DataFileSet.create());
     if (dataFiles.add(file)) {
       addedFilesSummary.addedFile(spec, file);
       hasNewDataFiles = true;
@@ -971,9 +966,9 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
 
     if (cachedNewDataManifests.isEmpty()) {
       newDataFilesBySpec.forEach(
-          (dataSpec, dataFiles) -> {
+          (specId, dataFiles) -> {
             List<ManifestFile> newDataManifests =
-                writeDataManifests(dataFiles, newDataFilesDataSequenceNumber, dataSpec);
+                writeDataManifests(dataFiles, newDataFilesDataSequenceNumber, spec(specId));
             cachedNewDataManifests.addAll(newDataManifests);
           });
       this.hasNewDataFiles = false;

Reply via email to