This is an automated email from the ASF dual-hosted git repository.

ashvin pushed a commit to branch 348-deletion-vectors-iceberg-target
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git

commit 12186a9cde1a4e9d56549f4e0c3c7a1caca46279
Author: Ashvin Agrawal <[email protected]>
AuthorDate: Mon Feb 3 22:09:22 2025 -0800

    Delete inherits data
---
 .../java/org/apache/xtable/model/TableChange.java  | 15 +------------
 .../xtable/model/storage/InternalDataFile.java     | 13 ++++++++---
 .../model/storage/InternalDeletionVector.java      | 26 +++++++++++-----------
 .../apache/xtable/delta/DeltaActionsConverter.java |  9 ++++----
 .../apache/xtable/delta/DeltaConversionSource.java | 13 ++++++-----
 .../org/apache/xtable/ValidationTestHelper.java    |  6 ++++-
 .../xtable/delta/ITDeltaDeleteVectorConvert.java   | 12 +++++-----
 .../xtable/delta/TestDeltaActionsConverter.java    | 18 +++++++--------
 8 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java b/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java
index e750916f..85f546e9 100644
--- a/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java
+++ b/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java
@@ -18,15 +18,10 @@
  
 package org.apache.xtable.model;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
 import lombok.Builder;
 import lombok.Value;
 
 import org.apache.xtable.model.storage.DataFilesDiff;
-import org.apache.xtable.model.storage.InternalDeletionVector;
 
 /**
  * Captures the changes in a single commit/instant from the source table.
@@ -34,7 +29,7 @@ import org.apache.xtable.model.storage.InternalDeletionVector;
  * @since 0.1
  */
 @Value
-@Builder(toBuilder = true, builderClassName = "Builder")
+@Builder(toBuilder = true)
 public class TableChange {
   // Change in files at the specified instant
   DataFilesDiff filesDiff;
@@ -42,15 +37,7 @@ public class TableChange {
   // A commit can add deletion vectors when some records are deleted. New deletion vectors can be
   // added even if no new data files are added. However, as deletion vectors are always associated
   // with a data file, they are implicitly removed when a corresponding data file is removed.
-  List<InternalDeletionVector> deletionVectorsAdded;
 
   /** The {@link InternalTable} at the commit time to which this table change belongs. */
   InternalTable tableAsOfChange;
-
-  public static class Builder {
-    public Builder deletionVectorsAdded(Collection<InternalDeletionVector> deletionVectorsAdded) {
-      this.deletionVectorsAdded = new ArrayList<>(deletionVectorsAdded);
-      return this;
-    }
-  }
 }
diff --git a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java
index 3aee766e..3e742fb7 100644
--- a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java
+++ b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java
@@ -22,8 +22,12 @@ import java.util.Collections;
 import java.util.List;
 
 import lombok.Builder;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
 import lombok.NonNull;
-import lombok.Value;
+import lombok.ToString;
+import lombok.experimental.FieldDefaults;
+import lombok.experimental.SuperBuilder;
 
 import org.apache.xtable.model.stat.ColumnStat;
 import org.apache.xtable.model.stat.PartitionValue;
@@ -33,8 +37,11 @@ import org.apache.xtable.model.stat.PartitionValue;
  *
  * @since 0.1
  */
-@Builder(toBuilder = true)
-@Value
+@SuperBuilder(toBuilder = true)
+@FieldDefaults(makeFinal = true, level = lombok.AccessLevel.PRIVATE)
+@Getter
+@ToString(callSuper = true)
+@EqualsAndHashCode
 public class InternalDataFile {
   // physical path of the file (absolute with scheme)
   @NonNull String physicalPath;
diff --git a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java
index bb1a13fd..868749dc 100644
--- a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java
+++ b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java
@@ -22,27 +22,27 @@ import java.util.Iterator;
 import java.util.function.Supplier;
 
 import lombok.AccessLevel;
-import lombok.Builder;
+import lombok.EqualsAndHashCode;
 import lombok.Getter;
 import lombok.NonNull;
-import lombok.Value;
+import lombok.ToString;
 import lombok.experimental.Accessors;
+import lombok.experimental.FieldDefaults;
+import lombok.experimental.SuperBuilder;
 
-@Builder(toBuilder = true, builderClassName = "Builder")
 @Accessors(fluent = true)
-@Value
-public class InternalDeletionVector {
+@SuperBuilder(toBuilder = true)
+@FieldDefaults(makeFinal = true, level = lombok.AccessLevel.PRIVATE)
+@Getter
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+public class InternalDeletionVector extends InternalDataFile {
   // path (absolute with scheme) of data file to which this deletion vector belongs
   @NonNull String dataFilePath;
 
-  // size of the deletion vector
-  int size;
-
-  // count of records deleted by this deletion vector
-  long countRecordsDeleted;
-
-  // physical path of the deletion vector file (absolute with scheme)
-  String sourceDeletionVectorFilePath;
+  // super.getFileSizeBytes() is the size of the deletion vector file
+  // super.getPhysicalPath() is the absolute path (with scheme) of the deletion vector file
+  // super.getRecordCount() is the count of records in the deletion vector file
 
   // offset of deletion vector start in a deletion vector file
   int offset;
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java
index 082a6d10..d75949f1 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java
@@ -135,21 +135,22 @@ public class DeltaActionsConverter {
     String dataFilePath = addFile.path();
     dataFilePath = getFullPathToFile(snapshot, dataFilePath);
 
-    InternalDeletionVector.Builder deleteVectorBuilder =
+    InternalDeletionVector.InternalDeletionVectorBuilder<?, ?> deleteVectorBuilder =
         InternalDeletionVector.builder()
-            .countRecordsDeleted(deletionVector.cardinality())
-            .size(deletionVector.sizeInBytes())
+            .recordCount(deletionVector.cardinality())
+            .fileSizeBytes(deletionVector.sizeInBytes())
             .dataFilePath(dataFilePath);
 
     if (deletionVector.isInline()) {
       deleteVectorBuilder
           .binaryRepresentation(deletionVector.inlineData())
+          .physicalPath("")
           .ordinalsSupplier(() -> ordinalsIterator(deletionVector.inlineData()));
     } else {
       Path deletionVectorFilePath = deletionVector.absolutePath(snapshot.deltaLog().dataPath());
       deleteVectorBuilder
           .offset(getOffset(deletionVector))
-          .sourceDeletionVectorFilePath(deletionVectorFilePath.toString())
+          .physicalPath(deletionVectorFilePath.toString())
           .ordinalsSupplier(() -> ordinalsIterator(snapshot, deletionVector));
     }
 
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
index 9d44401f..e9fc5afd 100644
--- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
+++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
@@ -25,6 +25,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import lombok.Builder;
 import lombok.extern.log4j.Log4j2;
@@ -161,16 +163,15 @@ public class DeltaConversionSource implements ConversionSource<Long> {
       }
     }
 
+    List<InternalDataFile> allAddedFiles =
+        Stream.concat(addedFiles.values().stream(), deletionVectors.values().stream())
+            .collect(Collectors.toList());
     DataFilesDiff dataFilesDiff =
         DataFilesDiff.builder()
-            .filesAdded(addedFiles.values())
+            .filesAdded(allAddedFiles)
             .filesRemoved(removedFiles.values())
             .build();
-    return TableChange.builder()
-        .tableAsOfChange(tableAtVersion)
-        .deletionVectorsAdded(deletionVectors.values())
-        .filesDiff(dataFilesDiff)
-        .build();
+    return TableChange.builder().tableAsOfChange(tableAtVersion).filesDiff(dataFilesDiff).build();
   }
 
   @Override
diff --git a/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java b/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java
index a20ae4e7..f56b4fa3 100644
--- a/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java
+++ b/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java
@@ -30,6 +30,7 @@ import java.util.stream.IntStream;
 import org.apache.xtable.model.InternalSnapshot;
 import org.apache.xtable.model.TableChange;
 import org.apache.xtable.model.storage.InternalDataFile;
+import org.apache.xtable.model.storage.InternalDeletionVector;
 
 public class ValidationTestHelper {
 
@@ -96,7 +97,10 @@ public class ValidationTestHelper {
   }
 
   private static Set<String> extractPathsFromDataFile(Set<InternalDataFile> dataFiles) {
-    return dataFiles.stream().map(InternalDataFile::getPhysicalPath).collect(Collectors.toSet());
+    return dataFiles.stream()
+        .filter(file -> !(file instanceof InternalDeletionVector))
+        .map(InternalDataFile::getPhysicalPath)
+        .collect(Collectors.toSet());
   }
 
   private static void replaceFileScheme(List<String> filePaths) {
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java b/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
index 87cc8d99..dcfb5f80 100644
--- a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
+++ b/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
@@ -284,7 +284,9 @@ public class ITDeltaDeleteVectorConvert {
       Map<String, AddFile> activeFilesAfterCommit,
       TableChange changeDetectedForCommit) {
     Map<String, InternalDeletionVector> detectedDeleteInfos =
-        changeDetectedForCommit.getDeletionVectorsAdded().stream()
+        changeDetectedForCommit.getFilesDiff().getFilesAdded().stream()
+            .filter(file -> file instanceof InternalDeletionVector)
+            .map(file -> (InternalDeletionVector) file)
             .collect(Collectors.toMap(InternalDeletionVector::dataFilePath, file -> file));
 
     Map<String, AddFile> filesWithDeleteVectors =
@@ -299,20 +301,20 @@ public class ITDeltaDeleteVectorConvert {
       assertNotNull(deleteInfo);
       DeletionVectorDescriptor deletionVectorDescriptor =
           fileWithDeleteVector.getValue().deletionVector();
-      assertEquals(deletionVectorDescriptor.cardinality(), deleteInfo.countRecordsDeleted());
-      assertEquals(deletionVectorDescriptor.sizeInBytes(), deleteInfo.size());
+      assertEquals(deletionVectorDescriptor.cardinality(), deleteInfo.getRecordCount());
+      assertEquals(deletionVectorDescriptor.sizeInBytes(), deleteInfo.getFileSizeBytes());
       assertEquals(deletionVectorDescriptor.offset().get(), deleteInfo.offset());
 
       String deletionFilePath =
           deletionVectorDescriptor
               .absolutePath(new org.apache.hadoop.fs.Path(testSparkDeltaTable.getBasePath()))
               .toString();
-      assertEquals(deletionFilePath, deleteInfo.sourceDeletionVectorFilePath());
+      assertEquals(deletionFilePath, deleteInfo.getPhysicalPath());
 
       Iterator<Long> iterator = deleteInfo.ordinalsIterator();
       List<Long> deletes = new ArrayList<>();
       iterator.forEachRemaining(deletes::add);
-      assertEquals(deletes.size(), deleteInfo.countRecordsDeleted());
+      assertEquals(deletes.size(), deleteInfo.getRecordCount());
     }
   }
 
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java
index 44586d51..117b3fc7 100644
--- a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java
+++ b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java
@@ -101,10 +101,10 @@ class TestDeltaActionsConverter {
         actionsConverter.extractDeletionVector(snapshot, addFileAction);
     assertNotNull(internaldeletionVector);
     assertEquals(basePath + dataFilePath, internaldeletionVector.dataFilePath());
-    assertEquals(deleteFilePath, internaldeletionVector.sourceDeletionVectorFilePath());
+    assertEquals(deleteFilePath, internaldeletionVector.getPhysicalPath());
     assertEquals(offset, internaldeletionVector.offset());
-    assertEquals(cardinality, internaldeletionVector.countRecordsDeleted());
-    assertEquals(size, internaldeletionVector.size());
+    assertEquals(cardinality, internaldeletionVector.getRecordCount());
+    assertEquals(size, internaldeletionVector.getFileSizeBytes());
     assertNull(internaldeletionVector.binaryRepresentation());
 
     Iterator<Long> iterator = internaldeletionVector.ordinalsIterator();
@@ -143,10 +143,10 @@ class TestDeltaActionsConverter {
         actionsConverter.extractDeletionVector(snapshot, addFileAction);
     assertNotNull(internaldeletionVector);
     assertEquals(basePath + dataFilePath, internaldeletionVector.dataFilePath());
-    assertEquals(deleteFilePath, internaldeletionVector.sourceDeletionVectorFilePath());
+    assertEquals(deleteFilePath, internaldeletionVector.getPhysicalPath());
     assertEquals(offset, internaldeletionVector.offset());
-    assertEquals(cardinality, internaldeletionVector.countRecordsDeleted());
-    assertEquals(size, internaldeletionVector.size());
+    assertEquals(cardinality, internaldeletionVector.getRecordCount());
+    assertEquals(size, internaldeletionVector.getFileSizeBytes());
     assertNull(internaldeletionVector.binaryRepresentation());
 
     Iterator<Long> iterator = internaldeletionVector.ordinalsIterator();
@@ -181,9 +181,9 @@ class TestDeltaActionsConverter {
     assertNotNull(internaldeletionVector);
     assertEquals(basePath + dataFilePath, internaldeletionVector.dataFilePath());
     assertArrayEquals(bytes, internaldeletionVector.binaryRepresentation());
-    assertEquals(cardinality, internaldeletionVector.countRecordsDeleted());
-    assertEquals(bytes.length, internaldeletionVector.size());
-    assertNull(internaldeletionVector.sourceDeletionVectorFilePath());
+    assertEquals(cardinality, internaldeletionVector.getRecordCount());
+    assertEquals(bytes.length, internaldeletionVector.getFileSizeBytes());
+    assertEquals("", internaldeletionVector.getPhysicalPath());
     assertEquals(0, internaldeletionVector.offset());
 
     Iterator<Long> iterator = internaldeletionVector.ordinalsIterator();

Reply via email to