This is an automated email from the ASF dual-hosted git repository. ashvin pushed a commit to branch 348-deletion-vectors-iceberg-target in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
commit 12186a9cde1a4e9d56549f4e0c3c7a1caca46279 Author: Ashvin Agrawal <[email protected]> AuthorDate: Mon Feb 3 22:09:22 2025 -0800 Delete inherits data --- .../java/org/apache/xtable/model/TableChange.java | 15 +------------ .../xtable/model/storage/InternalDataFile.java | 13 ++++++++--- .../model/storage/InternalDeletionVector.java | 26 +++++++++++----------- .../apache/xtable/delta/DeltaActionsConverter.java | 9 ++++---- .../apache/xtable/delta/DeltaConversionSource.java | 13 ++++++----- .../org/apache/xtable/ValidationTestHelper.java | 6 ++++- .../xtable/delta/ITDeltaDeleteVectorConvert.java | 12 +++++----- .../xtable/delta/TestDeltaActionsConverter.java | 18 +++++++-------- 8 files changed, 57 insertions(+), 55 deletions(-) diff --git a/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java b/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java index e750916f..85f546e9 100644 --- a/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java +++ b/xtable-api/src/main/java/org/apache/xtable/model/TableChange.java @@ -18,15 +18,10 @@ package org.apache.xtable.model; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - import lombok.Builder; import lombok.Value; import org.apache.xtable.model.storage.DataFilesDiff; -import org.apache.xtable.model.storage.InternalDeletionVector; /** * Captures the changes in a single commit/instant from the source table. @@ -34,7 +29,7 @@ import org.apache.xtable.model.storage.InternalDeletionVector; * @since 0.1 */ @Value -@Builder(toBuilder = true, builderClassName = "Builder") +@Builder(toBuilder = true) public class TableChange { // Change in files at the specified instant DataFilesDiff filesDiff; @@ -42,15 +37,7 @@ public class TableChange { // A commit can add deletion vectors when some records are deleted. New deletion vectors can be // added even if no new data files are added. However, as deletion vectors are always associated // with a data file, they are implicitly removed when a corresponding data file is removed. - List<InternalDeletionVector> deletionVectorsAdded; /** The {@link InternalTable} at the commit time to which this table change belongs. */ InternalTable tableAsOfChange; - - public static class Builder { - public Builder deletionVectorsAdded(Collection<InternalDeletionVector> deletionVectorsAdded) { - this.deletionVectorsAdded = new ArrayList<>(deletionVectorsAdded); - return this; - } - } } diff --git a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java index 3aee766e..3e742fb7 100644 --- a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java +++ b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDataFile.java @@ -22,8 +22,12 @@ import java.util.Collections; import java.util.List; import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; import lombok.NonNull; -import lombok.Value; +import lombok.ToString; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; import org.apache.xtable.model.stat.ColumnStat; import org.apache.xtable.model.stat.PartitionValue; @@ -33,8 +37,11 @@ import org.apache.xtable.model.stat.PartitionValue; * * @since 0.1 */ -@Builder(toBuilder = true) -@Value +@SuperBuilder(toBuilder = true) +@FieldDefaults(makeFinal = true, level = lombok.AccessLevel.PRIVATE) +@Getter +@ToString(callSuper = true) +@EqualsAndHashCode public class InternalDataFile { // physical path of the file (absolute with scheme) @NonNull String physicalPath; diff --git a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java index bb1a13fd..868749dc 100644 --- a/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java +++ b/xtable-api/src/main/java/org/apache/xtable/model/storage/InternalDeletionVector.java @@ -22,27 +22,27 @@ import java.util.Iterator; import java.util.function.Supplier; import lombok.AccessLevel; -import lombok.Builder; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NonNull; -import lombok.Value; +import lombok.ToString; import lombok.experimental.Accessors; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; -@Builder(toBuilder = true, builderClassName = "Builder") @Accessors(fluent = true) -@Value -public class InternalDeletionVector { +@SuperBuilder(toBuilder = true) +@FieldDefaults(makeFinal = true, level = lombok.AccessLevel.PRIVATE) +@Getter +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +public class InternalDeletionVector extends InternalDataFile { // path (absolute with scheme) of data file to which this deletion vector belongs @NonNull String dataFilePath; - // size of the deletion vector - int size; - - // count of records deleted by this deletion vector - long countRecordsDeleted; - - // physical path of the deletion vector file (absolute with scheme) - String sourceDeletionVectorFilePath; + // super.getFileSizeBytes() is the size of the deletion vector file + // super.getPhysicalPath() is the absolute path (with scheme) of the deletion vector file + // super.getRecordCount() is the count of records in the deletion vector file // offset of deletion vector start in a deletion vector file int offset; diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java index 082a6d10..d75949f1 100644 --- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java +++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java @@ -135,21 +135,22 @@ public class DeltaActionsConverter { String dataFilePath = addFile.path(); dataFilePath = getFullPathToFile(snapshot, dataFilePath); - InternalDeletionVector.Builder deleteVectorBuilder = + InternalDeletionVector.InternalDeletionVectorBuilder<?, ?> deleteVectorBuilder = InternalDeletionVector.builder() - .countRecordsDeleted(deletionVector.cardinality()) - .size(deletionVector.sizeInBytes()) + .recordCount(deletionVector.cardinality()) + .fileSizeBytes(deletionVector.sizeInBytes()) .dataFilePath(dataFilePath); if (deletionVector.isInline()) { deleteVectorBuilder .binaryRepresentation(deletionVector.inlineData()) + .physicalPath("") .ordinalsSupplier(() -> ordinalsIterator(deletionVector.inlineData())); } else { Path deletionVectorFilePath = deletionVector.absolutePath(snapshot.deltaLog().dataPath()); deleteVectorBuilder .offset(getOffset(deletionVector)) - .sourceDeletionVectorFilePath(deletionVectorFilePath.toString()) + .physicalPath(deletionVectorFilePath.toString()) .ordinalsSupplier(() -> ordinalsIterator(snapshot, deletionVector)); } diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java index 9d44401f..e9fc5afd 100644 --- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java +++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java @@ -25,6 +25,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; import lombok.Builder; import lombok.extern.log4j.Log4j2; @@ -161,16 +163,15 @@ public class DeltaConversionSource implements ConversionSource<Long> { } } + List<InternalDataFile> allAddedFiles = + Stream.concat(addedFiles.values().stream(), deletionVectors.values().stream()) + .collect(Collectors.toList()); DataFilesDiff dataFilesDiff = DataFilesDiff.builder() - .filesAdded(addedFiles.values()) + .filesAdded(allAddedFiles) .filesRemoved(removedFiles.values()) .build(); - return TableChange.builder() - .tableAsOfChange(tableAtVersion) - .deletionVectorsAdded(deletionVectors.values()) - .filesDiff(dataFilesDiff) - .build(); + return TableChange.builder().tableAsOfChange(tableAtVersion).filesDiff(dataFilesDiff).build(); } @Override diff --git a/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java b/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java index a20ae4e7..f56b4fa3 100644 --- a/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java +++ b/xtable-core/src/test/java/org/apache/xtable/ValidationTestHelper.java @@ -30,6 +30,7 @@ import java.util.stream.IntStream; import org.apache.xtable.model.InternalSnapshot; import org.apache.xtable.model.TableChange; import org.apache.xtable.model.storage.InternalDataFile; +import org.apache.xtable.model.storage.InternalDeletionVector; public class ValidationTestHelper { @@ -96,7 +97,10 @@ public class ValidationTestHelper { } private static Set<String> extractPathsFromDataFile(Set<InternalDataFile> dataFiles) { - return dataFiles.stream().map(InternalDataFile::getPhysicalPath).collect(Collectors.toSet()); + return dataFiles.stream() + .filter(file -> !(file instanceof InternalDeletionVector)) + .map(InternalDataFile::getPhysicalPath) + .collect(Collectors.toSet()); } private static void replaceFileScheme(List<String> filePaths) { diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java b/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java index 87cc8d99..dcfb5f80 100644 --- a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java +++ b/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java @@ -284,7 +284,9 @@ public class ITDeltaDeleteVectorConvert { Map<String, AddFile> activeFilesAfterCommit, TableChange changeDetectedForCommit) { Map<String, InternalDeletionVector> detectedDeleteInfos = - changeDetectedForCommit.getDeletionVectorsAdded().stream() + changeDetectedForCommit.getFilesDiff().getFilesAdded().stream() + .filter(file -> file instanceof InternalDeletionVector) + .map(file -> (InternalDeletionVector) file) .collect(Collectors.toMap(InternalDeletionVector::dataFilePath, file -> file)); Map<String, AddFile> filesWithDeleteVectors = @@ -299,20 +301,20 @@ public class ITDeltaDeleteVectorConvert { assertNotNull(deleteInfo); DeletionVectorDescriptor deletionVectorDescriptor = fileWithDeleteVector.getValue().deletionVector(); - assertEquals(deletionVectorDescriptor.cardinality(), deleteInfo.countRecordsDeleted()); - assertEquals(deletionVectorDescriptor.sizeInBytes(), deleteInfo.size()); + assertEquals(deletionVectorDescriptor.cardinality(), deleteInfo.getRecordCount()); + assertEquals(deletionVectorDescriptor.sizeInBytes(), deleteInfo.getFileSizeBytes()); assertEquals(deletionVectorDescriptor.offset().get(), deleteInfo.offset()); String deletionFilePath = deletionVectorDescriptor .absolutePath(new org.apache.hadoop.fs.Path(testSparkDeltaTable.getBasePath())) .toString(); - assertEquals(deletionFilePath, deleteInfo.sourceDeletionVectorFilePath()); + assertEquals(deletionFilePath, deleteInfo.getPhysicalPath()); Iterator<Long> iterator = deleteInfo.ordinalsIterator(); List<Long> deletes = new ArrayList<>(); iterator.forEachRemaining(deletes::add); - assertEquals(deletes.size(), deleteInfo.countRecordsDeleted()); + assertEquals(deletes.size(), deleteInfo.getRecordCount()); } } diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java index 44586d51..117b3fc7 100644 --- a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java +++ b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java @@ -101,10 +101,10 @@ class TestDeltaActionsConverter { actionsConverter.extractDeletionVector(snapshot, addFileAction); assertNotNull(internaldeletionVector); assertEquals(basePath + dataFilePath, internaldeletionVector.dataFilePath()); - assertEquals(deleteFilePath, internaldeletionVector.sourceDeletionVectorFilePath()); + assertEquals(deleteFilePath, internaldeletionVector.getPhysicalPath()); assertEquals(offset, internaldeletionVector.offset()); - assertEquals(cardinality, internaldeletionVector.countRecordsDeleted()); - assertEquals(size, internaldeletionVector.size()); + assertEquals(cardinality, internaldeletionVector.getRecordCount()); + assertEquals(size, internaldeletionVector.getFileSizeBytes()); assertNull(internaldeletionVector.binaryRepresentation()); Iterator<Long> iterator = internaldeletionVector.ordinalsIterator(); @@ -143,10 +143,10 @@ class TestDeltaActionsConverter { actionsConverter.extractDeletionVector(snapshot, addFileAction); assertNotNull(internaldeletionVector); assertEquals(basePath + dataFilePath, internaldeletionVector.dataFilePath()); - assertEquals(deleteFilePath, internaldeletionVector.sourceDeletionVectorFilePath()); + assertEquals(deleteFilePath, internaldeletionVector.getPhysicalPath()); assertEquals(offset, internaldeletionVector.offset()); - assertEquals(cardinality, internaldeletionVector.countRecordsDeleted()); - assertEquals(size, internaldeletionVector.size()); + assertEquals(cardinality, internaldeletionVector.getRecordCount()); + assertEquals(size, internaldeletionVector.getFileSizeBytes()); assertNull(internaldeletionVector.binaryRepresentation()); Iterator<Long> iterator = internaldeletionVector.ordinalsIterator(); @@ -181,9 +181,9 @@ class TestDeltaActionsConverter { assertNotNull(internaldeletionVector); assertEquals(basePath + dataFilePath, internaldeletionVector.dataFilePath()); assertArrayEquals(bytes, internaldeletionVector.binaryRepresentation()); - assertEquals(cardinality, internaldeletionVector.countRecordsDeleted()); - assertEquals(bytes.length, internaldeletionVector.size()); - assertNull(internaldeletionVector.sourceDeletionVectorFilePath()); + assertEquals(cardinality, internaldeletionVector.getRecordCount()); + assertEquals(bytes.length, internaldeletionVector.getFileSizeBytes()); + assertEquals("", internaldeletionVector.getPhysicalPath()); assertEquals(0, internaldeletionVector.offset()); Iterator<Long> iterator = internaldeletionVector.ordinalsIterator();
