This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new e4bc593d48 Core: Add DataFileSet / DeleteFileSet (#11195)
e4bc593d48 is described below

commit e4bc593d48df08c66549536266ca960024642295
Author: Eduard Tudenhoefner <[email protected]>
AuthorDate: Tue Oct 1 08:14:34 2024 +0200

    Core: Add DataFileSet / DeleteFileSet (#11195)
---
 .../java/org/apache/iceberg/util/DataFileSet.java  | 113 ++++++++
 .../org/apache/iceberg/util/DeleteFileSet.java     | 114 ++++++++
 .../java/org/apache/iceberg/util/WrapperSet.java   | 177 ++++++++++++
 .../org/apache/iceberg/util/TestDataFileSet.java   | 303 +++++++++++++++++++
 .../org/apache/iceberg/util/TestDeleteFileSet.java | 321 +++++++++++++++++++++
 5 files changed, 1028 insertions(+)

diff --git a/api/src/main/java/org/apache/iceberg/util/DataFileSet.java b/api/src/main/java/org/apache/iceberg/util/DataFileSet.java
new file mode 100644
index 0000000000..27cbee088a
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/util/DataFileSet.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.util;
+
+import java.util.Objects;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+
+/** A {@link WrapperSet} of {@link DataFile}s that treats files with equal locations as equal. */
+public class DataFileSet extends WrapperSet<DataFile> {
+  // per-thread wrapper that is handed out by wrapper(); it initially wraps null
+  private static final ThreadLocal<DataFileWrapper> WRAPPERS =
+      ThreadLocal.withInitial(() -> new DataFileWrapper(null));
+
+  private DataFileSet() {
+    // the no-arg constructor is needed for serialization/deserialization
+  }
+
+  private DataFileSet(Iterable<Wrapper<DataFile>> wrappers) {
+    super(wrappers);
+  }
+
+  /** Creates a new set without any elements. */
+  public static DataFileSet create() {
+    return new DataFileSet();
+  }
+
+  /**
+   * Creates a set of the given files; fails with a NullPointerException on a null element.
+   */
+  public static DataFileSet of(Iterable<? extends DataFile> iterable) {
+    Iterable<Wrapper<DataFile>> wrapped =
+        Iterables.transform(
+            iterable,
+            element -> {
+              DataFile file = Preconditions.checkNotNull(element, "Invalid object: null");
+              return new DataFileWrapper(file);
+            });
+    return new DataFileSet(wrapped);
+  }
+
+  @Override
+  protected Wrapper<DataFile> wrapper() {
+    return WRAPPERS.get();
+  }
+
+  @Override
+  protected Wrapper<DataFile> wrap(DataFile dataFile) {
+    return new DataFileWrapper(dataFile);
+  }
+
+  @Override
+  protected Class<DataFile> elementClass() {
+    return DataFile.class;
+  }
+
+  /** Mutable holder of a data file; equality, hash code and string form use its location. */
+  private static class DataFileWrapper implements Wrapper<DataFile> {
+    private DataFile file;
+
+    private DataFileWrapper(DataFile file) {
+      this.file = file;
+    }
+
+    @Override
+    public DataFile get() {
+      return file;
+    }
+
+    @Override
+    public Wrapper<DataFile> set(DataFile dataFile) {
+      this.file = dataFile;
+      return this;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+
+      return o instanceof DataFileWrapper
+          && Objects.equals(file.location(), ((DataFileWrapper) o).file.location());
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(file.location());
+    }
+
+    @Override
+    public String toString() {
+      return file.location();
+    }
+  }
+}
diff --git a/api/src/main/java/org/apache/iceberg/util/DeleteFileSet.java b/api/src/main/java/org/apache/iceberg/util/DeleteFileSet.java
new file mode 100644
index 0000000000..bbe9824963
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/util/DeleteFileSet.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.util;
+
+import java.util.Objects;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+
+/** A {@link WrapperSet} of {@link DeleteFile}s that treats files with equal locations as equal. */
+public class DeleteFileSet extends WrapperSet<DeleteFile> {
+  // per-thread wrapper that is handed out by wrapper(); it initially wraps null
+  private static final ThreadLocal<DeleteFileWrapper> WRAPPERS =
+      ThreadLocal.withInitial(() -> new DeleteFileWrapper(null));
+
+  private DeleteFileSet() {
+    // the no-arg constructor is needed for serialization/deserialization
+  }
+
+  private DeleteFileSet(Iterable<Wrapper<DeleteFile>> wrappers) {
+    super(wrappers);
+  }
+
+  /** Creates a new set without any elements. */
+  public static DeleteFileSet create() {
+    return new DeleteFileSet();
+  }
+
+  /**
+   * Creates a set of the given files; fails with a NullPointerException on a null element.
+   */
+  public static DeleteFileSet of(Iterable<? extends DeleteFile> iterable) {
+    Iterable<Wrapper<DeleteFile>> wrapped =
+        Iterables.transform(
+            iterable,
+            element -> {
+              DeleteFile file = Preconditions.checkNotNull(element, "Invalid object: null");
+              return new DeleteFileWrapper(file);
+            });
+    return new DeleteFileSet(wrapped);
+  }
+
+  @Override
+  protected Wrapper<DeleteFile> wrapper() {
+    return WRAPPERS.get();
+  }
+
+  @Override
+  protected Wrapper<DeleteFile> wrap(DeleteFile deleteFile) {
+    return new DeleteFileWrapper(deleteFile);
+  }
+
+  @Override
+  protected Class<DeleteFile> elementClass() {
+    return DeleteFile.class;
+  }
+
+  /** Mutable holder of a delete file; equality, hash code and string form use its location. */
+  private static class DeleteFileWrapper implements Wrapper<DeleteFile> {
+    private DeleteFile file;
+
+    private DeleteFileWrapper(DeleteFile file) {
+      this.file = file;
+    }
+
+    @Override
+    public DeleteFile get() {
+      return file;
+    }
+
+    @Override
+    public Wrapper<DeleteFile> set(DeleteFile deleteFile) {
+      this.file = deleteFile;
+      return this;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+
+      // this needs to be updated once deletion vector support is added
+      return o instanceof DeleteFileWrapper
+          && Objects.equals(file.location(), ((DeleteFileWrapper) o).file.location());
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(file.location());
+    }
+
+    @Override
+    public String toString() {
+      return file.location();
+    }
+  }
+}
diff --git a/api/src/main/java/org/apache/iceberg/util/WrapperSet.java b/api/src/main/java/org/apache/iceberg/util/WrapperSet.java
new file mode 100644
index 0000000000..e589f435e1
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/util/WrapperSet.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.util;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.relocated.com.google.common.collect.Streams;
+
+/**
+ * A custom set for a {@link Wrapper} of the given type that maintains insertion order and does not
+ * allow null elements.
+ *
+ * @param <T> The type to wrap in a {@link Wrapper} instance.
+ */
+abstract class WrapperSet<T> implements Set<T>, Serializable {
+  private final Set<Wrapper<T>> set = Sets.newLinkedHashSet();
+
+  protected WrapperSet(Iterable<Wrapper<T>> wrappers) {
+    wrappers.forEach(set::add);
+  }
+
+  protected WrapperSet() {}
+
+  // wrapper that contains/remove use as the lookup key; its value is reset to null afterwards
+  protected abstract Wrapper<T> wrapper();
+
+  // wrapper that is stored in the underlying set for the given element
+  protected abstract Wrapper<T> wrap(T file);
+
+  // class that the untyped arguments of contains/remove/retainAll are cast to
+  protected abstract Class<T> elementClass();
+
+  protected interface Wrapper<T> extends Serializable {
+    T get();
+
+    Wrapper<T> set(T object);
+  }
+
+  @Override
+  public int size() {
+    return set.size();
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return set.isEmpty();
+  }
+
+  @Override
+  public boolean contains(Object obj) {
+    Preconditions.checkNotNull(obj, "Invalid object: null");
+    Wrapper<T> wrapper = wrapper();
+    try {
+      return set.contains(wrapper.set(elementClass().cast(obj)));
+    } finally {
+      wrapper.set(null); // don't hold a reference to the value, even if the lookup fails
+    }
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+    return Iterators.transform(set.iterator(), Wrapper::get);
+  }
+
+  @Override
+  public Object[] toArray() {
+    return Lists.newArrayList(iterator()).toArray();
+  }
+
+  @Override
+  public <X> X[] toArray(X[] destArray) {
+    return Lists.newArrayList(iterator()).toArray(destArray);
+  }
+
+  @Override
+  public boolean add(T obj) {
+    Preconditions.checkNotNull(obj, "Invalid object: null");
+    return set.add(wrap(obj));
+  }
+
+  @Override
+  public boolean remove(Object obj) {
+    Preconditions.checkNotNull(obj, "Invalid object: null");
+    Wrapper<T> wrapper = wrapper();
+    try {
+      return set.remove(wrapper.set(elementClass().cast(obj)));
+    } finally {
+      wrapper.set(null); // don't hold a reference to the value, even if the removal fails
+    }
+  }
+
+  @Override
+  public boolean containsAll(Collection<?> collection) {
+    Preconditions.checkNotNull(collection, "Invalid collection: null");
+    return Iterables.all(collection, this::contains);
+  }
+
+  @Override
+  public boolean addAll(Collection<? extends T> collection) {
+    Preconditions.checkNotNull(collection, "Invalid collection: null");
+    return collection.stream().filter(this::add).count() != 0; // true if any element was new
+  }
+
+  @Override
+  public boolean retainAll(Collection<?> collection) {
+    Preconditions.checkNotNull(collection, "Invalid collection: null");
+    Set<Wrapper<T>> toRetain =
+        collection.stream()
+            .map(obj -> Preconditions.checkNotNull(obj, "Invalid object: null"))
+            .map(elementClass()::cast)
+            .map(this::wrap)
+            .collect(Collectors.toSet());
+    return Iterables.retainAll(set, toRetain);
+  }
+
+  @Override
+  public boolean removeAll(Collection<?> collection) {
+    Preconditions.checkNotNull(collection, "Invalid collection: null");
+    return collection.stream().filter(this::remove).count() != 0; // true if any was removed
+  }
+
+  @Override
+  public void clear() {
+    set.clear();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    } else if (!(other instanceof Set) || size() != ((Set<?>) other).size()) {
+      return false;
+    }
+
+    try {
+      return containsAll((Set<?>) other);
+    } catch (ClassCastException | NullPointerException unused) {
+      return false; // the other set holds a null or an element of a different type
+    }
+  }
+
+  @Override
+  public int hashCode() {
+    return set.stream().mapToInt(Object::hashCode).sum();
+  }
+
+  @Override
+  public String toString() {
+    return Streams.stream(iterator())
+        .map(Object::toString)
+        .collect(Collectors.joining(", ", "[", "]"));
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/util/TestDataFileSet.java b/core/src/test/java/org/apache/iceberg/util/TestDataFileSet.java
new file mode 100644
index 0000000000..0f298ad82e
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/util/TestDataFileSet.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.util;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Set;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Testing {@link DataFileSet} is easier in iceberg-core since the data file builders are located
+ * here
+ */
+public class TestDataFileSet {
+
+  private static final DataFile FILE_A =
+      DataFiles.builder(PartitionSpec.unpartitioned())
+          .withPath("/path/to/data-a.parquet")
+          .withFileSizeInBytes(1)
+          .withRecordCount(1)
+          .build();
+  private static final DataFile FILE_B =
+      DataFiles.builder(PartitionSpec.unpartitioned())
+          .withPath("/path/to/data-b.parquet")
+          .withFileSizeInBytes(2)
+          .withRecordCount(2)
+          .build();
+  private static final DataFile FILE_C =
+      DataFiles.builder(PartitionSpec.unpartitioned())
+          .withPath("/path/to/data-c.parquet")
+          .withFileSizeInBytes(3)
+          .withRecordCount(3)
+          .build();
+  private static final DataFile FILE_D =
+      DataFiles.builder(PartitionSpec.unpartitioned())
+          .withPath("/path/to/data-d.parquet")
+          .withFileSizeInBytes(4)
+          .withRecordCount(4)
+          .build();
+
+  @Test
+  public void emptySet() {
+    assertThat(DataFileSet.create()).isEmpty();
+    assertThat(DataFileSet.create()).doesNotContain(FILE_A, FILE_B, FILE_C);
+  }
+
+  @Test
+  public void insertionOrderIsMaintained() {
+    DataFileSet set = DataFileSet.create();
+    set.addAll(ImmutableList.of(FILE_D, FILE_A, FILE_C));
+    set.add(FILE_B);
+    set.add(FILE_D);
+
+    assertThat(set).hasSize(4).containsExactly(FILE_D, FILE_A, FILE_C, FILE_B);
+  }
+
+  @Test
+  public void clear() {
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    set.clear();
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void addAll() {
+    DataFileSet empty = DataFileSet.create();
+    assertThatThrownBy(() -> empty.add(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.addAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.addAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.addAll(Arrays.asList(FILE_A, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+    // duplicates are dropped and the order of first insertion is kept
+    DataFileSet set = DataFileSet.create();
+    set.addAll(ImmutableList.of(FILE_B, FILE_A, FILE_C, FILE_A));
+    assertThat(set).hasSize(3).containsExactly(FILE_B, FILE_A, FILE_C);
+  }
+
+  @Test
+  public void contains() {
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    assertThatThrownBy(() -> set.contains(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThat(set)
+        .hasSize(2)
+        .containsExactly(FILE_A, FILE_B)
+        .doesNotContain(FILE_C)
+        .doesNotContain(FILE_D);
+    // creating a set from a collection with a null element must fail
+    assertThatThrownBy(() -> DataFileSet.of(Arrays.asList(FILE_C, FILE_B, null, FILE_A)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+  }
+
+  @Test
+  public void containsAll() {
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    assertThatThrownBy(() -> set.containsAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> set.containsAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> set.containsAll(Arrays.asList(FILE_A, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThat(set.containsAll(ImmutableList.of(FILE_B, FILE_A))).isTrue();
+    assertThat(set.containsAll(ImmutableList.of(FILE_B, FILE_A, FILE_C))).isFalse();
+    assertThat(set.containsAll(ImmutableList.of(FILE_B))).isTrue();
+  }
+
+  @Test
+  public void toArray() {
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_B, FILE_A));
+    assertThat(set.toArray()).hasSize(2).containsExactly(FILE_B, FILE_A);
+    // with an array that is too small, the returned array has the size of the set
+    DataFile[] array = new DataFile[1];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly(FILE_B, FILE_A);
+
+    array = new DataFile[0];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly(FILE_B, FILE_A);
+    // with a larger array the remaining slots are null
+    array = new DataFile[5];
+    assertThat(set.toArray(array)).hasSize(5).containsExactly(FILE_B, FILE_A, null, null, null);
+
+    array = new DataFile[2];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly(FILE_B, FILE_A);
+  }
+
+  @Test
+  public void retainAll() {
+    DataFileSet empty = DataFileSet.create();
+    assertThatThrownBy(() -> empty.retainAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.retainAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.retainAll(Arrays.asList(FILE_A, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+    // FILE_A is the only element that is also part of the given collection
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    assertThat(set.retainAll(ImmutableList.of(FILE_C, FILE_D, FILE_A)))
+        .as("Set should have changed")
+        .isTrue();
+
+    assertThat(set).hasSize(1).containsExactly(FILE_A);
+
+    set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+
+    assertThat(set.retainAll(ImmutableList.of(FILE_B, FILE_A)))
+        .as("Set should not have changed")
+        .isFalse();
+
+    assertThat(set.retainAll(ImmutableList.of(FILE_C, FILE_D)))
+        .as("Set should have changed")
+        .isTrue();
+
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void remove() {
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    assertThatThrownBy(() -> set.remove(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThat(set.remove(FILE_C)).isFalse();
+    assertThat(set).containsExactly(FILE_A, FILE_B);
+    assertThat(set.remove(FILE_B)).isTrue();
+    assertThat(set).containsExactly(FILE_A);
+    assertThat(set.remove(FILE_A)).isTrue();
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void removeAll() {
+    DataFileSet empty = DataFileSet.create();
+    assertThatThrownBy(() -> empty.removeAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.removeAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.removeAll(Arrays.asList(FILE_A, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    DataFileSet set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    // FILE_C and FILE_D are not in the set, so only FILE_A is removed
+    assertThat(set.removeAll(ImmutableList.of(FILE_C, FILE_D, FILE_A)))
+        .as("Set should have changed")
+        .isTrue();
+
+    assertThat(set).hasSize(1).containsExactly(FILE_B);
+
+    set = DataFileSet.of(ImmutableList.of(FILE_A, FILE_B));
+    assertThat(set.removeAll(ImmutableList.of(FILE_C, FILE_D)))
+        .as("Set should not have changed")
+        .isFalse();
+
+    assertThat(set.removeAll(ImmutableList.of(FILE_B, FILE_A)))
+        .as("Set should have changed")
+        .isTrue();
+
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void equalsAndHashCode() {
+    DataFileSet set1 = DataFileSet.create();
+    DataFileSet set2 = DataFileSet.create();
+
+    assertThat(set1).isEqualTo(set2);
+    assertThat(set1.hashCode()).isEqualTo(set2.hashCode());
+
+    set1.add(FILE_A);
+    set1.add(FILE_B);
+    set1.add(FILE_C);
+
+    // different DataFile instances but all use the same paths as set1
+    set2.add(
+        DataFiles.builder(PartitionSpec.unpartitioned())
+            .withPath(FILE_A.location())
+            .withFileSizeInBytes(10)
+            .withRecordCount(1)
+            .build());
+    set2.add(
+        DataFiles.builder(PartitionSpec.unpartitioned())
+            .withPath(FILE_B.location())
+            .withFileSizeInBytes(100)
+            .withRecordCount(10)
+            .build());
+    set2.add(
+        DataFiles.builder(PartitionSpec.unpartitioned())
+            .withPath(FILE_C.location())
+            .withFileSizeInBytes(1000)
+            .withRecordCount(100)
+            .build());
+
+    Set<DataFile> set3 = Collections.unmodifiableSet(set2);
+
+    assertThat(set1).isEqualTo(set2).isEqualTo(set3);
+    assertThat(set1.hashCode()).isEqualTo(set2.hashCode()).isEqualTo(set3.hashCode());
+  }
+
+  @Test
+  public void kryoSerialization() throws Exception {
+    DataFileSet dataFiles = DataFileSet.of(ImmutableList.of(FILE_C, FILE_B, FILE_A));
+    assertThat(TestHelpers.KryoHelpers.roundTripSerialize(dataFiles)).isEqualTo(dataFiles);
+  }
+
+  @Test
+  public void javaSerialization() throws Exception {
+    DataFileSet dataFiles = DataFileSet.of(ImmutableList.of(FILE_C, FILE_B, FILE_A));
+    DataFileSet deserialized = TestHelpers.deserialize(TestHelpers.serialize(dataFiles));
+    assertThat(deserialized).isEqualTo(dataFiles);
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/util/TestDeleteFileSet.java b/core/src/test/java/org/apache/iceberg/util/TestDeleteFileSet.java
new file mode 100644
index 0000000000..5f4488a3a1
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/util/TestDeleteFileSet.java
@@ -0,0 +1,321 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.util;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Set;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.FileMetadata;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Testing {@link DeleteFileSet} is easier in iceberg-core since the delete file builders are
+ * located here
+ */
+public class TestDeleteFileSet {
+
+  private static final DeleteFile FILE_A_DELETES =
+      FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+          .ofPositionDeletes()
+          .withPath("/path/to/data-a-deletes.parquet")
+          .withFileSizeInBytes(1)
+          .withRecordCount(1)
+          .build();
+  private static final DeleteFile FILE_B_DELETES =
+      FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+          .ofPositionDeletes()
+          .withPath("/path/to/data-b-deletes.parquet")
+          .withFileSizeInBytes(2)
+          .withRecordCount(2)
+          .build();
+  private static final DeleteFile FILE_C_DELETES =
+      FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+          .ofPositionDeletes()
+          .withPath("/path/to/data-c-deletes.parquet")
+          .withFileSizeInBytes(3)
+          .withRecordCount(3)
+          .build();
+  private static final DeleteFile FILE_D_DELETES =
+      FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+          .ofPositionDeletes()
+          .withPath("/path/to/data-d-deletes.parquet")
+          .withFileSizeInBytes(4)
+          .withRecordCount(4)
+          .build();
+
+  @Test
+  public void emptySet() {
+    // a freshly created set is empty, so none of the files can be found in it
+    DeleteFileSet empty = DeleteFileSet.create();
+    assertThat(empty).doesNotContain(FILE_A_DELETES, FILE_B_DELETES, FILE_C_DELETES).isEmpty();
+  }
+
+  @Test
+  public void insertionOrderIsMaintained() {
+    DeleteFileSet set = DeleteFileSet.create();
+    set.addAll(ImmutableList.of(FILE_D_DELETES, FILE_A_DELETES, FILE_C_DELETES));
+    set.add(FILE_B_DELETES);
+    set.add(FILE_D_DELETES);
+    // adding FILE_D_DELETES a second time must not change its position
+    assertThat(set)
+        .hasSize(4)
+        .containsExactly(FILE_D_DELETES, FILE_A_DELETES, FILE_C_DELETES, FILE_B_DELETES);
+  }
+
+  @Test
+  public void clear() {
+    DeleteFileSet set = DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+    set.clear();
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void addAll() {
+    DeleteFileSet empty = DeleteFileSet.create();
+    assertThatThrownBy(() -> empty.add(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.addAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.addAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.addAll(Arrays.asList(FILE_A_DELETES, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+    // duplicates are dropped and the order of first insertion is kept
+    DeleteFileSet set = DeleteFileSet.create();
+    set.addAll(ImmutableList.of(FILE_B_DELETES, FILE_A_DELETES, FILE_C_DELETES, FILE_A_DELETES));
+    assertThat(set).hasSize(3).containsExactly(FILE_B_DELETES, FILE_A_DELETES, FILE_C_DELETES);
+  }
+
+  @Test
+  public void contains() {
+    DeleteFileSet set = DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+    assertThatThrownBy(() -> set.contains(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThat(set)
+        .hasSize(2)
+        .containsExactly(FILE_A_DELETES, FILE_B_DELETES)
+        .doesNotContain(FILE_C_DELETES)
+        .doesNotContain(FILE_D_DELETES);
+    // creating a set from a collection with a null element must fail
+    assertThatThrownBy(
+            () ->
+                DeleteFileSet.of(
+                    Arrays.asList(FILE_C_DELETES, FILE_B_DELETES, null, FILE_A_DELETES)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+  }
+
+  @Test
+  public void containsAll() {
+    DeleteFileSet set = DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+    assertThatThrownBy(() -> set.containsAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> set.containsAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> set.containsAll(Arrays.asList(FILE_A_DELETES, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+    // the order of the given collection does not matter, but every element must be present
+    assertThat(set.containsAll(ImmutableList.of(FILE_B_DELETES, FILE_A_DELETES))).isTrue();
+    assertThat(set.containsAll(ImmutableList.of(FILE_B_DELETES, FILE_A_DELETES, FILE_C_DELETES)))
+        .isFalse();
+    assertThat(set.containsAll(ImmutableList.of(FILE_B_DELETES))).isTrue();
+  }
+
+  @Test
+  public void toArray() {
+    DeleteFileSet set = DeleteFileSet.of(ImmutableList.of(FILE_B_DELETES, FILE_A_DELETES));
+    assertThat(set.toArray()).hasSize(2).containsExactly(FILE_B_DELETES, FILE_A_DELETES);
+    // with an array that is too small, the returned array has the size of the set
+    DeleteFile[] array = new DeleteFile[1];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly(FILE_B_DELETES, FILE_A_DELETES);
+
+    array = new DeleteFile[0];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly(FILE_B_DELETES, FILE_A_DELETES);
+    // with a larger array the remaining slots are null
+    array = new DeleteFile[5];
+    assertThat(set.toArray(array))
+        .hasSize(5)
+        .containsExactly(FILE_B_DELETES, FILE_A_DELETES, null, null, null);
+
+    array = new DeleteFile[2];
+    assertThat(set.toArray(array)).hasSize(2).containsExactly(FILE_B_DELETES, FILE_A_DELETES);
+  }
+
+  @Test
+  public void retainAll() {
+    DeleteFileSet empty = DeleteFileSet.create();
+    assertThatThrownBy(() -> empty.retainAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> empty.retainAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> empty.retainAll(Arrays.asList(FILE_A_DELETES, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+    // FILE_A_DELETES is the only element that is also part of the given collection
+    DeleteFileSet set = DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+    assertThat(set.retainAll(ImmutableList.of(FILE_C_DELETES, FILE_D_DELETES, FILE_A_DELETES)))
+        .as("Set should have changed")
+        .isTrue();
+
+    assertThat(set).hasSize(1).containsExactly(FILE_A_DELETES);
+
+    set = DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+
+    assertThat(set.retainAll(ImmutableList.of(FILE_B_DELETES, FILE_A_DELETES)))
+        .as("Set should not have changed")
+        .isFalse();
+
+    assertThat(set.retainAll(ImmutableList.of(FILE_C_DELETES, FILE_D_DELETES)))
+        .as("Set should have changed")
+        .isTrue();
+
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void remove() {
+    DeleteFileSet set = DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+
+    // a null argument is expected to be rejected
+    assertThatThrownBy(() -> set.remove(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    // removing a file that was never added must report "no change" and leave the set intact
+    assertThat(set.remove(FILE_C_DELETES)).as("Set should not have changed").isFalse();
+    assertThat(set).containsExactly(FILE_A_DELETES, FILE_B_DELETES);
+
+    // removing a member must report a change; the remaining member is untouched
+    assertThat(set.remove(FILE_B_DELETES)).as("Set should have changed").isTrue();
+    assertThat(set).containsExactly(FILE_A_DELETES);
+
+    // removing the last member empties the set
+    assertThat(set.remove(FILE_A_DELETES)).as("Set should have changed").isTrue();
+    assertThat(set).isEmpty();
+  }
+
+  @Test
+  public void removeAll() {
+    // argument validation is checked against an empty set
+    DeleteFileSet noFiles = DeleteFileSet.create();
+
+    assertThatThrownBy(() -> noFiles.removeAll(null))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid collection: null");
+
+    assertThatThrownBy(() -> noFiles.removeAll(Collections.singletonList(null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    assertThatThrownBy(() -> noFiles.removeAll(Arrays.asList(FILE_A_DELETES, null)))
+        .isInstanceOf(NullPointerException.class)
+        .hasMessage("Invalid object: null");
+
+    // partial overlap: only FILE_A_DELETES is a member, so FILE_B_DELETES is left behind
+    DeleteFileSet partiallyCleared =
+        DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+    boolean changedByPartialOverlap =
+        partiallyCleared.removeAll(
+            ImmutableList.of(FILE_C_DELETES, FILE_D_DELETES, FILE_A_DELETES));
+    assertThat(changedByPartialOverlap).as("Set should have changed").isTrue();
+    assertThat(partiallyCleared).hasSize(1).containsExactly(FILE_B_DELETES);
+
+    // a fresh set, first cleared of non-members only (no change) ...
+    DeleteFileSet fullyCleared =
+        DeleteFileSet.of(ImmutableList.of(FILE_A_DELETES, FILE_B_DELETES));
+    boolean changedByNoOverlap =
+        fullyCleared.removeAll(ImmutableList.of(FILE_C_DELETES, FILE_D_DELETES));
+    assertThat(changedByNoOverlap).as("Set should not have changed").isFalse();
+
+    // ... then of both of its members, which empties it
+    boolean changedByFullOverlap =
+        fullyCleared.removeAll(ImmutableList.of(FILE_B_DELETES, FILE_A_DELETES));
+    assertThat(changedByFullOverlap).as("Set should have changed").isTrue();
+    assertThat(fullyCleared).isEmpty();
+  }
+
+  @Test
+  public void equalsAndHashCode() {
+    DeleteFileSet originals = DeleteFileSet.create();
+    DeleteFileSet copies = DeleteFileSet.create();
+
+    // two empty sets compare equal and hash alike
+    assertThat(originals).isEqualTo(copies);
+    assertThat(originals.hashCode()).isEqualTo(copies.hashCode());
+
+    originals.add(FILE_A_DELETES);
+    originals.add(FILE_B_DELETES);
+    originals.add(FILE_C_DELETES);
+
+    // separately built DeleteFile instances that reuse the locations of the files in originals
+    DeleteFile copyOfA =
+        FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+            .ofPositionDeletes()
+            .withPath(FILE_A_DELETES.location())
+            .withFileSizeInBytes(10)
+            .withRecordCount(1)
+            .build();
+    DeleteFile copyOfB =
+        FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+            .ofPositionDeletes()
+            .withPath(FILE_B_DELETES.location())
+            .withFileSizeInBytes(100)
+            .withRecordCount(10)
+            .build();
+    DeleteFile copyOfC =
+        FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+            .ofPositionDeletes()
+            .withPath(FILE_C_DELETES.location())
+            .withFileSizeInBytes(1000)
+            .withRecordCount(100)
+            .build();
+
+    copies.add(copyOfA);
+    copies.add(copyOfB);
+    copies.add(copyOfC);
+
+    // an unmodifiable java.util view over copies must compare equal as well
+    Set<DeleteFile> readOnlyCopies = Collections.unmodifiableSet(copies);
+
+    assertThat(originals).isEqualTo(copies).isEqualTo(readOnlyCopies);
+
+    int originalsHash = originals.hashCode();
+    int copiesHash = copies.hashCode();
+    int readOnlyCopiesHash = readOnlyCopies.hashCode();
+    assertThat(originalsHash).isEqualTo(copiesHash).isEqualTo(readOnlyCopiesHash);
+  }
+
+  @Test
+  public void kryoSerialization() throws Exception {
+    DeleteFileSet expected =
+        DeleteFileSet.of(ImmutableList.of(FILE_C_DELETES, FILE_B_DELETES, FILE_A_DELETES));
+
+    // the set that comes back from a Kryo round trip must equal the one that went in
+    assertThat(TestHelpers.KryoHelpers.roundTripSerialize(expected)).isEqualTo(expected);
+  }
+
+  @Test
+  public void javaSerialization() throws Exception {
+    DeleteFileSet expected =
+        DeleteFileSet.of(ImmutableList.of(FILE_C_DELETES, FILE_B_DELETES, FILE_A_DELETES));
+
+    // serialize and immediately deserialize through the TestHelpers utilities
+    DeleteFileSet actual = TestHelpers.deserialize(TestHelpers.serialize(expected));
+
+    assertThat(actual).isEqualTo(expected);
+  }
+}


Reply via email to