This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 63392d9  Core: Use CharSequenceSet instead of Set<CharSequence> (#2712)
63392d9 is described below

commit 63392d95848b21b32e6619f500e87c5342824f08
Author: Eduard Tudenhöfner <[email protected]>
AuthorDate: Tue Jun 22 18:42:23 2021 +0200

    Core: Use CharSequenceSet instead of Set<CharSequence> (#2712)
    
    This also adds a test that makes sure a CharSequenceSet doesn't suffer
    from undefined equality behavior as reported by 
https://errorprone.info/bugpattern/CollectionUndefinedEquality
---
 .../org/apache/iceberg/util/CharSequenceSet.java   |  5 +-
 .../apache/iceberg/util/TestCharSequenceSet.java   | 53 ++++++++++++++++++++++
 .../main/java/org/apache/iceberg/BaseRowDelta.java |  3 +-
 .../org/apache/iceberg/ManifestFilterManager.java  | 11 +++--
 .../apache/iceberg/MergingSnapshotProducer.java    |  4 +-
 .../iceberg/deletes/PositionDeleteWriter.java      |  5 +-
 .../java/org/apache/iceberg/io/BaseTaskWriter.java |  3 +-
 .../apache/iceberg/io/SortedPosDeleteWriter.java   |  5 +-
 .../java/org/apache/iceberg/io/WriteResult.java    |  5 +-
 .../org/apache/iceberg/data/DeleteReadTests.java   |  5 +-
 .../java/org/apache/iceberg/data/FileHelpers.java  |  6 +--
 .../data/TestDataFileIndexStatsFilters.java        |  6 +--
 12 files changed, 83 insertions(+), 28 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java 
b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
index 5148191..80c8284 100644
--- a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
@@ -32,11 +32,11 @@ public class CharSequenceSet implements Set<CharSequence>, 
Serializable {
   private static final ThreadLocal<CharSequenceWrapper> wrappers = 
ThreadLocal.withInitial(
       () -> CharSequenceWrapper.wrap(null));
 
-  public static Set<CharSequence> of(Iterable<CharSequence> charSequences) {
+  public static CharSequenceSet of(Iterable<CharSequence> charSequences) {
     return new CharSequenceSet(charSequences);
   }
 
-  public static Set<CharSequence> empty() {
+  public static CharSequenceSet empty() {
     return new CharSequenceSet(ImmutableList.of());
   }
 
@@ -116,6 +116,7 @@ public class CharSequenceSet implements Set<CharSequence>, 
Serializable {
   }
 
   @Override
+  @SuppressWarnings("CollectionUndefinedEquality")
   public boolean containsAll(Collection<?> objects) {
     if (objects != null) {
       return Iterables.all(objects, this::contains);
diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java 
b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
new file mode 100644
index 0000000..d208de2
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.iceberg.util;
+
+import java.util.Arrays;
+import java.util.Set;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+public class TestCharSequenceSet {
+
+  // This test just verifies 
https://errorprone.info/bugpattern/CollectionUndefinedEquality
+  @Test
+  public void testSearchingInCharSequenceCollection() {
+    Set<CharSequence> set = CharSequenceSet.of(Arrays.asList("abc", new 
StringBuffer("def")));
+    Assertions.assertThat(set).contains("abc");
+    Assertions.assertThat(set.stream().anyMatch("def"::contains)).isTrue();
+
+    // this would fail with a normal Set<CharSequence>
+    Assertions.assertThat(set.contains("def")).isTrue();
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/BaseRowDelta.java 
b/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
index 7a86df2..485609f 100644
--- a/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
+++ b/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
@@ -19,14 +19,13 @@
 
 package org.apache.iceberg;
 
-import java.util.Set;
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.util.CharSequenceSet;
 
 class BaseRowDelta extends MergingSnapshotProducer<RowDelta> implements 
RowDelta {
   private Long startingSnapshotId = null; // check all versions by default
-  private final Set<CharSequence> referencedDataFiles = 
CharSequenceSet.empty();
+  private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
   private boolean validateDeletes = false;
   private Expression conflictDetectionFilter = null;
   private boolean caseSensitive = true;
diff --git a/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java 
b/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
index 506d257..a81fd28 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
@@ -68,7 +68,7 @@ abstract class ManifestFilterManager<F extends 
ContentFile<F>> {
   private final Map<Integer, PartitionSpec> specsById;
   private final PartitionSet deleteFilePartitions;
   private final PartitionSet dropPartitions;
-  private final Set<CharSequence> deletePaths = CharSequenceSet.empty();
+  private final CharSequenceSet deletePaths = CharSequenceSet.empty();
   private Expression deleteExpression = Expressions.alwaysFalse();
   private long minSequenceNumber = 0;
   private boolean hasPathOnlyDeletes = false;
@@ -216,17 +216,18 @@ abstract class ManifestFilterManager<F extends 
ContentFile<F>> {
    *
    * @param manifests a set of filtered manifests
    */
+  @SuppressWarnings("CollectionUndefinedEquality")
   private void validateRequiredDeletes(ManifestFile... manifests) {
     if (failMissingDeletePaths) {
-      Set<CharSequence> deletedFiles = deletedFiles(manifests);
+      CharSequenceSet deletedFiles = deletedFiles(manifests);
       ValidationException.check(deletedFiles.containsAll(deletePaths),
           "Missing required files to delete: %s",
           COMMA.join(Iterables.filter(deletePaths, path -> 
!deletedFiles.contains(path))));
     }
   }
 
-  private Set<CharSequence> deletedFiles(ManifestFile[] manifests) {
-    Set<CharSequence> deletedFiles = CharSequenceSet.empty();
+  private CharSequenceSet deletedFiles(ManifestFile[] manifests) {
+    CharSequenceSet deletedFiles = CharSequenceSet.empty();
 
     if (manifests != null) {
       for (ManifestFile manifest : manifests) {
@@ -337,6 +338,7 @@ abstract class ManifestFilterManager<F extends 
ContentFile<F>> {
     return canContainExpressionDeletes || canContainDroppedPartitions || 
canContainDroppedFiles || canContainDropBySeq;
   }
 
+  @SuppressWarnings("CollectionUndefinedEquality")
   private boolean manifestHasDeletedFiles(
       StrictMetricsEvaluator metricsEvaluator, ManifestReader<F> reader) {
     boolean isDelete = reader.isDeleteManifestReader();
@@ -364,6 +366,7 @@ abstract class ManifestFilterManager<F extends 
ContentFile<F>> {
     return hasDeletedFiles;
   }
 
+  @SuppressWarnings("CollectionUndefinedEquality")
   private ManifestFile filterManifestWithDeletedFiles(
       StrictMetricsEvaluator metricsEvaluator, ManifestFile manifest, 
ManifestReader<F> reader) {
     boolean isDelete = reader.isDeleteManifestReader();
diff --git a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java 
b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
index 45e4326..f5c61f5 100644
--- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
+++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -40,6 +40,7 @@ import 
org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.CharSequenceSet;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -296,8 +297,9 @@ abstract class MergingSnapshotProducer<ThisT> extends 
SnapshotProducer<ThisT> {
     }
   }
 
+  @SuppressWarnings("CollectionUndefinedEquality")
   protected void validateDataFilesExist(TableMetadata base, Long 
startingSnapshotId,
-                                        Set<CharSequence> requiredDataFiles, 
boolean skipDeletes) {
+                                        CharSequenceSet requiredDataFiles, 
boolean skipDeletes) {
     // if there is no current table state, no files have been removed
     if (base.currentSnapshot() == null) {
       return;
diff --git 
a/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java 
b/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
index 7bacdc0..8c5eecf 100644
--- a/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
+++ b/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.deletes;
 import java.io.Closeable;
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.Set;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.FileMetadata;
@@ -41,7 +40,7 @@ public class PositionDeleteWriter<T> implements Closeable {
   private final StructLike partition;
   private final ByteBuffer keyMetadata;
   private final PositionDelete<T> delete;
-  private final Set<CharSequence> pathSet;
+  private final CharSequenceSet pathSet;
   private DeleteFile deleteFile = null;
 
   public PositionDeleteWriter(FileAppender<StructLike> appender, FileFormat 
format, String location,
@@ -81,7 +80,7 @@ public class PositionDeleteWriter<T> implements Closeable {
     }
   }
 
-  public Set<CharSequence> referencedDataFiles() {
+  public CharSequenceSet referencedDataFiles() {
     return pathSet;
   }
 
diff --git a/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java 
b/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
index 0ba8de1..d787c7c 100644
--- a/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
+++ b/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
@@ -23,7 +23,6 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.FileFormat;
@@ -44,7 +43,7 @@ import org.apache.iceberg.util.Tasks;
 public abstract class BaseTaskWriter<T> implements TaskWriter<T> {
   private final List<DataFile> completedDataFiles = Lists.newArrayList();
   private final List<DeleteFile> completedDeleteFiles = Lists.newArrayList();
-  private final Set<CharSequence> referencedDataFiles = 
CharSequenceSet.empty();
+  private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
 
   private final PartitionSpec spec;
   private final FileFormat format;
diff --git 
a/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java 
b/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
index 03a8040..0da2057 100644
--- a/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
+++ b/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
@@ -25,7 +25,6 @@ import java.io.UncheckedIOException;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.StructLike;
@@ -42,7 +41,7 @@ class SortedPosDeleteWriter<T> implements Closeable {
 
   private final Map<CharSequenceWrapper, List<PosRow<T>>> posDeletes = 
Maps.newHashMap();
   private final List<DeleteFile> completedFiles = Lists.newArrayList();
-  private final Set<CharSequence> referencedDataFiles = 
CharSequenceSet.empty();
+  private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
   private final CharSequenceWrapper wrapper = CharSequenceWrapper.wrap(null);
 
   private final FileAppenderFactory<T> appenderFactory;
@@ -98,7 +97,7 @@ class SortedPosDeleteWriter<T> implements Closeable {
     return completedFiles;
   }
 
-  public Set<CharSequence> referencedDataFiles() {
+  public CharSequenceSet referencedDataFiles() {
     return referencedDataFiles;
   }
 
diff --git a/core/src/main/java/org/apache/iceberg/io/WriteResult.java 
b/core/src/main/java/org/apache/iceberg/io/WriteResult.java
index e620a61..100a37c 100644
--- a/core/src/main/java/org/apache/iceberg/io/WriteResult.java
+++ b/core/src/main/java/org/apache/iceberg/io/WriteResult.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.io;
 import java.io.Serializable;
 import java.util.Collections;
 import java.util.List;
-import java.util.Set;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
@@ -36,7 +35,7 @@ public class WriteResult implements Serializable {
 
   private WriteResult(List<DataFile> dataFiles,
                       List<DeleteFile> deleteFiles,
-                      Set<CharSequence> referencedDataFiles) {
+                      CharSequenceSet referencedDataFiles) {
     this.dataFiles = dataFiles.toArray(new DataFile[0]);
     this.deleteFiles = deleteFiles.toArray(new DeleteFile[0]);
     this.referencedDataFiles = referencedDataFiles.toArray(new 
CharSequence[0]);
@@ -61,7 +60,7 @@ public class WriteResult implements Serializable {
   public static class Builder {
     private final List<DataFile> dataFiles;
     private final List<DeleteFile> deleteFiles;
-    private final Set<CharSequence> referencedDataFiles;
+    private final CharSequenceSet referencedDataFiles;
 
     private Builder() {
       this.dataFiles = Lists.newArrayList();
diff --git a/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java 
b/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
index 4c58e56..70ac774 100644
--- a/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
+++ b/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
@@ -33,6 +33,7 @@ import 
org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.ArrayUtil;
+import org.apache.iceberg.util.CharSequenceSet;
 import org.apache.iceberg.util.Pair;
 import org.apache.iceberg.util.StructLikeSet;
 import org.apache.iceberg.util.StructProjection;
@@ -193,7 +194,7 @@ public abstract class DeleteReadTests {
         Pair.of(dataFile.path(), 6L) // id = 122
     );
 
-    Pair<DeleteFile, Set<CharSequence>> posDeletes = 
FileHelpers.writeDeleteFile(
+    Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
         table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
 
     table.newRowDelta()
@@ -225,7 +226,7 @@ public abstract class DeleteReadTests {
         Pair.of(dataFile.path(), 5L) // id = 121
     );
 
-    Pair<DeleteFile, Set<CharSequence>> posDeletes = 
FileHelpers.writeDeleteFile(
+    Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
         table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
 
     table.newRowDelta()
diff --git a/data/src/test/java/org/apache/iceberg/data/FileHelpers.java 
b/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
index 64488e7..362f846 100644
--- a/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
+++ b/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.data;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
-import java.util.Set;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DataFiles;
 import org.apache.iceberg.DeleteFile;
@@ -37,19 +36,20 @@ import org.apache.iceberg.io.FileAppender;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.parquet.Parquet;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.CharSequenceSet;
 import org.apache.iceberg.util.Pair;
 
 public class FileHelpers {
   private FileHelpers() {
   }
 
-  public static Pair<DeleteFile, Set<CharSequence>> writeDeleteFile(Table 
table, OutputFile out,
+  public static Pair<DeleteFile, CharSequenceSet> writeDeleteFile(Table table, 
OutputFile out,
                                                                     
List<Pair<CharSequence, Long>> deletes)
       throws IOException {
     return writeDeleteFile(table, out, null, deletes);
   }
 
-  public static Pair<DeleteFile, Set<CharSequence>> writeDeleteFile(Table 
table, OutputFile out, StructLike partition,
+  public static Pair<DeleteFile, CharSequenceSet> writeDeleteFile(Table table, 
OutputFile out, StructLike partition,
                                                                     
List<Pair<CharSequence, Long>> deletes)
       throws IOException {
     PositionDeleteWriter<?> writer = Parquet.writeDeletes(out)
diff --git 
a/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java 
b/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
index 8b1eeec..9a3a042 100644
--- 
a/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
+++ 
b/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.data;
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
-import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DeleteFile;
@@ -35,6 +34,7 @@ import org.apache.iceberg.TestTables;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.CharSequenceSet;
 import org.apache.iceberg.util.Pair;
 import org.junit.After;
 import org.junit.Assert;
@@ -97,7 +97,7 @@ public class TestDataFileIndexStatsFilters {
     deletes.add(Pair.of(dataFile.path(), 0L));
     deletes.add(Pair.of(dataFile.path(), 1L));
 
-    Pair<DeleteFile, Set<CharSequence>> posDeletes = 
FileHelpers.writeDeleteFile(
+    Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
         table, Files.localOutput(temp.newFile()), deletes);
     table.newRowDelta()
         .addDeletes(posDeletes.first())
@@ -124,7 +124,7 @@ public class TestDataFileIndexStatsFilters {
     deletes.add(Pair.of("some-other-file.parquet", 0L));
     deletes.add(Pair.of("some-other-file.parquet", 1L));
 
-    Pair<DeleteFile, Set<CharSequence>> posDeletes = 
FileHelpers.writeDeleteFile(
+    Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
         table, Files.localOutput(temp.newFile()), deletes);
     table.newRowDelta()
         .addDeletes(posDeletes.first())

Reply via email to