This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 63392d9 Core: Use CharSequenceSet instead of Set<CharSequence> (#2712)
63392d9 is described below
commit 63392d95848b21b32e6619f500e87c5342824f08
Author: Eduard Tudenhöfner <[email protected]>
AuthorDate: Tue Jun 22 18:42:23 2021 +0200
Core: Use CharSequenceSet instead of Set<CharSequence> (#2712)
This also adds a test that makes sure a CharSequenceSet doesn't suffer
from undefined equality behavior as reported by
https://errorprone.info/bugpattern/CollectionUndefinedEquality
---
.../org/apache/iceberg/util/CharSequenceSet.java | 5 +-
.../apache/iceberg/util/TestCharSequenceSet.java | 53 ++++++++++++++++++++++
.../main/java/org/apache/iceberg/BaseRowDelta.java | 3 +-
.../org/apache/iceberg/ManifestFilterManager.java | 11 +++--
.../apache/iceberg/MergingSnapshotProducer.java | 4 +-
.../iceberg/deletes/PositionDeleteWriter.java | 5 +-
.../java/org/apache/iceberg/io/BaseTaskWriter.java | 3 +-
.../apache/iceberg/io/SortedPosDeleteWriter.java | 5 +-
.../java/org/apache/iceberg/io/WriteResult.java | 5 +-
.../org/apache/iceberg/data/DeleteReadTests.java | 5 +-
.../java/org/apache/iceberg/data/FileHelpers.java | 6 +--
.../data/TestDataFileIndexStatsFilters.java | 6 +--
12 files changed, 83 insertions(+), 28 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
index 5148191..80c8284 100644
--- a/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
+++ b/api/src/main/java/org/apache/iceberg/util/CharSequenceSet.java
@@ -32,11 +32,11 @@ public class CharSequenceSet implements Set<CharSequence>,
Serializable {
private static final ThreadLocal<CharSequenceWrapper> wrappers =
ThreadLocal.withInitial(
() -> CharSequenceWrapper.wrap(null));
- public static Set<CharSequence> of(Iterable<CharSequence> charSequences) {
+ public static CharSequenceSet of(Iterable<CharSequence> charSequences) {
return new CharSequenceSet(charSequences);
}
- public static Set<CharSequence> empty() {
+ public static CharSequenceSet empty() {
return new CharSequenceSet(ImmutableList.of());
}
@@ -116,6 +116,7 @@ public class CharSequenceSet implements Set<CharSequence>,
Serializable {
}
@Override
+ @SuppressWarnings("CollectionUndefinedEquality")
public boolean containsAll(Collection<?> objects) {
if (objects != null) {
return Iterables.all(objects, this::contains);
diff --git a/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
new file mode 100644
index 0000000..d208de2
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/util/TestCharSequenceSet.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.iceberg.util;
+
+import java.util.Arrays;
+import java.util.Set;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+public class TestCharSequenceSet {
+
+ // This test just verifies
https://errorprone.info/bugpattern/CollectionUndefinedEquality
+ @Test
+ public void testSearchingInCharSequenceCollection() {
+ Set<CharSequence> set = CharSequenceSet.of(Arrays.asList("abc", new
StringBuffer("def")));
+ Assertions.assertThat(set).contains("abc");
+ Assertions.assertThat(set.stream().anyMatch("def"::contains)).isTrue();
+
+ // this would fail with a normal Set<CharSequence>
+ Assertions.assertThat(set.contains("def")).isTrue();
+ }
+}
diff --git a/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
b/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
index 7a86df2..485609f 100644
--- a/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
+++ b/core/src/main/java/org/apache/iceberg/BaseRowDelta.java
@@ -19,14 +19,13 @@
package org.apache.iceberg;
-import java.util.Set;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.CharSequenceSet;
class BaseRowDelta extends MergingSnapshotProducer<RowDelta> implements
RowDelta {
private Long startingSnapshotId = null; // check all versions by default
- private final Set<CharSequence> referencedDataFiles =
CharSequenceSet.empty();
+ private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
private boolean validateDeletes = false;
private Expression conflictDetectionFilter = null;
private boolean caseSensitive = true;
diff --git a/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
b/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
index 506d257..a81fd28 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestFilterManager.java
@@ -68,7 +68,7 @@ abstract class ManifestFilterManager<F extends
ContentFile<F>> {
private final Map<Integer, PartitionSpec> specsById;
private final PartitionSet deleteFilePartitions;
private final PartitionSet dropPartitions;
- private final Set<CharSequence> deletePaths = CharSequenceSet.empty();
+ private final CharSequenceSet deletePaths = CharSequenceSet.empty();
private Expression deleteExpression = Expressions.alwaysFalse();
private long minSequenceNumber = 0;
private boolean hasPathOnlyDeletes = false;
@@ -216,17 +216,18 @@ abstract class ManifestFilterManager<F extends
ContentFile<F>> {
*
* @param manifests a set of filtered manifests
*/
+ @SuppressWarnings("CollectionUndefinedEquality")
private void validateRequiredDeletes(ManifestFile... manifests) {
if (failMissingDeletePaths) {
- Set<CharSequence> deletedFiles = deletedFiles(manifests);
+ CharSequenceSet deletedFiles = deletedFiles(manifests);
ValidationException.check(deletedFiles.containsAll(deletePaths),
"Missing required files to delete: %s",
COMMA.join(Iterables.filter(deletePaths, path ->
!deletedFiles.contains(path))));
}
}
- private Set<CharSequence> deletedFiles(ManifestFile[] manifests) {
- Set<CharSequence> deletedFiles = CharSequenceSet.empty();
+ private CharSequenceSet deletedFiles(ManifestFile[] manifests) {
+ CharSequenceSet deletedFiles = CharSequenceSet.empty();
if (manifests != null) {
for (ManifestFile manifest : manifests) {
@@ -337,6 +338,7 @@ abstract class ManifestFilterManager<F extends
ContentFile<F>> {
return canContainExpressionDeletes || canContainDroppedPartitions ||
canContainDroppedFiles || canContainDropBySeq;
}
+ @SuppressWarnings("CollectionUndefinedEquality")
private boolean manifestHasDeletedFiles(
StrictMetricsEvaluator metricsEvaluator, ManifestReader<F> reader) {
boolean isDelete = reader.isDeleteManifestReader();
@@ -364,6 +366,7 @@ abstract class ManifestFilterManager<F extends
ContentFile<F>> {
return hasDeletedFiles;
}
+ @SuppressWarnings("CollectionUndefinedEquality")
private ManifestFile filterManifestWithDeletedFiles(
StrictMetricsEvaluator metricsEvaluator, ManifestFile manifest,
ManifestReader<F> reader) {
boolean isDelete = reader.isDeleteManifestReader();
diff --git a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
index 45e4326..f5c61f5 100644
--- a/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
+++ b/core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -40,6 +40,7 @@ import
org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.CharSequenceSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -296,8 +297,9 @@ abstract class MergingSnapshotProducer<ThisT> extends
SnapshotProducer<ThisT> {
}
}
+ @SuppressWarnings("CollectionUndefinedEquality")
protected void validateDataFilesExist(TableMetadata base, Long
startingSnapshotId,
- Set<CharSequence> requiredDataFiles,
boolean skipDeletes) {
+ CharSequenceSet requiredDataFiles,
boolean skipDeletes) {
// if there is no current table state, no files have been removed
if (base.currentSnapshot() == null) {
return;
diff --git
a/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
b/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
index 7bacdc0..8c5eecf 100644
--- a/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
+++ b/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.deletes;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.Set;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.FileMetadata;
@@ -41,7 +40,7 @@ public class PositionDeleteWriter<T> implements Closeable {
private final StructLike partition;
private final ByteBuffer keyMetadata;
private final PositionDelete<T> delete;
- private final Set<CharSequence> pathSet;
+ private final CharSequenceSet pathSet;
private DeleteFile deleteFile = null;
public PositionDeleteWriter(FileAppender<StructLike> appender, FileFormat
format, String location,
@@ -81,7 +80,7 @@ public class PositionDeleteWriter<T> implements Closeable {
}
}
- public Set<CharSequence> referencedDataFiles() {
+ public CharSequenceSet referencedDataFiles() {
return pathSet;
}
diff --git a/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
b/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
index 0ba8de1..d787c7c 100644
--- a/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
+++ b/core/src/main/java/org/apache/iceberg/io/BaseTaskWriter.java
@@ -23,7 +23,6 @@ import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
@@ -44,7 +43,7 @@ import org.apache.iceberg.util.Tasks;
public abstract class BaseTaskWriter<T> implements TaskWriter<T> {
private final List<DataFile> completedDataFiles = Lists.newArrayList();
private final List<DeleteFile> completedDeleteFiles = Lists.newArrayList();
- private final Set<CharSequence> referencedDataFiles =
CharSequenceSet.empty();
+ private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
private final PartitionSpec spec;
private final FileFormat format;
diff --git
a/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
b/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
index 03a8040..0da2057 100644
--- a/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
+++ b/core/src/main/java/org/apache/iceberg/io/SortedPosDeleteWriter.java
@@ -25,7 +25,6 @@ import java.io.UncheckedIOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.StructLike;
@@ -42,7 +41,7 @@ class SortedPosDeleteWriter<T> implements Closeable {
private final Map<CharSequenceWrapper, List<PosRow<T>>> posDeletes =
Maps.newHashMap();
private final List<DeleteFile> completedFiles = Lists.newArrayList();
- private final Set<CharSequence> referencedDataFiles =
CharSequenceSet.empty();
+ private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
private final CharSequenceWrapper wrapper = CharSequenceWrapper.wrap(null);
private final FileAppenderFactory<T> appenderFactory;
@@ -98,7 +97,7 @@ class SortedPosDeleteWriter<T> implements Closeable {
return completedFiles;
}
- public Set<CharSequence> referencedDataFiles() {
+ public CharSequenceSet referencedDataFiles() {
return referencedDataFiles;
}
diff --git a/core/src/main/java/org/apache/iceberg/io/WriteResult.java
b/core/src/main/java/org/apache/iceberg/io/WriteResult.java
index e620a61..100a37c 100644
--- a/core/src/main/java/org/apache/iceberg/io/WriteResult.java
+++ b/core/src/main/java/org/apache/iceberg/io/WriteResult.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.io;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
-import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
@@ -36,7 +35,7 @@ public class WriteResult implements Serializable {
private WriteResult(List<DataFile> dataFiles,
List<DeleteFile> deleteFiles,
- Set<CharSequence> referencedDataFiles) {
+ CharSequenceSet referencedDataFiles) {
this.dataFiles = dataFiles.toArray(new DataFile[0]);
this.deleteFiles = deleteFiles.toArray(new DeleteFile[0]);
this.referencedDataFiles = referencedDataFiles.toArray(new
CharSequence[0]);
@@ -61,7 +60,7 @@ public class WriteResult implements Serializable {
public static class Builder {
private final List<DataFile> dataFiles;
private final List<DeleteFile> deleteFiles;
- private final Set<CharSequence> referencedDataFiles;
+ private final CharSequenceSet referencedDataFiles;
private Builder() {
this.dataFiles = Lists.newArrayList();
diff --git a/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
b/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
index 4c58e56..70ac774 100644
--- a/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
+++ b/data/src/test/java/org/apache/iceberg/data/DeleteReadTests.java
@@ -33,6 +33,7 @@ import
org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.ArrayUtil;
+import org.apache.iceberg.util.CharSequenceSet;
import org.apache.iceberg.util.Pair;
import org.apache.iceberg.util.StructLikeSet;
import org.apache.iceberg.util.StructProjection;
@@ -193,7 +194,7 @@ public abstract class DeleteReadTests {
Pair.of(dataFile.path(), 6L) // id = 122
);
- Pair<DeleteFile, Set<CharSequence>> posDeletes =
FileHelpers.writeDeleteFile(
+ Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
table.newRowDelta()
@@ -225,7 +226,7 @@ public abstract class DeleteReadTests {
Pair.of(dataFile.path(), 5L) // id = 121
);
- Pair<DeleteFile, Set<CharSequence>> posDeletes =
FileHelpers.writeDeleteFile(
+ Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
table.newRowDelta()
diff --git a/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
b/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
index 64488e7..362f846 100644
--- a/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
+++ b/data/src/test/java/org/apache/iceberg/data/FileHelpers.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.data;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
-import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.DeleteFile;
@@ -37,19 +36,20 @@ import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.CharSequenceSet;
import org.apache.iceberg.util.Pair;
public class FileHelpers {
private FileHelpers() {
}
- public static Pair<DeleteFile, Set<CharSequence>> writeDeleteFile(Table
table, OutputFile out,
+ public static Pair<DeleteFile, CharSequenceSet> writeDeleteFile(Table table,
OutputFile out,
List<Pair<CharSequence, Long>> deletes)
throws IOException {
return writeDeleteFile(table, out, null, deletes);
}
- public static Pair<DeleteFile, Set<CharSequence>> writeDeleteFile(Table
table, OutputFile out, StructLike partition,
+ public static Pair<DeleteFile, CharSequenceSet> writeDeleteFile(Table table,
OutputFile out, StructLike partition,
List<Pair<CharSequence, Long>> deletes)
throws IOException {
PositionDeleteWriter<?> writer = Parquet.writeDeletes(out)
diff --git
a/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
b/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
index 8b1eeec..9a3a042 100644
---
a/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
+++
b/data/src/test/java/org/apache/iceberg/data/TestDataFileIndexStatsFilters.java
@@ -22,7 +22,6 @@ package org.apache.iceberg.data;
import java.io.File;
import java.io.IOException;
import java.util.List;
-import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
@@ -35,6 +34,7 @@ import org.apache.iceberg.TestTables;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.CharSequenceSet;
import org.apache.iceberg.util.Pair;
import org.junit.After;
import org.junit.Assert;
@@ -97,7 +97,7 @@ public class TestDataFileIndexStatsFilters {
deletes.add(Pair.of(dataFile.path(), 0L));
deletes.add(Pair.of(dataFile.path(), 1L));
- Pair<DeleteFile, Set<CharSequence>> posDeletes =
FileHelpers.writeDeleteFile(
+ Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
table, Files.localOutput(temp.newFile()), deletes);
table.newRowDelta()
.addDeletes(posDeletes.first())
@@ -124,7 +124,7 @@ public class TestDataFileIndexStatsFilters {
deletes.add(Pair.of("some-other-file.parquet", 0L));
deletes.add(Pair.of("some-other-file.parquet", 1L));
- Pair<DeleteFile, Set<CharSequence>> posDeletes =
FileHelpers.writeDeleteFile(
+ Pair<DeleteFile, CharSequenceSet> posDeletes = FileHelpers.writeDeleteFile(
table, Files.localOutput(temp.newFile()), deletes);
table.newRowDelta()
.addDeletes(posDeletes.first())