This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new ed228f79cd Core: Fix NPE during conflict handling of NULL partitions
(#10680)
ed228f79cd is described below
commit ed228f79cd3e569e04af8a8ab411811803bf3a29
Author: boroknagyz <[email protected]>
AuthorDate: Fri Jul 12 14:25:39 2024 +0200
Core: Fix NPE during conflict handling of NULL partitions (#10680)
* Core: Fix NPE during conflict handling of NULL partitions
Partition values can be NULLs, or we can have NULLs because of
the VOID transforms. If a conflict is found in such partitions
we get a NullPointerException instead of a proper error message.
* Fix style issues
* Use String.valueOf()
* Reduce visibility of constant
Co-authored-by: Eduard Tudenhoefner <[email protected]>
* Indentation
* Update core/src/main/java/org/apache/iceberg/util/PartitionSet.java
---------
Co-authored-by: Fokko Driesprong <[email protected]>
Co-authored-by: Eduard Tudenhoefner <[email protected]>
---
.../java/org/apache/iceberg/util/PartitionSet.java | 2 +-
.../org/apache/iceberg/TestReplacePartitions.java | 77 ++++++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/util/PartitionSet.java
b/core/src/main/java/org/apache/iceberg/util/PartitionSet.java
index eff37fa5a9..184f38b324 100644
--- a/core/src/main/java/org/apache/iceberg/util/PartitionSet.java
+++ b/core/src/main/java/org/apache/iceberg/util/PartitionSet.java
@@ -200,7 +200,7 @@ public class PartitionSet extends AbstractSet<Pair<Integer,
StructLike>> {
StringBuilder partitionStringBuilder = new StringBuilder();
partitionStringBuilder.append(structType.fields().get(i).name());
partitionStringBuilder.append("=");
- partitionStringBuilder.append(s.get(i, Object.class).toString());
+ partitionStringBuilder.append(s.get(i, Object.class));
partitionDataJoiner.add(partitionStringBuilder.toString());
}
}
diff --git a/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java
b/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java
index a25920a1d7..6fa77ae05c 100644
--- a/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java
+++ b/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java
@@ -67,6 +67,34 @@ public class TestReplacePartitions extends TestBase {
.withRecordCount(1)
.build();
+ static final DataFile FILE_NULL_PARTITION =
+ DataFiles.builder(SPEC)
+ .withPath("/path/to/data-null-partition.parquet")
+ .withFileSizeInBytes(0)
+ .withPartitionPath("data_bucket=__HIVE_DEFAULT_PARTITION__")
+ .withRecordCount(0)
+ .build();
+
+ // Partition spec with VOID partition transform ("alwaysNull" in Java code.)
+ static final PartitionSpec SPEC_VOID =
+ PartitionSpec.builderFor(SCHEMA).alwaysNull("id").bucket("data",
BUCKETS_NUMBER).build();
+
+ static final DataFile FILE_A_VOID_PARTITION =
+ DataFiles.builder(SPEC_VOID)
+ .withPath("/path/to/data-a-void-partition.parquet")
+ .withFileSizeInBytes(10)
+
.withPartitionPath("id_null=__HIVE_DEFAULT_PARTITION__/data_bucket=0")
+ .withRecordCount(1)
+ .build();
+
+ static final DataFile FILE_B_VOID_PARTITION =
+ DataFiles.builder(SPEC_VOID)
+ .withPath("/path/to/data-b-void-partition.parquet")
+ .withFileSizeInBytes(10)
+
.withPartitionPath("id_null=__HIVE_DEFAULT_PARTITION__/data_bucket=1")
+ .withRecordCount(10)
+ .build();
+
static final DeleteFile FILE_UNPARTITIONED_A_DELETES =
FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
.ofPositionDeletes()
@@ -317,6 +345,55 @@ public class TestReplacePartitions extends TestBase {
+ "[data_bucket=0, data_bucket=1]: [/path/to/data-a.parquet]");
}
+ @TestTemplate
+ public void testValidateWithNullPartition() {
+ commit(table, table.newReplacePartitions().addFile(FILE_NULL_PARTITION),
branch);
+
+ // Concurrent Replace Partitions should fail with ValidationException
+ ReplacePartitions replace = table.newReplacePartitions();
+ assertThatThrownBy(
+ () ->
+ commit(
+ table,
+ replace
+ .addFile(FILE_NULL_PARTITION)
+ .addFile(FILE_B)
+ .validateNoConflictingData()
+ .validateNoConflictingDeletes(),
+ branch))
+ .isInstanceOf(ValidationException.class)
+ .hasMessage(
+ "Found conflicting files that can contain records matching
partitions "
+ + "[data_bucket=null, data_bucket=1]:
[/path/to/data-null-partition.parquet]");
+ }
+
+ @TestTemplate
+ public void testValidateWithVoidTransform() throws IOException {
+ File tableDir = Files.createTempDirectory(temp, "junit").toFile();
+ assertThat(tableDir.delete()).isTrue();
+
+ Table tableVoid = TestTables.create(tableDir, "tablevoid", SCHEMA,
SPEC_VOID, formatVersion);
+ commit(tableVoid,
tableVoid.newReplacePartitions().addFile(FILE_A_VOID_PARTITION), branch);
+
+ // Concurrent Replace Partitions should fail with ValidationException
+ ReplacePartitions replace = tableVoid.newReplacePartitions();
+ assertThatThrownBy(
+ () ->
+ commit(
+ tableVoid,
+ replace
+ .addFile(FILE_A_VOID_PARTITION)
+ .addFile(FILE_B_VOID_PARTITION)
+ .validateNoConflictingData()
+ .validateNoConflictingDeletes(),
+ branch))
+ .isInstanceOf(ValidationException.class)
+ .hasMessage(
+ "Found conflicting files that can contain records matching
partitions "
+ + "[id_null=null, data_bucket=1, id_null=null, data_bucket=0]:
"
+ + "[/path/to/data-a-void-partition.parquet]");
+ }
+
@TestTemplate
public void testConcurrentReplaceConflict() {
commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B),
branch);