This is an automated email from the ASF dual-hosted git repository.

russellspitzer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 91e04c9c88 API: Add compatibility checks for Schemas with default 
values (#11434)
91e04c9c88 is described below

commit 91e04c9c88b63dc01d6c8e69dfdc8cd27ee811cc
Author: Ryan Blue <[email protected]>
AuthorDate: Wed Oct 30 14:58:25 2024 -0700

    API: Add compatibility checks for Schemas with default values (#11434)
    
    
    Co-authored-by: Russell Spitzer <[email protected]>
    Co-authored-by: Fokko Driesprong <[email protected]>
---
 api/src/main/java/org/apache/iceberg/Schema.java   |  38 +++++--
 .../test/java/org/apache/iceberg/TestSchema.java   | 111 +++++++++++++++++++++
 .../java/org/apache/iceberg/TestTableMetadata.java |   3 +-
 3 files changed, 143 insertions(+), 9 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/Schema.java 
b/api/src/main/java/org/apache/iceberg/Schema.java
index 9bcf691f5a..44f65ff56a 100644
--- a/api/src/main/java/org/apache/iceberg/Schema.java
+++ b/api/src/main/java/org/apache/iceberg/Schema.java
@@ -54,6 +54,7 @@ public class Schema implements Serializable {
   private static final Joiner NEWLINE = Joiner.on('\n');
   private static final String ALL_COLUMNS = "*";
   private static final int DEFAULT_SCHEMA_ID = 0;
+  private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3;
   private static final Map<Type.TypeID, Integer> MIN_FORMAT_VERSIONS =
       ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3);
 
@@ -586,16 +587,37 @@ public class Schema implements Serializable {
    * @param formatVersion table format version
    */
   public static void checkCompatibility(Schema schema, int formatVersion) {
-    // check the type in each field
+    // accumulate errors as a treemap to keep them in a reasonable order
+    Map<Integer, String> problems = Maps.newTreeMap();
+
+    // check each field's type and defaults
     for (NestedField field : schema.lazyIdToField().values()) {
       Integer minFormatVersion = 
MIN_FORMAT_VERSIONS.get(field.type().typeId());
-      Preconditions.checkState(
-          minFormatVersion == null || formatVersion >= minFormatVersion,
-          "Invalid type in v%s schema: %s %s is not supported until v%s",
-          formatVersion,
-          schema.findColumnName(field.fieldId()),
-          field.type(),
-          minFormatVersion);
+      if (minFormatVersion != null && formatVersion < minFormatVersion) {
+        problems.put(
+            field.fieldId(),
+            String.format(
+                "Invalid type for %s: %s is not supported until v%s",
+                schema.findColumnName(field.fieldId()), field.type(), 
minFormatVersion));
+      }
+
+      if (field.initialDefault() != null && formatVersion < 
DEFAULT_VALUES_MIN_FORMAT_VERSION) {
+        problems.put(
+            field.fieldId(),
+            String.format(
+                "Invalid initial default for %s: non-null default (%s) is not 
supported until v%s",
+                schema.findColumnName(field.fieldId()),
+                field.initialDefault(),
+                DEFAULT_VALUES_MIN_FORMAT_VERSION));
+      }
+    }
+
+    // throw if there are any compatibility problems
+    if (!problems.isEmpty()) {
+      throw new IllegalStateException(
+          String.format(
+              "Invalid schema for v%s:\n- %s",
+              formatVersion, Joiner.on("\n- ").join(problems.values())));
     }
   }
 }
diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java 
b/api/src/test/java/org/apache/iceberg/TestSchema.java
new file mode 100644
index 0000000000..fec7343c5c
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/TestSchema.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThatCode;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public class TestSchema {
+  private static final Schema TS_NANO_CASES =
+      new Schema(
+          Types.NestedField.required(1, "id", Types.LongType.get()),
+          Types.NestedField.optional(2, "ts", 
Types.TimestampNanoType.withZone()),
+          Types.NestedField.optional(
+              3, "arr", Types.ListType.ofRequired(4, 
Types.TimestampNanoType.withoutZone())),
+          Types.NestedField.required(
+              5,
+              "struct",
+              Types.StructType.of(
+                  Types.NestedField.optional(6, "inner_ts", 
Types.TimestampNanoType.withZone()),
+                  Types.NestedField.required(7, "data", 
Types.StringType.get()))),
+          Types.NestedField.optional(
+              8,
+              "struct_arr",
+              Types.StructType.of(
+                  Types.NestedField.optional(9, "ts", 
Types.TimestampNanoType.withoutZone()))));
+
+  private static final Schema INITIAL_DEFAULT_SCHEMA =
+      new Schema(
+          Types.NestedField.required(1, "id", Types.LongType.get()),
+          Types.NestedField.required("has_default")
+              .withId(2)
+              .ofType(Types.StringType.get())
+              .withInitialDefault("--")
+              .withWriteDefault("--")
+              .build());
+
+  private static final Schema WRITE_DEFAULT_SCHEMA =
+      new Schema(
+          Types.NestedField.required(1, "id", Types.LongType.get()),
+          Types.NestedField.required("has_default")
+              .withId(2)
+              .ofType(Types.StringType.get())
+              .withWriteDefault("--")
+              .build());
+
+  @ParameterizedTest
+  @ValueSource(ints = {1, 2})
+  public void testUnsupportedTimestampNano(int formatVersion) {
+    assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, 
formatVersion))
+        .isInstanceOf(IllegalStateException.class)
+        .hasMessage(
+            "Invalid schema for v%s:\n"
+                + "- Invalid type for ts: timestamptz_ns is not supported 
until v3\n"
+                + "- Invalid type for arr.element: timestamp_ns is not 
supported until v3\n"
+                + "- Invalid type for struct.inner_ts: timestamptz_ns is not 
supported until v3\n"
+                + "- Invalid type for struct_arr.ts: timestamp_ns is not 
supported until v3",
+            formatVersion);
+  }
+
+  @Test
+  public void testSupportedTimestampNano() {
+    assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 
3)).doesNotThrowAnyException();
+  }
+
+  @ParameterizedTest
+  @ValueSource(ints = {1, 2})
+  public void testUnsupportedInitialDefault(int formatVersion) {
+    assertThatThrownBy(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 
formatVersion))
+        .isInstanceOf(IllegalStateException.class)
+        .hasMessage(
+            "Invalid schema for v%s:\n"
+                + "- Invalid initial default for has_default: "
+                + "non-null default (--) is not supported until v3",
+            formatVersion);
+  }
+
+  @Test
+  public void testSupportedInitialDefault() {
+    assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3))
+        .doesNotThrowAnyException();
+  }
+
+  @ParameterizedTest
+  @ValueSource(ints = {1, 2, 3})
+  public void testSupportedWriteDefault(int formatVersion) {
+    // only the initial default is a forward-incompatible change
+    assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, 
formatVersion))
+        .doesNotThrowAnyException();
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java 
b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
index 5ada357657..71254b3abb 100644
--- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
+++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
@@ -1672,7 +1672,8 @@ public class TestTableMetadata {
                       unsupportedFormatVersion))
           .isInstanceOf(IllegalStateException.class)
           .hasMessage(
-              "Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is 
not supported until v3",
+              "Invalid schema for v%s:\n"
+                  + "- Invalid type for struct.ts_nanos: timestamptz_ns is not 
supported until v3",
               unsupportedFormatVersion);
     }
 

Reply via email to