This is an automated email from the ASF dual-hosted git repository.
russellspitzer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 91e04c9c88 API: Add compatibility checks for Schemas with default
values (#11434)
91e04c9c88 is described below
commit 91e04c9c88b63dc01d6c8e69dfdc8cd27ee811cc
Author: Ryan Blue <[email protected]>
AuthorDate: Wed Oct 30 14:58:25 2024 -0700
API: Add compatibility checks for Schemas with default values (#11434)
Co-authored-by: Russell Spitzer <[email protected]>
Co-authored-by: Fokko Driesprong <[email protected]>
---
api/src/main/java/org/apache/iceberg/Schema.java | 38 +++++--
.../test/java/org/apache/iceberg/TestSchema.java | 111 +++++++++++++++++++++
.../java/org/apache/iceberg/TestTableMetadata.java | 3 +-
3 files changed, 143 insertions(+), 9 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/Schema.java
b/api/src/main/java/org/apache/iceberg/Schema.java
index 9bcf691f5a..44f65ff56a 100644
--- a/api/src/main/java/org/apache/iceberg/Schema.java
+++ b/api/src/main/java/org/apache/iceberg/Schema.java
@@ -54,6 +54,7 @@ public class Schema implements Serializable {
private static final Joiner NEWLINE = Joiner.on('\n');
private static final String ALL_COLUMNS = "*";
private static final int DEFAULT_SCHEMA_ID = 0;
+ private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3;
private static final Map<Type.TypeID, Integer> MIN_FORMAT_VERSIONS =
ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3);
@@ -586,16 +587,37 @@ public class Schema implements Serializable {
* @param formatVersion table format version
*/
public static void checkCompatibility(Schema schema, int formatVersion) {
- // check the type in each field
+ // accumulate errors as a treemap to keep them in a reasonable order
+ Map<Integer, String> problems = Maps.newTreeMap();
+
+ // check each field's type and defaults
for (NestedField field : schema.lazyIdToField().values()) {
Integer minFormatVersion =
MIN_FORMAT_VERSIONS.get(field.type().typeId());
- Preconditions.checkState(
- minFormatVersion == null || formatVersion >= minFormatVersion,
- "Invalid type in v%s schema: %s %s is not supported until v%s",
- formatVersion,
- schema.findColumnName(field.fieldId()),
- field.type(),
- minFormatVersion);
+ if (minFormatVersion != null && formatVersion < minFormatVersion) {
+ problems.put(
+ field.fieldId(),
+ String.format(
+ "Invalid type for %s: %s is not supported until v%s",
+ schema.findColumnName(field.fieldId()), field.type(),
minFormatVersion));
+ }
+
+ if (field.initialDefault() != null && formatVersion <
DEFAULT_VALUES_MIN_FORMAT_VERSION) {
+ problems.put(
+ field.fieldId(),
+ String.format(
+ "Invalid initial default for %s: non-null default (%s) is not
supported until v%s",
+ schema.findColumnName(field.fieldId()),
+ field.initialDefault(),
+ DEFAULT_VALUES_MIN_FORMAT_VERSION));
+ }
+ }
+
+ // throw if there are any compatibility problems
+ if (!problems.isEmpty()) {
+ throw new IllegalStateException(
+ String.format(
+ "Invalid schema for v%s:\n- %s",
+ formatVersion, Joiner.on("\n- ").join(problems.values())));
}
}
}
diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java
b/api/src/test/java/org/apache/iceberg/TestSchema.java
new file mode 100644
index 0000000000..fec7343c5c
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/TestSchema.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThatCode;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public class TestSchema {
+ private static final Schema TS_NANO_CASES =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.LongType.get()),
+ Types.NestedField.optional(2, "ts",
Types.TimestampNanoType.withZone()),
+ Types.NestedField.optional(
+ 3, "arr", Types.ListType.ofRequired(4,
Types.TimestampNanoType.withoutZone())),
+ Types.NestedField.required(
+ 5,
+ "struct",
+ Types.StructType.of(
+ Types.NestedField.optional(6, "inner_ts",
Types.TimestampNanoType.withZone()),
+ Types.NestedField.required(7, "data",
Types.StringType.get()))),
+ Types.NestedField.optional(
+ 8,
+ "struct_arr",
+ Types.StructType.of(
+ Types.NestedField.optional(9, "ts",
Types.TimestampNanoType.withoutZone()))));
+
+ private static final Schema INITIAL_DEFAULT_SCHEMA =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.LongType.get()),
+ Types.NestedField.required("has_default")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withInitialDefault("--")
+ .withWriteDefault("--")
+ .build());
+
+ private static final Schema WRITE_DEFAULT_SCHEMA =
+ new Schema(
+ Types.NestedField.required(1, "id", Types.LongType.get()),
+ Types.NestedField.required("has_default")
+ .withId(2)
+ .ofType(Types.StringType.get())
+ .withWriteDefault("--")
+ .build());
+
+ @ParameterizedTest
+ @ValueSource(ints = {1, 2})
+ public void testUnsupportedTimestampNano(int formatVersion) {
+ assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES,
formatVersion))
+ .isInstanceOf(IllegalStateException.class)
+ .hasMessage(
+ "Invalid schema for v%s:\n"
+ + "- Invalid type for ts: timestamptz_ns is not supported
until v3\n"
+ + "- Invalid type for arr.element: timestamp_ns is not
supported until v3\n"
+ + "- Invalid type for struct.inner_ts: timestamptz_ns is not
supported until v3\n"
+ + "- Invalid type for struct_arr.ts: timestamp_ns is not
supported until v3",
+ formatVersion);
+ }
+
+ @Test
+ public void testSupportedTimestampNano() {
+ assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES,
3)).doesNotThrowAnyException();
+ }
+
+ @ParameterizedTest
+ @ValueSource(ints = {1, 2})
+ public void testUnsupportedInitialDefault(int formatVersion) {
+ assertThatThrownBy(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA,
formatVersion))
+ .isInstanceOf(IllegalStateException.class)
+ .hasMessage(
+ "Invalid schema for v%s:\n"
+ + "- Invalid initial default for has_default: "
+ + "non-null default (--) is not supported until v3",
+ formatVersion);
+ }
+
+ @Test
+ public void testSupportedInitialDefault() {
+ assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3))
+ .doesNotThrowAnyException();
+ }
+
+ @ParameterizedTest
+ @ValueSource(ints = {1, 2, 3})
+ public void testSupportedWriteDefault(int formatVersion) {
+ // only the initial default is a forward-incompatible change
+ assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA,
formatVersion))
+ .doesNotThrowAnyException();
+ }
+}
diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
index 5ada357657..71254b3abb 100644
--- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
+++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
@@ -1672,7 +1672,8 @@ public class TestTableMetadata {
unsupportedFormatVersion))
.isInstanceOf(IllegalStateException.class)
.hasMessage(
- "Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is
not supported until v3",
+ "Invalid schema for v%s:\n"
+ + "- Invalid type for struct.ts_nanos: timestamptz_ns is not
supported until v3",
unsupportedFormatVersion);
}