fbocse commented on a change in pull request #1177:
URL: https://github.com/apache/iceberg/pull/1177#discussion_r454374562



##########
File path: core/src/test/java/org/apache/iceberg/TestSchemaUpdateSync.java
##########
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+
+import com.google.common.collect.Lists;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.iceberg.types.Type.PrimitiveType;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.BinaryType;
+import org.apache.iceberg.types.Types.BooleanType;
+import org.apache.iceberg.types.Types.DateType;
+import org.apache.iceberg.types.Types.DecimalType;
+import org.apache.iceberg.types.Types.DoubleType;
+import org.apache.iceberg.types.Types.FixedType;
+import org.apache.iceberg.types.Types.FloatType;
+import org.apache.iceberg.types.Types.IntegerType;
+import org.apache.iceberg.types.Types.LongType;
+import org.apache.iceberg.types.Types.NestedField;
+import org.apache.iceberg.types.Types.StringType;
+import org.apache.iceberg.types.Types.TimeType;
+import org.apache.iceberg.types.Types.TimestampType;
+import org.apache.iceberg.types.Types.UUIDType;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public class TestSchemaUpdateSync {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  private static List<? extends PrimitiveType> primitiveTypes() {
+    return Lists.newArrayList(StringType.get(),
+        TimeType.get(),
+        TimestampType.withoutZone(),
+        TimestampType.withZone(),
+        UUIDType.get(),
+        DateType.get(),
+        BooleanType.get(),
+        BinaryType.get(),
+        DoubleType.get(),
+        IntegerType.get(),
+        FixedType.ofLength(10),
+        DecimalType.of(10, 2),
+        LongType.get(),
+        FloatType.get()
+    );
+  }
+
+  private static NestedField[] primitiveFields(Integer initialValue, List<? 
extends PrimitiveType> primitiveTypes) {
+    AtomicInteger i = new AtomicInteger(initialValue);
+    return primitiveTypes.stream()
+        .map(type -> optional(i.incrementAndGet(), type.toString(),
+            
Types.fromPrimitiveString(type.toString()))).toArray(NestedField[]::new);
+  }
+
+  @Test
+  public void testAddTopLevelPrimitives() {
+    Schema newSchema = new Schema(primitiveFields(0, primitiveTypes()));
+    Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(newSchema).apply();
+    Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+  }
+
+  @Test
+  public void testAddTopLevelListOfPrimitives() {
+    for(PrimitiveType primitiveType : primitiveTypes()) {
+      Schema newSchema = new Schema(optional(1, "aList", 
Types.ListType.ofOptional(2, primitiveType)));
+      Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(newSchema).apply();
+      Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+    }
+  }
+
+  @Test
+  public void testAddTopLevelMapOfPrimitives() {
+    for(PrimitiveType primitiveType : primitiveTypes()) {
+      Schema newSchema = new Schema(optional(1, "aMap", 
Types.MapType.ofOptional(2, 3, primitiveType, primitiveType)));
+      Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(newSchema).apply();
+      Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+    }
+  }
+
+  @Test
+  public void testAddTopLevelStructOfPrimitives() {
+    for(PrimitiveType primitiveType : primitiveTypes()) {
+      Schema currentSchema = new Schema(optional(1, "aStruct", 
Types.StructType.of(
+          optional(2, "primitive", primitiveType))));
+      Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(currentSchema).apply();
+      Assert.assertEquals(currentSchema.asStruct(), applied.asStruct());
+    }
+  }
+
+  @Test
+  public void testAddNestedPrimitive() {
+    for(PrimitiveType primitiveType : primitiveTypes()) {
+      Schema currentSchema = new Schema(optional(1, "aStruct", 
Types.StructType.of()));
+      Schema newSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
+          optional(2, "primitive", primitiveType))));
+      Schema applied = new SchemaUpdate(currentSchema, 
1).upsertSchema(newSchema).apply();
+      Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+    }
+  }
+
+  @Test
+  public void testAddNestedPrimitives() {
+    Schema currentSchema = new Schema(optional(1, "aStruct", 
Types.StructType.of()));
+    Schema newSchema = new Schema(optional(1, "aStruct", Types.StructType.of(
+        primitiveFields(1, primitiveTypes()))));
+    Schema applied = new SchemaUpdate(currentSchema, 
1).upsertSchema(newSchema).apply();
+    Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+  }
+
+  @Test
+  public void testAddNestedLists() {
+    Schema newSchema = new Schema(optional(1, "aList",
+        Types.ListType.ofOptional(2,
+            Types.ListType.ofOptional(3,
+                Types.ListType.ofOptional(4,
+                    Types.ListType.ofOptional(5,
+                        Types.ListType.ofOptional(6,
+                            Types.ListType.ofOptional(7,
+                                Types.ListType.ofOptional(8,
+                                    Types.ListType.ofOptional(9,
+                                        Types.ListType.ofOptional(10,
+                                            DecimalType.of(11, 20))))))))))));
+    Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(newSchema).apply();
+    Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+  }
+
+  @Test
+  public void testAddNestedStruct() {
+    Schema newSchema = new Schema(optional(1, "struct1", Types.StructType.of(
+            optional(2, "struct2", Types.StructType.of(
+                optional(3, "struct3", Types.StructType.of(
+                    optional(4, "struct4", Types.StructType.of(
+                        optional(5, "struct5", Types.StructType.of(
+                            optional(6, "struct6", Types.StructType.of(
+                                optional(7, "aString", 
StringType.get()))))))))))))));
+    Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(newSchema).apply();
+    Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+  }
+
+  @Test
+  public void testAddNestedMaps() {
+    Schema newSchema = new Schema(optional(1, "struct", 
Types.MapType.ofOptional(
+        2, 3, StringType.get(), Types.MapType.ofOptional(
+            4, 5, StringType.get(), Types.MapType.ofOptional(
+                6, 7, StringType.get(), Types.MapType.ofOptional(
+                    8 ,9, StringType.get(), Types.MapType.ofOptional(
+                        10 ,11, StringType.get(), Types.MapType.ofOptional(
+                            12, 13, StringType.get(), StringType.get()))))))));
+    Schema applied = new SchemaUpdate(new Schema(), 
0).upsertSchema(newSchema).apply();
+    Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+  }
+
+  @Test
+  public void testDetectInvalidTopLevelList() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Cannot change column type: aList.element: string -> 
long");
+
+    Schema currentSchema = new Schema(optional(1, "aList",
+        Types.ListType.ofOptional(2, StringType.get())));
+    Schema newSchema = new Schema(optional(1, "aList",
+        Types.ListType.ofOptional(2, LongType.get())));
+    new SchemaUpdate(currentSchema, 2).upsertSchema(newSchema).apply();
+  }
+
+  @Test
+  public void testDetectInvalidTopLevelMapValue() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Cannot change column type: aMap.value: string -> 
long");
+
+    Schema currentSchema = new Schema(optional(1, "aMap",
+        Types.MapType.ofOptional(2,3, StringType.get(), StringType.get())));
+    Schema newSchema = new Schema(optional(1, "aMap",
+        Types.MapType.ofOptional(2,3, StringType.get(), LongType.get())));
+    Schema apply = new SchemaUpdate(currentSchema, 
3).upsertSchema(newSchema).apply();
+    System.out.println(apply.toString());
+  }
+
+  @Test
+  public void testDetectInvalidTopLevelMapKey() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Cannot change column type: aMap.key: string -> 
uuid");
+
+    Schema currentSchema = new Schema(optional(1, "aMap",
+        Types.MapType.ofOptional(2,3 , StringType.get(), StringType.get())));
+    Schema newSchema = new Schema(optional(1, "aMap",
+        Types.MapType.ofOptional(2,3 , UUIDType.get(), StringType.get())));
+    new SchemaUpdate(currentSchema, 3).upsertSchema(newSchema).apply();
+  }
+
+  @Test
+  // int       32-bit signed integers -> Can promote to long
+  public void testTypePromoteIntegerToLong() {
+    Schema currentSchema = new Schema(required(1, "aCol", IntegerType.get()));
+    Schema newSchema = new Schema(required(1, "aCol", LongType.get()));
+
+    Schema applied = new SchemaUpdate(currentSchema, 
1).upsertSchema(newSchema).apply();
+    Assert.assertEquals(1, applied.asStruct().fields().size());
+    Assert.assertEquals(LongType.get(), 
applied.asStruct().fields().get(0).type());
+  }
+
+  @Test
+  // float     32-bit IEEE 754 floating point -> Can promote to double
+  public void testTypePromoteFloatToDouble() {
+    Schema currentSchema = new Schema(required(1, "aCol", FloatType.get()));
+    Schema newSchema = new Schema(required(1, "aCol", DoubleType.get()));
+
+    Schema applied = new SchemaUpdate(currentSchema, 
1).upsertSchema(newSchema).apply();
+    Assert.assertEquals(1, applied.asStruct().fields().size());
+    Assert.assertEquals(DoubleType.get(), 
applied.asStruct().fields().get(0).type());
+    // Doesn't work Assert.assertEquals(newSchema.asStruct(), 
applied.asStruct());
+    // java.lang.AssertionError:
+    // Expected :struct<1: aCol: required double>
+    // Actual   :struct<1: aCol: required double ()>
+  }
+
+  @Test
+  public void testInvalidTypePromoteDoubleToFloat() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("Cannot change column type: aCol: double -> float");
+
+    Schema currentSchema = new Schema(required(1, "aCol", DoubleType.get()));
+    Schema newSchema = new Schema(required(1, "aCol", FloatType.get()));
+
+    new SchemaUpdate(currentSchema, 1).upsertSchema(newSchema).apply();
+  }
+
+  @Test
+  // decimal(P,S)      Fixed-point decimal; precision P, scale S       -> 
Scale is fixed [1], precision must be 38 or less
+  public void testTypePromoteDecimalToFixedScaleWithWiderPrecision() {
+    Schema currentSchema = new Schema(required(1, "aCol", DecimalType.of(20, 
1)));
+    Schema newSchema = new Schema(required(1, "aCol", DecimalType.of(22, 1)));
+
+    Schema applied = new SchemaUpdate(currentSchema, 
1).upsertSchema(newSchema).apply();
+    Assert.assertEquals(newSchema.asStruct(), applied.asStruct());
+  }
+
+  @Test
+  public void testAddPrimitiveToNestedStruct() {
+    Schema schema = new Schema(
+        required(1, "struct1", Types.StructType.of(
+          optional(2, "struct2", Types.StructType.of(
+                  optional(3, "list", Types.ListType.ofOptional(
+                      4, Types.StructType.of(
+                          optional(5, "value", StringType.get())))))))));
+
+    Schema newSchema = new Schema(
+        required(1, "struct1", Types.StructType.of(
+            optional(2, "struct2", Types.StructType.of(
+                optional(3, "list", Types.ListType.ofOptional(
+                    4, Types.StructType.of(
+                        optional(5, "time", TimeType.get())))))))));

Review comment:
       Dunno really, do you need renames in your use-case?
   If so, do you have a plan on how to push those changes down to Iceberg APIs, 
do you know before committing to Iceberg what the effective columns you're 
renaming are? Would we rely on Iceberg to perform the renames implicitly, how 
would it differentiate between an add or a rename?
    
   In the particular use-case I've described I didn't account for that 
requirement. Observed schemas don't require column renames.
   However I assume that the only way we could reliably implement this is to 
reconcile column name changes by relying on the field ids though, right? 
   Also for this particular use-case of sub-schemas getting unioned to support 
column renames the operations are additive so they can be running out of order 
- eventually the union schema is the same no matter the order in which the 
observed schemas were committed. 
   But for things like renames and deletes we'd need to account for order too, 
right?
   I hate building things that need to account for order cause most of the time 
I get them wrong.
   So I'd say that I don't think that we can combine the two implementations 
into a single method...
   Wdyt?
   
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to