rdblue commented on a change in pull request #2465:
URL: https://github.com/apache/iceberg/pull/2465#discussion_r620719667
##########
File path: core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java
##########
@@ -1223,4 +1223,232 @@ public void testMoveBetweenStructsFails() {
.moveBefore("s2.x", "s1.a")
.apply());
}
+
+ @Test
+ public void testAddExistingIdentifierFields() {
+ Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet("id"))
+ .apply();
+
+ Assert.assertEquals("add an existing field as identifier field should succeed",
+ Sets.newHashSet(newSchema.findField("id").fieldId()),
+ newSchema.identifierFieldIds());
+ }
+
+ @Test
+ public void testAddNewIdentifierFieldColumns() {
+ Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .addColumn("new_field", Types.StringType.get())
+ .setIdentifierFields(Sets.newHashSet("id", "new_field"))
+ .apply();
+
+ Assert.assertEquals("add column then set as identifier should succeed",
+ Sets.newHashSet(newSchema.findField("id").fieldId(),
newSchema.findField("new_field").fieldId()),
+ newSchema.identifierFieldIds());
+
+ newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet("id", "new_field"))
+ .addColumn("new_field", Types.StringType.get())
+ .apply();
+
+ Assert.assertEquals("set identifier then add column should succeed",
+ Sets.newHashSet(newSchema.findField("id").fieldId(),
newSchema.findField("new_field").fieldId()),
+ newSchema.identifierFieldIds());
+ }
+
+ @Test
+ public void testAddNestedIdentifierFieldColumns() {
+ Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet("preferences.feature1"))
+ .apply();
+
+ Assert.assertEquals("set existing nested field as identifier should succeed",
+ Sets.newHashSet(newSchema.findField("preferences.feature1").fieldId()),
+ newSchema.identifierFieldIds());
+
+ newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .addColumn("new", Types.StructType.of(
+ Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "field",
Types.StringType.get())
+ ))
+ .setIdentifierFields(Sets.newHashSet("new.field"))
+ .apply();
+
+ Assert.assertEquals("set newly added nested field as identifier should succeed",
+ Sets.newHashSet(newSchema.findField("new.field").fieldId()),
+ newSchema.identifierFieldIds());
+
+ newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .addColumn("new", Types.StructType.of(
+ Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "field",
Types.StructType.of(
+ Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 2,
"nested", Types.StringType.get())))))
+ .setIdentifierFields(Sets.newHashSet("new.field.nested"))
+ .apply();
+
+ Assert.assertEquals("set newly added multi-layer nested field as identifier should succeed",
+ Sets.newHashSet(newSchema.findField("new.field.nested").fieldId()),
+ newSchema.identifierFieldIds());
+ }
+
+ @Test
+ public void testAddDottedIdentifierFieldColumns() {
+ Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .addColumn(null, "dot.field", Types.StringType.get())
+ .setIdentifierFields(Sets.newHashSet("id", "dot.field"))
+ .apply();
+
+ Assert.assertEquals("add a field with dot as identifier should succeed",
+ Sets.newHashSet(newSchema.findField("id").fieldId(),
newSchema.findField("dot.field").fieldId()),
+ newSchema.identifierFieldIds());
+ }
+
+ @Test
+ public void testRemoveIdentifierFields() {
+ Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .addColumn("new_field", Types.StringType.get())
+ .addColumn("new_field2", Types.StringType.get())
+ .setIdentifierFields(Sets.newHashSet("id", "new_field", "new_field2"))
+ .apply();
+
+ newSchema = new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet("new_field", "new_field2"))
+ .apply();
+
+ Assert.assertEquals("remove an identifier field should succeed",
+ Sets.newHashSet(newSchema.findField("new_field").fieldId(),
newSchema.findField("new_field2").fieldId()),
+ newSchema.identifierFieldIds());
+
+ newSchema = new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet())
+ .apply();
+
+ Assert.assertEquals("remove all identifier fields should succeed",
+ Sets.newHashSet(),
+ newSchema.identifierFieldIds());
+ }
+
+ @Test
+ public void testSetIdentifierFieldsFails() {
+ AssertHelpers.assertThrows("add a field with name not exist should fail",
+ IllegalArgumentException.class,
+ "not found in current schema or added columns",
+ () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet("unknown"))
+ .apply());
+
+ AssertHelpers.assertThrows("add a field of non-primitive type should fail",
+ IllegalArgumentException.class,
+ "not a primitive type field",
+ () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
+ .setIdentifierFields(Sets.newHashSet("locations"))
+ .apply());
+
+ AssertHelpers.assertThrows("add a nested field in map should fail",
Review comment:
Can you add a test for trying to reference a field in a map value struct?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]