This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 7001d95 Avro: Schema conversions should preserve field docs (#1991)
7001d95 is described below
commit 7001d95b8635d58f54795e1ec4c588697e86f47d
Author: Kyle Bendickson <[email protected]>
AuthorDate: Mon Dec 28 10:47:58 2020 -0800
Avro: Schema conversions should preserve field docs (#1991)
---
.../main/java/org/apache/iceberg/avro/SchemaToType.java | 4 ++--
.../main/java/org/apache/iceberg/avro/TypeToSchema.java | 2 +-
.../test/java/org/apache/iceberg/TestMergeAppend.java | 6 ++++--
.../org/apache/iceberg/avro/TestSchemaConversions.java | 16 ++++++++++++++++
4 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
index d35ed7f..577fa23 100644
--- a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
+++ b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
@@ -93,9 +93,9 @@ class SchemaToType extends AvroSchemaVisitor<Type> {
int fieldId = getId(field);
if (AvroSchemaUtil.isOptionSchema(field.schema())) {
- newFields.add(Types.NestedField.optional(fieldId, field.name(),
fieldType));
+ newFields.add(Types.NestedField.optional(fieldId, field.name(),
fieldType, field.doc()));
} else {
- newFields.add(Types.NestedField.required(fieldId, field.name(),
fieldType));
+ newFields.add(Types.NestedField.required(fieldId, field.name(),
fieldType, field.doc()));
}
}
diff --git a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
index fc72f4b..3f1885b 100644
--- a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
+++ b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
@@ -102,7 +102,7 @@ class TypeToSchema extends TypeUtil.SchemaVisitor<Schema> {
boolean isValidFieldName = AvroSchemaUtil.validAvroName(origFieldName);
String fieldName = isValidFieldName ? origFieldName :
AvroSchemaUtil.sanitize(origFieldName);
Schema.Field field = new Schema.Field(
- fieldName, fieldSchemas.get(i), null,
+ fieldName, fieldSchemas.get(i), structField.doc(),
structField.isOptional() ? JsonProperties.NULL_VALUE : null);
if (!isValidFieldName) {
field.addProp(AvroSchemaUtil.ICEBERG_FIELD_NAME_PROP, origFieldName);
diff --git a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
index de4456f..353f051 100644
--- a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
+++ b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
@@ -239,8 +239,10 @@ public class TestMergeAppend extends TableTestBase {
public void testManifestMergeMinCount() throws IOException {
Assert.assertEquals("Table should start empty", 0,
listManifestFiles().size());
table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "2")
- // each manifest file is 5227 bytes, so 12000 bytes limit will give us
2 bins with 3 manifest/data files.
- .set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "12000")
+ // Each initial v1/v2 ManifestFile is 5661/6397 bytes respectively.
Merging two of the given
+ // manifests make one v1/v2 ManifestFile of 5672/6408 bytes
respectively, so 12850 bytes
+ // limit will give us two bins with three manifest/data files.
+ .set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "12850")
.commit();
TableMetadata base = readMetadata();
diff --git
a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
index ebea696..4d649dc 100644
--- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
+++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
@@ -309,4 +309,20 @@ public class TestSchemaConversions {
expectedOrigNames.set(1, null); // Name at pos 1 is valid so
ICEBERG_FIELD_NAME_PROP is not set
Assert.assertEquals(expectedOrigNames, origNames);
}
+
+ @Test
+ public void testFieldDocsArePreserved() {
+ List<String> fieldDocs = Lists.newArrayList(null, "iceberg originating
field doc");
+ org.apache.iceberg.Schema icebergSchema = new org.apache.iceberg.Schema(
+ required(1, "id", Types.IntegerType.get(), fieldDocs.get(0)),
+ optional(2, "data", Types.StringType.get(), fieldDocs.get(1)));
+
+ Schema avroSchema = AvroSchemaUtil.convert(icebergSchema.asStruct());
+ List<String> avroFieldDocs =
Lists.newArrayList(Iterables.transform(avroSchema.getFields(),
Schema.Field::doc));
+ Assert.assertEquals(avroFieldDocs, fieldDocs);
+
+ org.apache.iceberg.Schema origSchema =
AvroSchemaUtil.toIceberg(avroSchema);
+ List<String> origFieldDocs =
Lists.newArrayList(Iterables.transform(origSchema.columns(),
Types.NestedField::doc));
+ Assert.assertEquals(origFieldDocs, fieldDocs);
+ }
}