This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 7001d95  Avro: Schema conversions should preserve field docs (#1991)
7001d95 is described below

commit 7001d95b8635d58f54795e1ec4c588697e86f47d
Author: Kyle Bendickson <[email protected]>
AuthorDate: Mon Dec 28 10:47:58 2020 -0800

    Avro: Schema conversions should preserve field docs (#1991)
---
 .../main/java/org/apache/iceberg/avro/SchemaToType.java  |  4 ++--
 .../main/java/org/apache/iceberg/avro/TypeToSchema.java  |  2 +-
 .../test/java/org/apache/iceberg/TestMergeAppend.java    |  6 ++++--
 .../org/apache/iceberg/avro/TestSchemaConversions.java   | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java 
b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
index d35ed7f..577fa23 100644
--- a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
+++ b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java
@@ -93,9 +93,9 @@ class SchemaToType extends AvroSchemaVisitor<Type> {
       int fieldId = getId(field);
 
       if (AvroSchemaUtil.isOptionSchema(field.schema())) {
-        newFields.add(Types.NestedField.optional(fieldId, field.name(), 
fieldType));
+        newFields.add(Types.NestedField.optional(fieldId, field.name(), 
fieldType, field.doc()));
       } else {
-        newFields.add(Types.NestedField.required(fieldId, field.name(), 
fieldType));
+        newFields.add(Types.NestedField.required(fieldId, field.name(), 
fieldType, field.doc()));
       }
     }
 
diff --git a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java 
b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
index fc72f4b..3f1885b 100644
--- a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
+++ b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java
@@ -102,7 +102,7 @@ class TypeToSchema extends TypeUtil.SchemaVisitor<Schema> {
       boolean isValidFieldName = AvroSchemaUtil.validAvroName(origFieldName);
       String fieldName =  isValidFieldName ? origFieldName : 
AvroSchemaUtil.sanitize(origFieldName);
       Schema.Field field = new Schema.Field(
-          fieldName, fieldSchemas.get(i), null,
+          fieldName, fieldSchemas.get(i), structField.doc(),
           structField.isOptional() ? JsonProperties.NULL_VALUE : null);
       if (!isValidFieldName) {
         field.addProp(AvroSchemaUtil.ICEBERG_FIELD_NAME_PROP, origFieldName);
diff --git a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java 
b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
index de4456f..353f051 100644
--- a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
+++ b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
@@ -239,8 +239,10 @@ public class TestMergeAppend extends TableTestBase {
   public void testManifestMergeMinCount() throws IOException {
     Assert.assertEquals("Table should start empty", 0, 
listManifestFiles().size());
     table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "2")
-        // each manifest file is 5227 bytes, so 12000 bytes limit will give us 
2 bins with 3 manifest/data files.
-        .set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "12000")
+        // Each initial v1/v2 ManifestFile is 5661/6397 bytes respectively. 
Merging two of the given
+        // manifests make one v1/v2 ManifestFile of 5672/6408 bytes 
respectively, so 12850 bytes
+        // limit will give us two bins with three manifest/data files.
+        .set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "12850")
         .commit();
 
     TableMetadata base = readMetadata();
diff --git 
a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java 
b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
index ebea696..4d649dc 100644
--- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
+++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java
@@ -309,4 +309,20 @@ public class TestSchemaConversions {
     expectedOrigNames.set(1, null);  // Name at pos 1 is valid so 
ICEBERG_FIELD_NAME_PROP is not set
     Assert.assertEquals(expectedOrigNames, origNames);
   }
+
+  @Test
+  public void testFieldDocsArePreserved() {
+    List<String> fieldDocs = Lists.newArrayList(null, "iceberg originating 
field doc");
+    org.apache.iceberg.Schema icebergSchema = new org.apache.iceberg.Schema(
+        required(1, "id", Types.IntegerType.get(), fieldDocs.get(0)),
+        optional(2, "data", Types.StringType.get(), fieldDocs.get(1)));
+
+    Schema avroSchema = AvroSchemaUtil.convert(icebergSchema.asStruct());
+    List<String> avroFieldDocs = 
Lists.newArrayList(Iterables.transform(avroSchema.getFields(), 
Schema.Field::doc));
+    Assert.assertEquals(avroFieldDocs, fieldDocs);
+
+    org.apache.iceberg.Schema origSchema = 
AvroSchemaUtil.toIceberg(avroSchema);
+    List<String> origFieldDocs = 
Lists.newArrayList(Iterables.transform(origSchema.columns(), 
Types.NestedField::doc));
+    Assert.assertEquals(origFieldDocs, fieldDocs);
+  }
 }

Reply via email to