rdblue commented on a change in pull request #1391:
URL: https://github.com/apache/iceberg/pull/1391#discussion_r516841726



##########
File path: 
parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
##########
@@ -19,187 +19,50 @@
 
 package org.apache.iceberg.parquet;
 
-import java.util.LinkedList;
-import java.util.List;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
-import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
-import org.apache.parquet.schema.PrimitiveType;
-import org.apache.parquet.schema.Type;
 
 /**
  * Visitor for traversing a Parquet type with a companion Iceberg type.
  *
  * @param <T> the Java class returned by the visitor
  */
-public class TypeWithSchemaVisitor<T> {
-  @SuppressWarnings({"checkstyle:VisibilityModifier", 
"checkstyle:IllegalType"})
-  protected LinkedList<String> fieldNames = Lists.newLinkedList();
+public class TypeWithSchemaVisitor<T> extends 
ParquetTypeWithPartnerVisitor<Type, T> {
 
-  @SuppressWarnings("checkstyle:CyclomaticComplexity")
-  public static <T> T visit(org.apache.iceberg.types.Type iType, Type type, 
TypeWithSchemaVisitor<T> visitor) {
-    if (type instanceof MessageType) {
-      Types.StructType struct = iType != null ? iType.asStructType() : null;
-      return visitor.message(struct, (MessageType) type,
-          visitFields(struct, type.asGroupType(), visitor));
-
-    } else if (type.isPrimitive()) {
-      org.apache.iceberg.types.Type.PrimitiveType iPrimitive = iType != null ?
-          iType.asPrimitiveType() : null;
-      return visitor.primitive(iPrimitive, type.asPrimitiveType());
-
-    } else {
-      // if not a primitive, the typeId must be a group
-      GroupType group = type.asGroupType();
-      OriginalType annotation = group.getOriginalType();
-      if (annotation != null) {
-        switch (annotation) {
-          case LIST:
-            
Preconditions.checkArgument(!group.isRepetition(Type.Repetition.REPEATED),
-                "Invalid list: top-level group is repeated: %s", group);
-            Preconditions.checkArgument(group.getFieldCount() == 1,
-                "Invalid list: does not contain single repeated field: %s", 
group);
-
-            GroupType repeatedElement = group.getFields().get(0).asGroupType();
-            
Preconditions.checkArgument(repeatedElement.isRepetition(Type.Repetition.REPEATED),
-                "Invalid list: inner group is not repeated");
-            Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1,
-                "Invalid list: repeated group is not a single field: %s", 
group);
-
-            Types.ListType list = null;
-            Types.NestedField element = null;
-            if (iType != null) {
-              list = iType.asListType();
-              element = list.fields().get(0);
-            }
-
-            visitor.fieldNames.push(repeatedElement.getName());
-            try {
-              T elementResult = null;
-              if (repeatedElement.getFieldCount() > 0) {
-                elementResult = visitField(element, 
repeatedElement.getType(0), visitor);
-              }
-
-              return visitor.list(list, group, elementResult);
-            } finally {
-              visitor.fieldNames.pop();
-            }
-
-          case MAP:
-            
Preconditions.checkArgument(!group.isRepetition(Type.Repetition.REPEATED),
-                "Invalid map: top-level group is repeated: %s", group);
-            Preconditions.checkArgument(group.getFieldCount() == 1,
-                "Invalid map: does not contain single repeated field: %s", 
group);
-
-            GroupType repeatedKeyValue = group.getType(0).asGroupType();
-            
Preconditions.checkArgument(repeatedKeyValue.isRepetition(Type.Repetition.REPEATED),
-                "Invalid map: inner group is not repeated");
-            Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2,
-                "Invalid map: repeated group does not have 2 fields");
-
-            Types.MapType map = null;
-            Types.NestedField keyField = null;
-            Types.NestedField valueField = null;
-            if (iType != null) {
-              map = iType.asMapType();
-              keyField = map.fields().get(0);
-              valueField = map.fields().get(1);
-            }
-
-            visitor.fieldNames.push(repeatedKeyValue.getName());
-            try {
-              T keyResult = null;
-              T valueResult = null;
-              switch (repeatedKeyValue.getFieldCount()) {
-                case 2:
-                  // if there are 2 fields, both key and value are projected
-                  keyResult = visitField(keyField, 
repeatedKeyValue.getType(0), visitor);
-                  valueResult = visitField(valueField, 
repeatedKeyValue.getType(1), visitor);
-                  break;
-                case 1:
-                  // if there is just one, use the name to determine what it is
-                  Type keyOrValue = repeatedKeyValue.getType(0);
-                  if (keyOrValue.getName().equalsIgnoreCase("key")) {
-                    keyResult = visitField(keyField, keyOrValue, visitor);
-                    // value result remains null
-                  } else {
-                    valueResult = visitField(valueField, keyOrValue, visitor);
-                    // key result remains null
-                  }
-                  break;
-                default:
-                  // both results will remain null
-              }
-
-              return visitor.map(map, group, keyResult, valueResult);
-
-            } finally {
-              visitor.fieldNames.pop();
-            }
-
-          default:
-        }
-      }
-
-      Types.StructType struct = iType != null ? iType.asStructType() : null;
-      return visitor.struct(struct, group, visitFields(struct, group, 
visitor));
+  @Override
+  protected Type arrayElementType(Type arrayType) {
+    if (arrayType == null) {
+      return null;
     }
-  }
 
-  private static <T> T visitField(Types.NestedField iField, Type field, 
TypeWithSchemaVisitor<T> visitor) {
-    visitor.fieldNames.push(field.getName());
-    try {
-      return visit(iField != null ? iField.type() : null, field, visitor);
-    } finally {
-      visitor.fieldNames.pop();
-    }
+    return arrayType.asListType().elementType();
   }
 
-  private static <T> List<T> visitFields(Types.StructType struct, GroupType 
group, TypeWithSchemaVisitor<T> visitor) {
-    List<T> results = 
Lists.newArrayListWithExpectedSize(group.getFieldCount());
-    for (Type field : group.getFields()) {
-      int id = -1;
-      if (field.getId() != null) {
-        id = field.getId().intValue();
-      }
-      Types.NestedField iField = (struct != null && id >= 0) ? 
struct.field(id) : null;

Review comment:
       There are two ways to traverse the schemas together, by name and by ID. 
When we are building a reader, we use the ID method because names don't 
necessarily match between when the file was written and the current table 
schema. The columns are identified by ID, so this is correct in that case.
   
   Traversing two schemas by name is only done when we know that the names 
between the two match. For example, when Spark runs a CTAS operation, we 
convert the Spark schema to an Iceberg schema and we know that the two have the 
same structure and field names, but the Spark schema has no IDs. So when we 
build a writer, we have to match by name but we know that this is safe. (The 
two schemas are needed to build writers that convert, like short -> int.)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to