chenjunjiedada commented on a change in pull request #1391:
URL: https://github.com/apache/iceberg/pull/1391#discussion_r515805748



##########
File path: 
parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeWithPartnerVisitor.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.parquet;
+
+import java.util.Deque;
+import java.util.List;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.util.Pair;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+
+public abstract class ParquetTypeWithPartnerVisitor<P, T> {
+  private final Deque<String> fieldNames = Lists.newLinkedList();
+
+  public static <P, T> T visit(P partnerType, Type type, 
ParquetTypeWithPartnerVisitor<P, T> visitor) {
+    if (type instanceof MessageType) {

Review comment:
       @JingsongLi , The `visit` is also called by `vistitList` and `visitMap` 
where we cannot guarantee that the inner type is not null. So I think we should 
allow null partner here. Does that make sense to you?

##########
File path: 
parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeWithPartnerVisitor.java
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.parquet;
+
+import java.util.Deque;
+import java.util.List;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+
+public abstract class ParquetTypeWithPartnerVisitor<P, T> {
+  private final Deque<String> fieldNames = Lists.newLinkedList();
+
+  public static <P, T> T visit(P partnerType, Type type, 
ParquetTypeWithPartnerVisitor<P, T> visitor) {
+    if (type instanceof MessageType) {
+      return visitor.message(partnerType, (MessageType) type, 
visitFields(partnerType, type.asGroupType(), visitor));
+    } else if (type.isPrimitive()) {
+      return visitor.primitive(partnerType, type.asPrimitiveType());
+    } else {
+      // if not a primitive, the typeId must be a group
+      GroupType group = type.asGroupType();
+      OriginalType annotation = group.getOriginalType();
+      if (annotation != null) {
+        switch (annotation) {
+          case LIST:
+            return visitList(partnerType, group, visitor);
+          case MAP:
+            return visitMap(partnerType, group, visitor);
+          default:
+        }
+      }
+      return visitor.struct(partnerType, group, visitFields(partnerType, 
group, visitor));
+    }
+  }
+
+  private static <P, T> T visitList(P list, GroupType group, 
ParquetTypeWithPartnerVisitor<P, T> visitor) {
+    Preconditions.checkArgument(!group.isRepetition(Type.Repetition.REPEATED),
+        "Invalid list: top-level group is repeated: %s", group);
+    Preconditions.checkArgument(group.getFieldCount() == 1,
+        "Invalid list: does not contain single repeated field: %s", group);
+
+    GroupType repeatedElement = group.getFields().get(0).asGroupType();
+    
Preconditions.checkArgument(repeatedElement.isRepetition(Type.Repetition.REPEATED),
+        "Invalid list: inner group is not repeated");
+    Preconditions.checkArgument(repeatedElement.getFieldCount() <= 1,
+        "Invalid list: repeated group is not a single field: %s", group);
+
+    visitor.beforeRepeatedElement(repeatedElement);
+    try {
+      T elementResult = null;
+      if (repeatedElement.getFieldCount() > 0) {
+        Type elementField = repeatedElement.getType(0);
+        try {
+          visitor.beforeElementField(elementField);
+          elementResult = visit(visitor.arrayElementType(list), elementField, 
visitor);
+        } finally {
+          visitor.afterElementField(elementField);
+        }
+      }
+      return visitor.list(list, group, elementResult);
+    } finally {
+      visitor.afterRepeatedElement(repeatedElement);
+    }
+  }
+
+  private static <P, T> T visitMap(P map, GroupType group, 
ParquetTypeWithPartnerVisitor<P, T> visitor) {
+    Preconditions.checkArgument(!group.isRepetition(Type.Repetition.REPEATED),
+        "Invalid map: top-level group is repeated: %s", group);
+    Preconditions.checkArgument(group.getFieldCount() == 1,
+        "Invalid map: does not contain single repeated field: %s", group);
+
+    GroupType repeatedKeyValue = group.getType(0).asGroupType();
+    
Preconditions.checkArgument(repeatedKeyValue.isRepetition(Type.Repetition.REPEATED),
+        "Invalid map: inner group is not repeated");
+    Preconditions.checkArgument(repeatedKeyValue.getFieldCount() <= 2,
+        "Invalid map: repeated group does not have 2 fields");
+
+    visitor.beforeRepeatedKeyValue(repeatedKeyValue);
+    try {
+      T keyResult = null;
+      T valueResult = null;
+      switch (repeatedKeyValue.getFieldCount()) {
+        case 2:
+          // if there are 2 fields, both key and value are projected
+          Type keyType = repeatedKeyValue.getType(0);
+          visitor.beforeKeyField(keyType);
+          try {
+            keyResult = visit(visitor.mapKeyType(map), keyType, visitor);
+          } finally {
+            visitor.afterKeyField(keyType);
+          }
+          Type valueType = repeatedKeyValue.getType(1);
+          visitor.beforeValueField(valueType);
+          try {
+            valueResult = visit(visitor.mapValueType(map), 
repeatedKeyValue.getType(1), visitor);
+          } finally {
+            visitor.afterValueField(valueType);
+          }
+          break;
+        case 1:
+          // if there is just one, use the name to determine what it is
+          Type keyOrValue = repeatedKeyValue.getType(0);
+          if (keyOrValue.getName().equalsIgnoreCase("key")) {
+            visitor.beforeKeyField(keyOrValue);
+            try {
+              keyResult = visit(visitor.mapKeyType(map), keyOrValue, visitor);
+            } finally {
+              visitor.afterKeyField(keyOrValue);
+            }
+            // value result remains null
+          } else {
+            visitor.beforeValueField(keyOrValue);
+            try {
+              valueResult = visit(visitor.mapValueType(map), keyOrValue, 
visitor);
+            } finally {
+              visitor.afterValueField(keyOrValue);
+            }
+            // key result remains null
+          }
+          break;
+        default:
+          // both results will remain null
+      }
+
+      return visitor.map(map, group, keyResult, valueResult);
+
+    } finally {
+      visitor.afterRepeatedKeyValue(repeatedKeyValue);
+    }
+  }
+
+  protected static <P, T> List<T> visitFields(P struct, GroupType group, 
ParquetTypeWithPartnerVisitor<P, T> visitor) {
+    List<T> results = 
Lists.newArrayListWithExpectedSize(group.getFieldCount());
+    for (int i = 0; i < group.getFieldCount(); i += 1) {
+      Type field = group.getFields().get(i);
+      visitor.beforeField(field);
+      Integer fieldId = field.getId() == null ? null : 
field.getId().intValue();
+      results.add(visit(visitor.fieldType(struct, i, fieldId), field, 
visitor));
+      visitor.afterField(field);

Review comment:
       Yes, I think so. Nice catch!




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to