edgarRd commented on a change in pull request #1140:
URL: https://github.com/apache/iceberg/pull/1140#discussion_r447132389



##########
File path: orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
##########
@@ -403,114 +398,150 @@ private static boolean isRequired(TypeDescription 
orcType) {
         Types.NestedField.optional(icebergID, name, type);
   }
 
-  private static Types.NestedField convertOrcToIceberg(TypeDescription 
orcType, String name,
-                                                       TypeUtil.NextID nextID) 
{
+  private static class OrcToIcebergVisitor extends 
OrcSchemaWithTypeVisitor<Optional<Types.NestedField>> {
 
-    final int icebergID = icebergID(orcType).orElseGet(nextID::get);
-    final boolean isRequired = isRequired(orcType);
+    private final Map<Integer, OrcField> icebergToOrcMapping;
 
-    switch (orcType.getCategory()) {
-      case BOOLEAN:
-        return getIcebergType(icebergID, name, Types.BooleanType.get(), 
isRequired);
-      case BYTE:
-      case SHORT:
-      case INT:
-        return getIcebergType(icebergID, name, Types.IntegerType.get(), 
isRequired);
-      case LONG:
-        String longAttributeValue = 
orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
-        LongType longType = longAttributeValue == null ? LongType.LONG : 
LongType.valueOf(longAttributeValue);
-        switch (longType) {
-          case TIME:
-            return getIcebergType(icebergID, name, Types.TimeType.get(), 
isRequired);
-          case LONG:
-            return getIcebergType(icebergID, name, Types.LongType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Long type found in ORC 
type attribute");
-        }
-      case FLOAT:
-        return getIcebergType(icebergID, name, Types.FloatType.get(), 
isRequired);
-      case DOUBLE:
-        return getIcebergType(icebergID, name, Types.DoubleType.get(), 
isRequired);
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return getIcebergType(icebergID, name, Types.StringType.get(), 
isRequired);
-      case BINARY:
-        String binaryAttributeValue = 
orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
-        BinaryType binaryType = binaryAttributeValue == null ? 
BinaryType.BINARY :
-            BinaryType.valueOf(binaryAttributeValue);
-        switch (binaryType) {
-          case UUID:
-            return getIcebergType(icebergID, name, Types.UUIDType.get(), 
isRequired);
-          case FIXED:
-            int fixedLength = 
Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
-            return getIcebergType(icebergID, name, 
Types.FixedType.ofLength(fixedLength), isRequired);
-          case BINARY:
-            return getIcebergType(icebergID, name, Types.BinaryType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Binary type found in ORC 
type attribute");
-        }
-      case DATE:
-        return getIcebergType(icebergID, name, Types.DateType.get(), 
isRequired);
-      case TIMESTAMP:
-        return getIcebergType(icebergID, name, 
Types.TimestampType.withoutZone(), isRequired);
-      case TIMESTAMP_INSTANT:
-        return getIcebergType(icebergID, name, Types.TimestampType.withZone(), 
isRequired);
-      case DECIMAL:
-        return getIcebergType(icebergID, name,
-            Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
-            isRequired);
-      case STRUCT: {
-        List<String> fieldNames = orcType.getFieldNames();
-        List<TypeDescription> fieldTypes = orcType.getChildren();
-        List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
-        for (int c = 0; c < fieldNames.size(); ++c) {
-          String childName = fieldNames.get(c);
-          TypeDescription type = fieldTypes.get(c);
-          Types.NestedField field = convertOrcToIceberg(type, childName, 
nextID);
-          fields.add(field);
-        }
+    OrcToIcebergVisitor(Map<Integer, OrcField> icebergToOrcMapping) {
+      this.icebergToOrcMapping = icebergToOrcMapping;
+    }
 
-        return getIcebergType(icebergID, name, Types.StructType.of(fields), 
isRequired);
+    @Override
+    public Optional<Types.NestedField> record(Types.StructType iStruct, 
TypeDescription record, List<String> names,
+                                              
List<Optional<Types.NestedField>> fields) {
+      boolean isRequired = isRequired(record);
+      Optional<Integer> icebergIdOpt = icebergID(record);
+      if (!icebergIdOpt.isPresent() || fields.size() == 0) {
+        return Optional.empty();
       }
-      case LIST: {
-        TypeDescription elementType = orcType.getChildren().get(0);
-        Types.NestedField element = convertOrcToIceberg(elementType, 
"element", nextID);
-
-        Types.ListType listTypeWithElem = isRequired(elementType) ?
-            Types.ListType.ofRequired(element.fieldId(), element.type()) :
-            Types.ListType.ofOptional(element.fieldId(), element.type());
-        return isRequired ?
-            Types.NestedField.required(icebergID, name, listTypeWithElem) :
-            Types.NestedField.optional(icebergID, name, listTypeWithElem);
-      }
-      case MAP: {
-        TypeDescription keyType = orcType.getChildren().get(0);
-        Types.NestedField key = convertOrcToIceberg(keyType, "key", nextID);
-        TypeDescription valueType = orcType.getChildren().get(1);
-        Types.NestedField value = convertOrcToIceberg(valueType, "value", 
nextID);
 
-        Types.MapType mapTypeWithKV = isRequired(valueType) ?
-            Types.MapType.ofRequired(key.fieldId(), value.fieldId(), 
key.type(), value.type()) :
-            Types.MapType.ofOptional(key.fieldId(), value.fieldId(), 
key.type(), value.type());
+      Types.StructType structType = Types.StructType.of(
+          
fields.stream().filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList()));
+      return Optional.of(getIcebergType(icebergIdOpt.get(), 
icebergToOrcMapping.get(icebergIdOpt.get()).name(),
+          structType, isRequired));
+    }
 
-        return getIcebergType(icebergID, name, mapTypeWithKV, isRequired);
+    @Override
+    public Optional<Types.NestedField> list(Types.ListType iList, 
TypeDescription array,
+                                            Optional<Types.NestedField> 
element) {
+      boolean isRequired = isRequired(array);
+      Optional<Integer> icebergIdOpt = icebergID(array);
+
+      if (!icebergIdOpt.isPresent() || !element.isPresent()) {
+        return Optional.empty();
       }
-      default:
-        // We don't have an answer for union types.
-        throw new IllegalArgumentException("Can't handle " + orcType);
+
+      Types.NestedField foundElement = element.get();
+      Types.ListType listTypeWithElem = isRequired(array.getChildren().get(0)) 
?
+          Types.ListType.ofRequired(foundElement.fieldId(), 
foundElement.type()) :
+          Types.ListType.ofOptional(foundElement.fieldId(), 
foundElement.type());
+      return Optional.of(getIcebergType(icebergIdOpt.get(),
+          icebergToOrcMapping.get(icebergIdOpt.get()).name(), 
listTypeWithElem, isRequired));
     }
-  }
 
-  private static int getMaxIcebergId(TypeDescription originalOrcSchema) {
-    int maxId = icebergID(originalOrcSchema).orElse(0);
-    final List<TypeDescription> children = 
Optional.ofNullable(originalOrcSchema.getChildren())
-        .orElse(Collections.emptyList());
-    for (TypeDescription child : children) {
-      maxId = Math.max(maxId, getMaxIcebergId(child));
+    @Override
+    public Optional<Types.NestedField> map(Types.MapType iMap, TypeDescription 
map, Optional<Types.NestedField> key,
+                                           Optional<Types.NestedField> value) {
+      boolean isRequired = isRequired(map);
+      Optional<Integer> icebergIdOpt = icebergID(map);
+
+      if (!icebergIdOpt.isPresent() || !key.isPresent() || !value.isPresent()) 
{
+        return Optional.empty();
+      }
+
+      Types.NestedField foundKey = key.get();
+      Types.NestedField foundValue = value.get();
+      Types.MapType mapTypeWithKV = isRequired(map.getChildren().get(1)) ?
+          Types.MapType.ofRequired(foundKey.fieldId(), foundValue.fieldId(), 
foundKey.type(), foundValue.type()) :
+          Types.MapType.ofOptional(foundKey.fieldId(), foundValue.fieldId(), 
foundKey.type(), foundValue.type());
+
+      return Optional.of(getIcebergType(icebergIdOpt.get(), 
icebergToOrcMapping.get(icebergIdOpt.get()).name(),
+          mapTypeWithKV, isRequired));
     }
 
-    return maxId;
+    @Override
+    public Optional<Types.NestedField> primitive(Type.PrimitiveType 
iPrimitive, TypeDescription primitive) {

Review comment:
       Yeah, I think I can add a `OrcSchemaVisitor` for this case. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to