rdblue commented on a change in pull request #1140:
URL: https://github.com/apache/iceberg/pull/1140#discussion_r448024156



##########
File path: orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
##########
@@ -403,114 +405,150 @@ private static boolean isRequired(TypeDescription 
orcType) {
         Types.NestedField.optional(icebergID, name, type);
   }
 
-  private static Types.NestedField convertOrcToIceberg(TypeDescription 
orcType, String name,
-                                                       TypeUtil.NextID nextID) 
{
+  private static class OrcToIcebergVisitor extends 
OrcSchemaVisitor<Optional<Types.NestedField>> {
 
-    final int icebergID = icebergID(orcType).orElseGet(nextID::get);
-    final boolean isRequired = isRequired(orcType);
+    private final Map<Integer, OrcField> icebergToOrcMapping;
 
-    switch (orcType.getCategory()) {
-      case BOOLEAN:
-        return getIcebergType(icebergID, name, Types.BooleanType.get(), 
isRequired);
-      case BYTE:
-      case SHORT:
-      case INT:
-        return getIcebergType(icebergID, name, Types.IntegerType.get(), 
isRequired);
-      case LONG:
-        String longAttributeValue = 
orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
-        LongType longType = longAttributeValue == null ? LongType.LONG : 
LongType.valueOf(longAttributeValue);
-        switch (longType) {
-          case TIME:
-            return getIcebergType(icebergID, name, Types.TimeType.get(), 
isRequired);
-          case LONG:
-            return getIcebergType(icebergID, name, Types.LongType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Long type found in ORC 
type attribute");
-        }
-      case FLOAT:
-        return getIcebergType(icebergID, name, Types.FloatType.get(), 
isRequired);
-      case DOUBLE:
-        return getIcebergType(icebergID, name, Types.DoubleType.get(), 
isRequired);
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return getIcebergType(icebergID, name, Types.StringType.get(), 
isRequired);
-      case BINARY:
-        String binaryAttributeValue = 
orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
-        BinaryType binaryType = binaryAttributeValue == null ? 
BinaryType.BINARY :
-            BinaryType.valueOf(binaryAttributeValue);
-        switch (binaryType) {
-          case UUID:
-            return getIcebergType(icebergID, name, Types.UUIDType.get(), 
isRequired);
-          case FIXED:
-            int fixedLength = 
Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
-            return getIcebergType(icebergID, name, 
Types.FixedType.ofLength(fixedLength), isRequired);
-          case BINARY:
-            return getIcebergType(icebergID, name, Types.BinaryType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Binary type found in ORC 
type attribute");
-        }
-      case DATE:
-        return getIcebergType(icebergID, name, Types.DateType.get(), 
isRequired);
-      case TIMESTAMP:
-        return getIcebergType(icebergID, name, 
Types.TimestampType.withoutZone(), isRequired);
-      case TIMESTAMP_INSTANT:
-        return getIcebergType(icebergID, name, Types.TimestampType.withZone(), 
isRequired);
-      case DECIMAL:
-        return getIcebergType(icebergID, name,
-            Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
-            isRequired);
-      case STRUCT: {
-        List<String> fieldNames = orcType.getFieldNames();
-        List<TypeDescription> fieldTypes = orcType.getChildren();
-        List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
-        for (int c = 0; c < fieldNames.size(); ++c) {
-          String childName = fieldNames.get(c);
-          TypeDescription type = fieldTypes.get(c);
-          Types.NestedField field = convertOrcToIceberg(type, childName, 
nextID);
-          fields.add(field);
-        }
+    OrcToIcebergVisitor(Map<Integer, OrcField> icebergToOrcMapping) {
+      this.icebergToOrcMapping = icebergToOrcMapping;
+    }
 
-        return getIcebergType(icebergID, name, Types.StructType.of(fields), 
isRequired);
-      }
-      case LIST: {
-        TypeDescription elementType = orcType.getChildren().get(0);
-        Types.NestedField element = convertOrcToIceberg(elementType, 
"element", nextID);
-
-        Types.ListType listTypeWithElem = isRequired(elementType) ?
-            Types.ListType.ofRequired(element.fieldId(), element.type()) :
-            Types.ListType.ofOptional(element.fieldId(), element.type());
-        return isRequired ?
-            Types.NestedField.required(icebergID, name, listTypeWithElem) :
-            Types.NestedField.optional(icebergID, name, listTypeWithElem);
+    @Override
+    public Optional<Types.NestedField> record(TypeDescription record, 
List<String> names,
+                                              
List<Optional<Types.NestedField>> fields) {
+      boolean isRequired = isRequired(record);
+      Optional<Integer> icebergIdOpt = icebergID(record);
+      if (!icebergIdOpt.isPresent() || fields.size() == 0) {

Review comment:
       Should this also check that any of the fields are defined?
   
   ```
   if (!icebergIdOpt.isPresent() || 
!fields.stream().anyMatch(Optional::isPresent)) {
     return Optional.empty();
   }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to