rdblue commented on a change in pull request #1140:
URL: https://github.com/apache/iceberg/pull/1140#discussion_r448026091



##########
File path: orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
##########
@@ -403,114 +405,150 @@ private static boolean isRequired(TypeDescription 
orcType) {
         Types.NestedField.optional(icebergID, name, type);
   }
 
-  private static Types.NestedField convertOrcToIceberg(TypeDescription 
orcType, String name,
-                                                       TypeUtil.NextID nextID) 
{
+  private static class OrcToIcebergVisitor extends 
OrcSchemaVisitor<Optional<Types.NestedField>> {
 
-    final int icebergID = icebergID(orcType).orElseGet(nextID::get);
-    final boolean isRequired = isRequired(orcType);
+    private final Map<Integer, OrcField> icebergToOrcMapping;
 
-    switch (orcType.getCategory()) {
-      case BOOLEAN:
-        return getIcebergType(icebergID, name, Types.BooleanType.get(), 
isRequired);
-      case BYTE:
-      case SHORT:
-      case INT:
-        return getIcebergType(icebergID, name, Types.IntegerType.get(), 
isRequired);
-      case LONG:
-        String longAttributeValue = 
orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
-        LongType longType = longAttributeValue == null ? LongType.LONG : 
LongType.valueOf(longAttributeValue);
-        switch (longType) {
-          case TIME:
-            return getIcebergType(icebergID, name, Types.TimeType.get(), 
isRequired);
-          case LONG:
-            return getIcebergType(icebergID, name, Types.LongType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Long type found in ORC 
type attribute");
-        }
-      case FLOAT:
-        return getIcebergType(icebergID, name, Types.FloatType.get(), 
isRequired);
-      case DOUBLE:
-        return getIcebergType(icebergID, name, Types.DoubleType.get(), 
isRequired);
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return getIcebergType(icebergID, name, Types.StringType.get(), 
isRequired);
-      case BINARY:
-        String binaryAttributeValue = 
orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
-        BinaryType binaryType = binaryAttributeValue == null ? 
BinaryType.BINARY :
-            BinaryType.valueOf(binaryAttributeValue);
-        switch (binaryType) {
-          case UUID:
-            return getIcebergType(icebergID, name, Types.UUIDType.get(), 
isRequired);
-          case FIXED:
-            int fixedLength = 
Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
-            return getIcebergType(icebergID, name, 
Types.FixedType.ofLength(fixedLength), isRequired);
-          case BINARY:
-            return getIcebergType(icebergID, name, Types.BinaryType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Binary type found in ORC 
type attribute");
-        }
-      case DATE:
-        return getIcebergType(icebergID, name, Types.DateType.get(), 
isRequired);
-      case TIMESTAMP:
-        return getIcebergType(icebergID, name, 
Types.TimestampType.withoutZone(), isRequired);
-      case TIMESTAMP_INSTANT:
-        return getIcebergType(icebergID, name, Types.TimestampType.withZone(), 
isRequired);
-      case DECIMAL:
-        return getIcebergType(icebergID, name,
-            Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
-            isRequired);
-      case STRUCT: {
-        List<String> fieldNames = orcType.getFieldNames();
-        List<TypeDescription> fieldTypes = orcType.getChildren();
-        List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
-        for (int c = 0; c < fieldNames.size(); ++c) {
-          String childName = fieldNames.get(c);
-          TypeDescription type = fieldTypes.get(c);
-          Types.NestedField field = convertOrcToIceberg(type, childName, 
nextID);
-          fields.add(field);
-        }
+    OrcToIcebergVisitor(Map<Integer, OrcField> icebergToOrcMapping) {

Review comment:
       Also, if field names are passed into the `record` visitor method, why is 
this needed? The visitor could use a before/after field callback to track names:
   
   ```
   private LinkedList<String> fieldNames;
   public beforeField(String name, TypeDescription type) {
      fieldNames.push(name);
   }
   
   public afterField(String name, TypeDescription type) {
     fieldNames.pop();
   }
   
   private Strign currentFieldName() {
     return fieldNames.peek();
   }
   ```

##########
File path: orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
##########
@@ -403,114 +405,150 @@ private static boolean isRequired(TypeDescription 
orcType) {
         Types.NestedField.optional(icebergID, name, type);
   }
 
-  private static Types.NestedField convertOrcToIceberg(TypeDescription 
orcType, String name,
-                                                       TypeUtil.NextID nextID) 
{
+  private static class OrcToIcebergVisitor extends 
OrcSchemaVisitor<Optional<Types.NestedField>> {
 
-    final int icebergID = icebergID(orcType).orElseGet(nextID::get);
-    final boolean isRequired = isRequired(orcType);
+    private final Map<Integer, OrcField> icebergToOrcMapping;
 
-    switch (orcType.getCategory()) {
-      case BOOLEAN:
-        return getIcebergType(icebergID, name, Types.BooleanType.get(), 
isRequired);
-      case BYTE:
-      case SHORT:
-      case INT:
-        return getIcebergType(icebergID, name, Types.IntegerType.get(), 
isRequired);
-      case LONG:
-        String longAttributeValue = 
orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
-        LongType longType = longAttributeValue == null ? LongType.LONG : 
LongType.valueOf(longAttributeValue);
-        switch (longType) {
-          case TIME:
-            return getIcebergType(icebergID, name, Types.TimeType.get(), 
isRequired);
-          case LONG:
-            return getIcebergType(icebergID, name, Types.LongType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Long type found in ORC 
type attribute");
-        }
-      case FLOAT:
-        return getIcebergType(icebergID, name, Types.FloatType.get(), 
isRequired);
-      case DOUBLE:
-        return getIcebergType(icebergID, name, Types.DoubleType.get(), 
isRequired);
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return getIcebergType(icebergID, name, Types.StringType.get(), 
isRequired);
-      case BINARY:
-        String binaryAttributeValue = 
orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
-        BinaryType binaryType = binaryAttributeValue == null ? 
BinaryType.BINARY :
-            BinaryType.valueOf(binaryAttributeValue);
-        switch (binaryType) {
-          case UUID:
-            return getIcebergType(icebergID, name, Types.UUIDType.get(), 
isRequired);
-          case FIXED:
-            int fixedLength = 
Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
-            return getIcebergType(icebergID, name, 
Types.FixedType.ofLength(fixedLength), isRequired);
-          case BINARY:
-            return getIcebergType(icebergID, name, Types.BinaryType.get(), 
isRequired);
-          default:
-            throw new IllegalStateException("Invalid Binary type found in ORC 
type attribute");
-        }
-      case DATE:
-        return getIcebergType(icebergID, name, Types.DateType.get(), 
isRequired);
-      case TIMESTAMP:
-        return getIcebergType(icebergID, name, 
Types.TimestampType.withoutZone(), isRequired);
-      case TIMESTAMP_INSTANT:
-        return getIcebergType(icebergID, name, Types.TimestampType.withZone(), 
isRequired);
-      case DECIMAL:
-        return getIcebergType(icebergID, name,
-            Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
-            isRequired);
-      case STRUCT: {
-        List<String> fieldNames = orcType.getFieldNames();
-        List<TypeDescription> fieldTypes = orcType.getChildren();
-        List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
-        for (int c = 0; c < fieldNames.size(); ++c) {
-          String childName = fieldNames.get(c);
-          TypeDescription type = fieldTypes.get(c);
-          Types.NestedField field = convertOrcToIceberg(type, childName, 
nextID);
-          fields.add(field);
-        }
+    OrcToIcebergVisitor(Map<Integer, OrcField> icebergToOrcMapping) {

Review comment:
       Also, if field names are passed into the `record` visitor method, why is 
this needed? The visitor could use a before/after field callback to track names:
   
   ```java
   private LinkedList<String> fieldNames;
   
   public beforeField(String name, TypeDescription type) {
      fieldNames.push(name);
   }
   
   public afterField(String name, TypeDescription type) {
     fieldNames.pop();
   }
   
   private Strign currentFieldName() {
     return fieldNames.peek();
   }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to