rdblue commented on a change in pull request #207: Add external schema mappings 
for files written with name-based schemas #40
URL: https://github.com/apache/incubator-iceberg/pull/207#discussion_r331300121
 
 

 ##########
 File path: core/src/main/java/org/apache/iceberg/avro/PruneColumns.java
 ##########
 @@ -138,28 +166,98 @@ public Schema array(Schema array, Schema element) {
 
   @Override
   public Schema map(Schema map, Schema value) {
-    int keyId = AvroSchemaUtil.getKeyId(map);
-    int valueId = AvroSchemaUtil.getValueId(map);
+    Integer keyId = id(map, AvroSchemaUtil.KEY_ID_PROP, "key");
+    Integer valueId = id(map, AvroSchemaUtil.VALUE_ID_PROP, "value");
+    if (keyId == null) {
+      if (valueId != null) {
+        LOG.warn("Map schema {} has value-id but not key-id", map);
+      }
+      return null;
+    }
     // if either key or value is selected, the whole map must be projected
     if (selectedIds.contains(keyId) || selectedIds.contains(valueId)) {
-      return map;
+      // Assign ids. Ids may not always be present in the schema
+      return mapWithIds(map, keyId, valueId);
     } else if (value != null) {
       if (value != map.getValueType()) {
         // the value must be a projection
-        return Schema.createMap(value);
+        return mapWithIds(Schema.createMap(value), keyId, valueId);
       }
       return map;
     }
 
     return null;
   }
 
+  private Schema arrayWithId(Schema array, Integer elementId) {
+    if (!AvroSchemaUtil.hasProperty(array, AvroSchemaUtil.ELEMENT_ID_PROP)) {
+      Schema result = Schema.createArray(array.getElementType());
+      result.addProp(AvroSchemaUtil.ELEMENT_ID_PROP, elementId);
+      return result;
+    }
+    return array;
+  }
+
+  private Schema complexMapWithIds(Schema map, Integer keyId, Integer valueId) {
+    Schema keyValue = map.getElementType();
+    if (!AvroSchemaUtil.hasFieldId(keyValue.getField("key"))) {
+      return AvroSchemaUtil.createMap(
+          keyId, keyValue.getField("key").schema(),
+          valueId, keyValue.getField("value").schema());
+    }
+    return map;
+  }
+
+  private Schema mapWithIds(Schema map, Integer keyId, Integer valueId) {
+    if (!AvroSchemaUtil.hasProperty(map, AvroSchemaUtil.KEY_ID_PROP)) {
+      Schema result = Schema.createMap(map.getValueType());
+      result.addProp(AvroSchemaUtil.KEY_ID_PROP, keyId);
+      result.addProp(AvroSchemaUtil.VALUE_ID_PROP, valueId);
+      return result;
+    }
+    return map;
+  }
+
   @Override
   public Schema primitive(Schema primitive) {
     // primitives are not selected directly
     return null;
   }
 
+  private Integer id(Schema schema, String propertyName, String mappedName) {
+    if (AvroSchemaUtil.hasProperty(schema, propertyName)) {
+      return AvroSchemaUtil.getId(schema, propertyName);
+    } else {
+      MappedField mappedField = mappedField(mappedName);
+      if (mappedField != null) {
+        return mappedField.id();
+      } else {
+        return null;
+      }
+    }
+  }
+
+  private Integer fieldId(Schema.Field field) {
 
 Review comment:
   What about updating `AvroSchemaUtil.getFieldId` to accept a mapping? Is that
method used in other places where you wouldn't want to add a mapping? It seems like
the changes might be simpler if the mapping were supported in the util class.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to