ihuzenko commented on a change in pull request #1844: DRILL-7326: Support 
repeated lists for CTAS parquet format
URL: https://github.com/apache/drill/pull/1844#discussion_r316135192
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRecordWriter.java
 ##########
 @@ -287,40 +290,70 @@ private Type getType(MaterializedField field) {
     DataMode dataMode = field.getType().getMode();
     switch (minorType) {
       case MAP:
-        List<Type> types = Lists.newArrayList();
-        for (MaterializedField childField : field.getChildren()) {
-          types.add(getType(childField));
-        }
+        List<Type> types = getChildrenTypes(field);
         return new GroupType(dataMode == DataMode.REPEATED ? 
Repetition.REPEATED : Repetition.OPTIONAL, field.getName(), types);
       case LIST:
-        MaterializedField elementField = field.getChildren().iterator().next();
+        MaterializedField elementField = getDataField(field);
         ListBuilder<GroupType> listBuilder = org.apache.parquet.schema.Types
             .list(dataMode == DataMode.OPTIONAL ? Repetition.OPTIONAL : 
Repetition.REQUIRED);
         addElementType(listBuilder, elementField);
         GroupType listType = listBuilder.named(field.getName());
         return listType;
       case NULL:
         MaterializedField newField = field.withType(
-          
TypeProtos.MajorType.newBuilder().setMinorType(MinorType.INT).setMode(DataMode.OPTIONAL).build());
+            
TypeProtos.MajorType.newBuilder().setMinorType(MinorType.INT).setMode(DataMode.OPTIONAL).build());
         return getPrimitiveType(newField);
       default:
         return getPrimitiveType(field);
     }
   }
 
+  /**
+   * Helper method for conversion of map child
+   * fields.
+   *
+   * @param field map
+   * @return converted child fields
+   */
+  private List<Type> getChildrenTypes(MaterializedField field) {
+    return field.getChildren().stream()
+        .map(this::getType)
+        .collect(Collectors.toList());
+  }
+
+  /**
+   * Finds data child field of list or repeated type.
+   *
+   * @param field parent repeated field
+   * @return child data field
+   */
+  private MaterializedField getDataField(MaterializedField field) {
+    return field.getChildren().stream()
+        .filter(child -> 
BaseRepeatedValueVector.DATA_VECTOR_NAME.equals(child.getName()))
+        .findFirst()
 
 Review comment:
   What's wrong with `findFirst` if this is simply a sequential stream with 
one data field and possibly one offset field?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to