paul-rogers commented on a change in pull request #2026: DRILL-7330: Implement 
metadata usage for all format plugins
URL: https://github.com/apache/drill/pull/2026#discussion_r392610460
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
 ##########
 @@ -292,39 +289,86 @@ public static int getPartitionDepth(FileSelection 
selection) {
    * @param includeFileImplicitColumns if file implicit columns should be 
included into the result
    * @param fs                         file system
    * @param index                      index of row group to populate
+   * @param start                      start of row group to populate
+   * @param length                     length of row group to populate
    * @return implicit columns map
    */
   public Map<String, String> populateImplicitAndInternalColumns(Path filePath,
       List<String> partitionValues, boolean includeFileImplicitColumns, 
FileSystem fs, int index, long start, long length) {
 
     Map<String, String> implicitValues =
-        new LinkedHashMap<>(populateImplicitColumns(filePath, partitionValues, 
includeFileImplicitColumns));
+        new LinkedHashMap<>(populateImplicitAndInternalColumns(filePath, 
partitionValues, includeFileImplicitColumns, fs));
 
-    selectedInternalColumns.forEach((key, value) -> {
-      switch (value) {
+    selectedInternalColumns.forEach(
+        (key, value) -> implicitValues.put(key, getImplicitColumnValue(value, 
filePath, fs, index, start, length)));
+
+    return implicitValues;
+  }
+
+  /**
+   * Returns implicit column value for specified implicit file column.
+   *
+   * @param column   implicit file column
+   * @param filePath file path, used to populate file implicit columns
+   * @param fs       file system
+   * @param index    row group index
+   * @param start    row group start
+   * @param length   row group length
+   * @return implicit column value for specified implicit file column
+   */
+  private static String getImplicitColumnValue(ImplicitFileColumn column, Path 
filePath,
+      FileSystem fs, Integer index, Long start, Long length) {
+    if (column instanceof ImplicitFileColumns) {
+      ImplicitFileColumns fileColumn = (ImplicitFileColumns) column;
+      return fileColumn.getValue(filePath);
+    } else if (column instanceof ImplicitInternalFileColumns) {
+      ImplicitInternalFileColumns fileColumn = (ImplicitInternalFileColumns) 
column;
+      switch (fileColumn) {
         case ROW_GROUP_INDEX:
-          implicitValues.put(key, String.valueOf(index));
-          break;
+          return index != null ? String.valueOf(index) : null;
         case ROW_GROUP_START:
-          implicitValues.put(key, String.valueOf(start));
-          break;
+          return start != null ? String.valueOf(start) : null;
         case ROW_GROUP_LENGTH:
-          implicitValues.put(key, String.valueOf(length));
-          break;
+          return length != null ? String.valueOf(length) : null;
         case PROJECT_METADATA:
-          implicitValues.put(key, Boolean.TRUE.toString());
-          break;
+          return Boolean.TRUE.toString();
         case LAST_MODIFIED_TIME:
           try {
-            implicitValues.put(key, 
String.valueOf(fs.getFileStatus(filePath).getModificationTime()));
+            return fs != null ? 
String.valueOf(fs.getFileStatus(filePath).getModificationTime()) : null;
           } catch (IOException e) {
             throw new DrillRuntimeException(e);
           }
-          break;
       }
-    });
+    }
+    return null;
+  }
 
-    return implicitValues;
+  /**
+   * Returns implicit column value for specified implicit file column.
+   *
+   * @param column   implicit file column
+   * @param filePath file path
+   * @param fs       file system
+   * @return implicit column value for specified implicit file column
+   */
+  public static String getImplicitColumnValue(ImplicitFileColumn column, Path 
filePath, FileSystem fs) {
+    return getImplicitColumnValue(column, filePath, fs, null, null, null);
+  }
+
+  /**
+   * Returns list of implicit file columns which includes all elements from 
{@link ImplicitFileColumns},
+   * {@link ImplicitInternalFileColumns#LAST_MODIFIED_TIME} and {@link 
ImplicitInternalFileColumns#PROJECT_METADATA}
+   * columns.
+   *
+   * @return list of implicit file columns
+   */
+  public static List<ImplicitFileColumn> getImplicitFileColumns() {
+    List<ImplicitFileColumn> implicitColumns = new ArrayList<>();
+    Collections.addAll(implicitColumns, ImplicitFileColumns.values());
+
+    implicitColumns.add(ImplicitInternalFileColumns.LAST_MODIFIED_TIME);
+    implicitColumns.add(ImplicitInternalFileColumns.PROJECT_METADATA);
 
 Review comment:
   Please explain this one. What does it mean and how is it used? I'm a bit 
confused because the reader does not know how its data will be used. When would 
the reader/scan set the value for this field? Only in that EOF case discussed 
above? If so, then it may not be needed for the "new" scan. (And, perhaps we 
should fix the case in the old one since it seems silly to include billions of 
copies of this value so we can mark one or two empty batches, if I understand 
the meaning correctly.)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to