vvysotskyi commented on a change in pull request #2026: DRILL-7330: Implement metadata usage for all format plugins URL: https://github.com/apache/drill/pull/2026#discussion_r392721861
########## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java ########## @@ -292,39 +289,86 @@ public static int getPartitionDepth(FileSelection selection) { * @param includeFileImplicitColumns if file implicit columns should be included into the result * @param fs file system * @param index index of row group to populate + * @param start start of row group to populate + * @param length length of row group to populate * @return implicit columns map */ public Map<String, String> populateImplicitAndInternalColumns(Path filePath, List<String> partitionValues, boolean includeFileImplicitColumns, FileSystem fs, int index, long start, long length) { Map<String, String> implicitValues = - new LinkedHashMap<>(populateImplicitColumns(filePath, partitionValues, includeFileImplicitColumns)); + new LinkedHashMap<>(populateImplicitAndInternalColumns(filePath, partitionValues, includeFileImplicitColumns, fs)); - selectedInternalColumns.forEach((key, value) -> { - switch (value) { + selectedInternalColumns.forEach( + (key, value) -> implicitValues.put(key, getImplicitColumnValue(value, filePath, fs, index, start, length))); + + return implicitValues; + } + + /** + * Returns implicit column value for specified implicit file column. 
+ * + * @param column implicit file column + * @param filePath file path, used to populate file implicit columns + * @param fs file system + * @param index row group index + * @param start row group start + * @param length row group length + * @return implicit column value for specified implicit file column + */ + private static String getImplicitColumnValue(ImplicitFileColumn column, Path filePath, + FileSystem fs, Integer index, Long start, Long length) { + if (column instanceof ImplicitFileColumns) { + ImplicitFileColumns fileColumn = (ImplicitFileColumns) column; + return fileColumn.getValue(filePath); + } else if (column instanceof ImplicitInternalFileColumns) { + ImplicitInternalFileColumns fileColumn = (ImplicitInternalFileColumns) column; + switch (fileColumn) { case ROW_GROUP_INDEX: - implicitValues.put(key, String.valueOf(index)); - break; + return index != null ? String.valueOf(index) : null; case ROW_GROUP_START: - implicitValues.put(key, String.valueOf(start)); - break; + return start != null ? String.valueOf(start) : null; case ROW_GROUP_LENGTH: - implicitValues.put(key, String.valueOf(length)); - break; + return length != null ? String.valueOf(length) : null; case PROJECT_METADATA: - implicitValues.put(key, Boolean.TRUE.toString()); - break; + return Boolean.TRUE.toString(); case LAST_MODIFIED_TIME: try { - implicitValues.put(key, String.valueOf(fs.getFileStatus(filePath).getModificationTime())); + return fs != null ? String.valueOf(fs.getFileStatus(filePath).getModificationTime()) : null; } catch (IOException e) { throw new DrillRuntimeException(e); } - break; } - }); + } + return null; + } - return implicitValues; + /** + * Returns implicit column value for specified implicit file column. 
+ * + * @param column implicit file column + * @param filePath file path + * @param fs file system + * @return implicit column value for specified implicit file column + */ + public static String getImplicitColumnValue(ImplicitFileColumn column, Path filePath, FileSystem fs) { + return getImplicitColumnValue(column, filePath, fs, null, null, null); + } + + /** + * Returns list of implicit file columns which includes all elements from {@link ImplicitFileColumns}, + * {@link ImplicitInternalFileColumns#LAST_MODIFIED_TIME} and {@link ImplicitInternalFileColumns#PROJECT_METADATA} + * columns. + * + * @return list of implicit file columns + */ + public static List<ImplicitFileColumn> getImplicitFileColumns() { + List<ImplicitFileColumn> implicitColumns = new ArrayList<>(); + Collections.addAll(implicitColumns, ImplicitFileColumns.values()); + + implicitColumns.add(ImplicitInternalFileColumns.LAST_MODIFIED_TIME); + implicitColumns.add(ImplicitInternalFileColumns.PROJECT_METADATA); Review comment: Yes, it is correct that the reader will set this column value only in the case of EOF. I have made changes so that the old scan populates the non-null column value only in the case of EOF, so this value won't be copied. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services