paul-rogers commented on a change in pull request #2026: DRILL-7330: Implement
metadata usage for all format plugins
URL: https://github.com/apache/drill/pull/2026#discussion_r392610460
##########
File path:
exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
##########
@@ -292,39 +289,86 @@ public static int getPartitionDepth(FileSelection
selection) {
* @param includeFileImplicitColumns if file implicit columns should be
included into the result
* @param fs file system
* @param index index of row group to populate
+ * @param start start of row group to populate
+ * @param length length of row group to populate
* @return implicit columns map
*/
public Map<String, String> populateImplicitAndInternalColumns(Path filePath,
List<String> partitionValues, boolean includeFileImplicitColumns,
FileSystem fs, int index, long start, long length) {
Map<String, String> implicitValues =
- new LinkedHashMap<>(populateImplicitColumns(filePath, partitionValues,
includeFileImplicitColumns));
+ new LinkedHashMap<>(populateImplicitAndInternalColumns(filePath,
partitionValues, includeFileImplicitColumns, fs));
- selectedInternalColumns.forEach((key, value) -> {
- switch (value) {
+ selectedInternalColumns.forEach(
+ (key, value) -> implicitValues.put(key, getImplicitColumnValue(value,
filePath, fs, index, start, length)));
+
+ return implicitValues;
+ }
+
+ /**
+ * Returns implicit column value for specified implicit file column.
+ *
+ * @param column implicit file column
+ * @param filePath file path, used to populate file implicit columns
+ * @param fs file system
+ * @param index row group index
+ * @param start row group start
+ * @param length row group length
+ * @return implicit column value for specified implicit file column
+ */
+ private static String getImplicitColumnValue(ImplicitFileColumn column, Path
filePath,
+ FileSystem fs, Integer index, Long start, Long length) {
+ if (column instanceof ImplicitFileColumns) {
+ ImplicitFileColumns fileColumn = (ImplicitFileColumns) column;
+ return fileColumn.getValue(filePath);
+ } else if (column instanceof ImplicitInternalFileColumns) {
+ ImplicitInternalFileColumns fileColumn = (ImplicitInternalFileColumns)
column;
+ switch (fileColumn) {
case ROW_GROUP_INDEX:
- implicitValues.put(key, String.valueOf(index));
- break;
+ return index != null ? String.valueOf(index) : null;
case ROW_GROUP_START:
- implicitValues.put(key, String.valueOf(start));
- break;
+ return start != null ? String.valueOf(start) : null;
case ROW_GROUP_LENGTH:
- implicitValues.put(key, String.valueOf(length));
- break;
+ return length != null ? String.valueOf(length) : null;
case PROJECT_METADATA:
- implicitValues.put(key, Boolean.TRUE.toString());
- break;
+ return Boolean.TRUE.toString();
case LAST_MODIFIED_TIME:
try {
- implicitValues.put(key,
String.valueOf(fs.getFileStatus(filePath).getModificationTime()));
+ return fs != null ?
String.valueOf(fs.getFileStatus(filePath).getModificationTime()) : null;
} catch (IOException e) {
throw new DrillRuntimeException(e);
}
- break;
}
- });
+ }
+ return null;
+ }
- return implicitValues;
+ /**
+ * Returns implicit column value for specified implicit file column.
+ *
+ * @param column implicit file column
+ * @param filePath file path
+ * @param fs file system
+ * @return implicit column value for specified implicit file column
+ */
+ public static String getImplicitColumnValue(ImplicitFileColumn column, Path
filePath, FileSystem fs) {
+ return getImplicitColumnValue(column, filePath, fs, null, null, null);
+ }
+
+ /**
+ * Returns list of implicit file columns which includes all elements from
{@link ImplicitFileColumns},
+ * {@link ImplicitInternalFileColumns#LAST_MODIFIED_TIME} and {@link
ImplicitInternalFileColumns#PROJECT_METADATA}
+ * columns.
+ *
+ * @return list of implicit file columns
+ */
+ public static List<ImplicitFileColumn> getImplicitFileColumns() {
+ List<ImplicitFileColumn> implicitColumns = new ArrayList<>();
+ Collections.addAll(implicitColumns, ImplicitFileColumns.values());
+
+ implicitColumns.add(ImplicitInternalFileColumns.LAST_MODIFIED_TIME);
+ implicitColumns.add(ImplicitInternalFileColumns.PROJECT_METADATA);
Review comment:
Please explain this one. What does it mean and how is it used? I'm a bit
confused because the reader does not know how its data will be used. When would
the reader/scan set the value for this field? Only in that EOF case discussed
above? If so, then it may not be needed for the "new" scan. (And, perhaps we
should fix the case in the old one since it seems silly to include billions of
copies of this value so we can mark one or two empty batches, if I understand
the meaning correctly.)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services