ihuzenko commented on a change in pull request #1810: DRILL-7271: Refactor
Metadata interfaces and classes to contain all needed information for the File
based Metastore
URL: https://github.com/apache/drill/pull/1810#discussion_r296225077
##########
File path:
exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/BaseParquetMetadataProvider.java
##########
@@ -361,6 +365,102 @@ public FileMetadata getFileMetadata(Path location) {
return new ArrayList<>(getFilesMetadataMap().values());
}
+ @Override
+ public List<SegmentMetadata> getSegmentsMetadata() {
+ return new ArrayList<>(getSegmentsMetadataMap().values());
+ }
+
+ @Override
+ public Map<Path, SegmentMetadata> getSegmentsMetadataMap() {
+ if (segments == null) {
+ if (entries.isEmpty() || !collectMetadata) {
+ return Collections.emptyMap();
+ }
+
+ segments = new LinkedHashMap<>();
+
+ Path fileLocation = getFilesMetadata().iterator().next().getPath();
+ int levelsCount = fileLocation.depth() - tableLocation.depth();
+
+ Map<Path, FileMetadata> filesMetadata = getFilesMetadataMap();
+ int segmentsIndex = levelsCount - 1;
+ Map<Path, SegmentMetadata> segmentMetadata =
getSegmentsForMetadata(filesMetadata,
+ SchemaPath.getSimplePath(MetadataInfo.DEFAULT_COLUMN_PREFIX +
segmentsIndex));
+ segments.putAll(segmentMetadata);
+ for (int i = segmentsIndex - 1; i >= 0; i--) {
+ String segmentColumn = MetadataInfo.DEFAULT_COLUMN_PREFIX + i;
+ segmentMetadata = getMetadataForSegments(segmentMetadata,
+ SchemaPath.getSimplePath(segmentColumn));
+ segments.putAll(segmentMetadata);
+ }
+
+ }
+ return segments;
+ }
+
+ private static <T extends BaseMetadata & LocationProvider> Map<Path,
SegmentMetadata> getSegmentsForMetadata(
+ Map<Path, T> metadata, SchemaPath column) {
+ Multimap<Path, T> metadataMultimap = LinkedListMultimap.create();
+ metadata.forEach((key, value) -> metadataMultimap.put(key.getParent(),
value));
+
+ Map<Path, SegmentMetadata> result = new HashMap<>();
+ metadataMultimap.asMap().forEach((key, value) -> result.put(key,
combineToSegmentMetadata(value, column)));
+
+ return result;
+ }
+
+ private static Map<Path, SegmentMetadata> getMetadataForSegments(Map<Path,
SegmentMetadata> metadata, SchemaPath column) {
+ Multimap<Path, SegmentMetadata> metadataMultimap =
LinkedListMultimap.create();
+ metadata.forEach((key, value) -> metadataMultimap.put(key.getParent(),
value));
+
+ Map<Path, SegmentMetadata> result = new HashMap<>();
+ metadataMultimap.asMap().forEach((key, value) -> result.put(key,
combineSegmentMetadata(value, column)));
+
+ return result;
+ }
+
+ private static <T extends BaseMetadata & LocationProvider> SegmentMetadata
combineToSegmentMetadata(Collection<T> metadataList, SchemaPath column) {
+ List<Path> metadataLocations = metadataList.stream()
+ .map(metadata -> metadata.getPath()) // used lambda instead of method
reference due to JDK-8141508
+ .collect(Collectors.toList());
+ return combineToSegmentMetadata(metadataList, column, metadataLocations);
+ }
+
+ private static SegmentMetadata
combineSegmentMetadata(Collection<SegmentMetadata> metadataList, SchemaPath
column) {
+ List<Path> metadataLocations = metadataList.stream()
+ .flatMap(metadata -> metadata.getLocations().stream())
+ .collect(Collectors.toList());
+
+ return combineToSegmentMetadata(metadataList, column, metadataLocations);
+ }
+
+ /**
+ * Returns {@link SegmentMetadata} which is combined metadata of list of
specified metadata
+ *
+ * @param metadataList metadata to combine
+ * @param column segment column
+ * @param metadataLocations locations of metadata combined in resulting
segment
+ * @param <T> type of metadata to combine
+ * @return {@link SegmentMetadata} from combined metadata
+ */
+ private static <T extends BaseMetadata & LocationProvider> SegmentMetadata
combineToSegmentMetadata(Collection<T> metadataList,
+ SchemaPath column, List<Path> metadataLocations) {
+ List<StatisticsHolder> segmentStatistics =
+ Collections.singletonList(
+ new StatisticsHolder<>(
+ TableStatisticsKind.ROW_COUNT.mergeStatistics(metadataList),
+ TableStatisticsKind.ROW_COUNT));
+ // this code is used only to collect segment metadata to be used only
during filtering,
+ // so metadata identifier is not required here and in other places in this
class
+ MetadataInfo metadataInfo = new MetadataInfo(MetadataType.SEGMENT,
MetadataInfo.GENERAL_INFO_KEY, null);
+ T firstMetadata = metadataList.iterator().next();
+
+ return new SegmentMetadata(firstMetadata.getTableInfo(), metadataInfo,
column, firstMetadata.getSchema(),
Review comment:
This is a 10-argument constructor. Maybe it's time for a builder? :)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services