edgarRd commented on a change in pull request #1167:
URL: https://github.com/apache/iceberg/pull/1167#discussion_r450460429
##########
File path: orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
##########
@@ -209,64 +206,17 @@ private static Metrics buildOrcMetrics(final long
numOfRows, final TypeDescripti
return Optional.ofNullable(Conversions.toByteBuffer(column.type(), max));
}
- private static Set<TypeDescription> findColumnsInContainers(Schema schema,
- TypeDescription
orcSchema) {
- ColumnsInContainersVisitor visitor = new ColumnsInContainersVisitor();
- OrcSchemaWithTypeVisitor.visit(schema, orcSchema, visitor);
- return visitor.getColumnsInContainers();
+ private static Set<Integer> statsColumns(TypeDescription schema) {
+ return OrcSchemaVisitor.visit(schema, new StatsColumnsVisitor());
}
- private static class ColumnsInContainersVisitor extends
OrcSchemaWithTypeVisitor<TypeDescription> {
-
- private final Set<TypeDescription> columnsInContainers;
-
- private ColumnsInContainersVisitor() {
- columnsInContainers = Sets.newHashSet();
- }
-
- public Set<TypeDescription> getColumnsInContainers() {
- return columnsInContainers;
- }
-
- private Set<TypeDescription> flatten(TypeDescription rootType) {
- if (rootType == null) {
- return ImmutableSet.of();
- }
-
- final Set<TypeDescription> flatTypes =
Sets.newHashSetWithExpectedSize(rootType.getMaximumId());
- final Queue<TypeDescription> queue = Queues.newLinkedBlockingQueue();
- queue.add(rootType);
- while (!queue.isEmpty()) {
- TypeDescription type = queue.remove();
- flatTypes.add(type);
-
queue.addAll(Optional.ofNullable(type.getChildren()).orElse(ImmutableList.of()));
- }
- return flatTypes;
- }
-
- @Override
- public TypeDescription record(Types.StructType iStruct, TypeDescription
record,
- List<String> names, List<TypeDescription>
fields) {
- return record;
- }
-
- @Override
- public TypeDescription list(Types.ListType iList, TypeDescription array,
TypeDescription element) {
- columnsInContainers.addAll(flatten(element));
- return array;
- }
-
- @Override
- public TypeDescription map(Types.MapType iMap, TypeDescription map,
- TypeDescription key, TypeDescription value) {
- columnsInContainers.addAll(flatten(key));
- columnsInContainers.addAll(flatten(value));
- return map;
- }
-
+ private static class StatsColumnsVisitor extends
OrcSchemaVisitor<Set<Integer>> {
@Override
- public TypeDescription primitive(Type.PrimitiveType iPrimitive,
TypeDescription primitive) {
- return primitive;
+ public Set<Integer> record(TypeDescription record, List<String> names,
List<Set<Integer>> fields) {
+ ImmutableSet.Builder<Integer> result = ImmutableSet.builder();
+ fields.stream().filter(Objects::nonNull).forEach(result::addAll);
+
record.getChildren().stream().map(ORCSchemaUtil::fieldId).forEach(result::add);
Review comment:
If the ORC files are written by Iceberg, it should be fine. I was
thinking of the case of importing existing ORC files, although for that we'd
need to implement a name mapping fallback strategy.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]