szehon-ho commented on code in PR #7539:
URL: https://github.com/apache/iceberg/pull/7539#discussion_r1191925271
##########
core/src/main/java/org/apache/iceberg/BaseEntriesTable.java:
##########
@@ -125,31 +130,120 @@ ManifestFile manifest() {
@Override
public CloseableIterable<StructLike> rows() {
- // Project data-file fields
- CloseableIterable<StructLike> prunedRows;
- if (manifest.content() == ManifestContent.DATA) {
- prunedRows =
+ Types.NestedField readableMetricsField =
projection.findField(MetricsUtil.READABLE_METRICS);
+
+ if (readableMetricsField == null) {
+ CloseableIterable<StructLike> entryAsStruct =
CloseableIterable.transform(
- ManifestFiles.read(manifest, io).project(fileSchema).entries(),
- file -> (GenericManifestEntry<DataFile>) file);
+ entries(fileProjection),
+ entry -> (GenericManifestEntry<? extends ContentFile<?>>)
entry);
+
+ StructProjection structProjection = projectNonReadable(projection);
+ return CloseableIterable.transform(entryAsStruct,
structProjection::wrap);
} else {
- prunedRows =
- CloseableIterable.transform(
- ManifestFiles.readDeleteManifest(manifest, io, specsById)
- .project(fileSchema)
- .entries(),
- file -> (GenericManifestEntry<DeleteFile>) file);
+ Schema requiredFileProjection = requiredFileProjection();
+ Schema actualProjection = removeReadableMetrics(readableMetricsField);
+ StructProjection structProjection =
projectNonReadable(actualProjection);
+
+ return CloseableIterable.transform(
+ entries(requiredFileProjection),
+ entry -> withReadableMetrics(structProjection, entry,
readableMetricsField));
}
+ }
+
+ /**
+ * Remove virtual columns from the file projection and ensure that the
underlying metrics used
+ * to create those columns are part of the file projection
+ *
+ * @return file projection with required columns to read readable metrics
+ */
+ private Schema requiredFileProjection() {
+ Schema projectionForReadableMetrics =
+ new Schema(
+ MetricsUtil.READABLE_METRIC_COLS.stream()
+ .map(MetricsUtil.ReadableMetricColDefinition::originalCol)
+ .collect(Collectors.toList()));
+ return TypeUtil.join(fileProjection, projectionForReadableMetrics);
+ }
- // Project non-readable fields
- Schema readSchema = ManifestEntry.wrapFileSchema(fileSchema.asStruct());
- StructProjection projection = StructProjection.create(readSchema,
schema);
- return CloseableIterable.transform(prunedRows, projection::wrap);
+ private Schema removeReadableMetrics(Types.NestedField
readableMetricsField) {
+ Set<Integer> readableMetricsIds =
TypeUtil.getProjectedIds(readableMetricsField.type());
+ return TypeUtil.selectNot(projection, readableMetricsIds);
+ }
+
+ private StructProjection projectNonReadable(Schema projectedSchema) {
Review Comment:
Question: while we make one more change, what do you think of renaming this
method to structProjection()? I realize the name comes from the original comment,
but I'm not 100% sure what that was referring to. It looks like it is just a
standard projection of ManifestEntry to me. I may be missing something, though.
##########
core/src/main/java/org/apache/iceberg/BaseEntriesTable.java:
##########
@@ -125,31 +130,120 @@ ManifestFile manifest() {
@Override
public CloseableIterable<StructLike> rows() {
- // Project data-file fields
- CloseableIterable<StructLike> prunedRows;
- if (manifest.content() == ManifestContent.DATA) {
- prunedRows =
+ Types.NestedField readableMetricsField =
projection.findField(MetricsUtil.READABLE_METRICS);
+
+ if (readableMetricsField == null) {
+ CloseableIterable<StructLike> entryAsStruct =
CloseableIterable.transform(
- ManifestFiles.read(manifest, io).project(fileSchema).entries(),
- file -> (GenericManifestEntry<DataFile>) file);
+ entries(fileProjection),
+ entry -> (GenericManifestEntry<? extends ContentFile<?>>)
entry);
+
+ StructProjection structProjection = projectNonReadable(projection);
+ return CloseableIterable.transform(entryAsStruct,
structProjection::wrap);
} else {
- prunedRows =
- CloseableIterable.transform(
- ManifestFiles.readDeleteManifest(manifest, io, specsById)
- .project(fileSchema)
- .entries(),
- file -> (GenericManifestEntry<DeleteFile>) file);
+ Schema requiredFileProjection = requiredFileProjection();
+ Schema actualProjection = removeReadableMetrics(readableMetricsField);
+ StructProjection structProjection =
projectNonReadable(actualProjection);
+
+ return CloseableIterable.transform(
+ entries(requiredFileProjection),
+ entry -> withReadableMetrics(structProjection, entry,
readableMetricsField));
}
+ }
+
+ /**
+ * Remove virtual columns from the file projection and ensure that the
underlying metrics used
+ * to create those columns are part of the file projection
+ *
+ * @return file projection with required columns to read readable metrics
+ */
+ private Schema requiredFileProjection() {
+ Schema projectionForReadableMetrics =
+ new Schema(
+ MetricsUtil.READABLE_METRIC_COLS.stream()
+ .map(MetricsUtil.ReadableMetricColDefinition::originalCol)
+ .collect(Collectors.toList()));
+ return TypeUtil.join(fileProjection, projectionForReadableMetrics);
+ }
- // Project non-readable fields
- Schema readSchema = ManifestEntry.wrapFileSchema(fileSchema.asStruct());
- StructProjection projection = StructProjection.create(readSchema,
schema);
- return CloseableIterable.transform(prunedRows, projection::wrap);
+ private Schema removeReadableMetrics(Types.NestedField
readableMetricsField) {
+ Set<Integer> readableMetricsIds =
TypeUtil.getProjectedIds(readableMetricsField.type());
+ return TypeUtil.selectNot(projection, readableMetricsIds);
+ }
+
+ private StructProjection projectNonReadable(Schema projectedSchema) {
Review Comment:
Question: while we make one more change, what do you think of renaming this
method to structProjection()? I realize the name comes from the original comment,
but I'm not 100% sure what that was referring to. What is "non-readable"? It
looks like it is just a standard projection of ManifestEntry to me. I may be
missing something, though.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]