This is an automated email from the ASF dual-hosted git repository.

amogh-jahagirdar pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new b84b37f430 Core: Replace string-based schema projection with selection on field-id (#16184)
b84b37f430 is described below

commit b84b37f430b6c6e6b80e0a11d16c4d842b9da2a0
Author: Hongyue/Steve Zhang <[email protected]>
AuthorDate: Wed May 6 09:09:39 2026 -0700

    Core: Replace string-based schema projection with selection on field-id (#16184)
---
 .../org/apache/iceberg/FileCleanupStrategy.java    |  19 +--
 .../java/org/apache/iceberg/ManifestReader.java    |  10 +-
 .../java/org/apache/iceberg/PartitionsTable.java   | 127 +++++++++++++--------
 3 files changed, 95 insertions(+), 61 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/FileCleanupStrategy.java b/core/src/main/java/org/apache/iceberg/FileCleanupStrategy.java
index dd92d33cda..573aef057f 100644
--- a/core/src/main/java/org/apache/iceberg/FileCleanupStrategy.java
+++ b/core/src/main/java/org/apache/iceberg/FileCleanupStrategy.java
@@ -26,7 +26,9 @@ import org.apache.iceberg.io.BulkDeletionFailureException;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.SupportsBulkOperations;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.util.Tasks;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -78,14 +80,15 @@ abstract class FileCleanupStrategy {
       ExpireSnapshots.CleanupLevel cleanupLevel);
 
   private static final Schema MANIFEST_PROJECTION =
-      ManifestFile.schema()
-          .select(
-              "manifest_path",
-              "manifest_length",
-              "partition_spec_id",
-              "added_snapshot_id",
-              "added_files_count",
-              "deleted_files_count");
+      TypeUtil.select(
+          ManifestFile.schema(),
+          ImmutableSet.of(
+              ManifestFile.PATH.fieldId(),
+              ManifestFile.LENGTH.fieldId(),
+              ManifestFile.SPEC_ID.fieldId(),
+              ManifestFile.SNAPSHOT_ID.fieldId(),
+              ManifestFile.ADDED_FILES_COUNT.fieldId(),
+              ManifestFile.DELETED_FILES_COUNT.fieldId()));
 
   protected CloseableIterable<ManifestFile> readManifests(Snapshot snapshot) {
     if (snapshot.manifestListLocation() != null) {
diff --git a/core/src/main/java/org/apache/iceberg/ManifestReader.java b/core/src/main/java/org/apache/iceberg/ManifestReader.java
index 668a3764de..09bbe8b0cc 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestReader.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestReader.java
@@ -43,6 +43,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.PartitionSet;
 import org.slf4j.Logger;
@@ -68,6 +69,11 @@ public class ManifestReader<F extends ContentFile<F>> extends CloseableGroup
           "upper_bounds",
           "record_count");
 
+  private static final Schema STATUS_ONLY_PROJECTION =
+      TypeUtil.select(
+          ManifestEntry.getSchema(Types.StructType.of()),
+          ImmutableSet.of(ManifestEntry.STATUS.fieldId()));
+
   protected enum FileType {
     DATA_FILES(GenericDataFile.class),
     DELETE_FILES(GenericDeleteFile.class);
@@ -157,9 +163,7 @@ public class ManifestReader<F extends ContentFile<F>> extends CloseableGroup
     Map<String, String> metadata;
     try {
       try (CloseableIterable<ManifestEntry<T>> headerReader =
-          InternalData.read(FileFormat.AVRO, inputFile)
-              
.project(ManifestEntry.getSchema(Types.StructType.of()).select("status"))
-              .build()) {
+          InternalData.read(FileFormat.AVRO, 
inputFile).project(STATUS_ONLY_PROJECTION).build()) {
 
         if (headerReader instanceof AvroIterable) {
           metadata = ((AvroIterable<ManifestEntry<T>>) 
headerReader).getMetadata();
diff --git a/core/src/main/java/org/apache/iceberg/PartitionsTable.java b/core/src/main/java/org/apache/iceberg/PartitionsTable.java
index 09c6e7893b..10366db5a5 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionsTable.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionsTable.java
@@ -27,7 +27,9 @@ import java.util.List;
 import org.apache.iceberg.expressions.ManifestEvaluator;
 import org.apache.iceberg.io.CloseableIterable;
 import 
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.types.Comparators;
+import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.ParallelIterable;
 import org.apache.iceberg.util.PartitionUtil;
@@ -37,6 +39,58 @@ import org.apache.iceberg.util.StructProjection;
 /** A {@link Table} implementation that exposes a table's partitions as rows. 
*/
 public class PartitionsTable extends BaseMetadataTable {
 
+  private static final int PARTITION_FIELD_ID = 1;
+
+  private static final Types.NestedField SPEC_ID =
+      Types.NestedField.required(4, "spec_id", Types.IntegerType.get());
+  private static final Types.NestedField RECORD_COUNT =
+      Types.NestedField.required(
+          2, "record_count", Types.LongType.get(), "Count of records in data 
files");
+  private static final Types.NestedField FILE_COUNT =
+      Types.NestedField.required(3, "file_count", Types.IntegerType.get(), 
"Count of data files");
+  private static final Types.NestedField TOTAL_DATA_FILE_SIZE_IN_BYTES =
+      Types.NestedField.required(
+          11,
+          "total_data_file_size_in_bytes",
+          Types.LongType.get(),
+          "Total size in bytes of data files");
+  private static final Types.NestedField POSITION_DELETE_RECORD_COUNT =
+      Types.NestedField.required(
+          5,
+          "position_delete_record_count",
+          Types.LongType.get(),
+          "Count of records in position delete files");
+  private static final Types.NestedField POSITION_DELETE_FILE_COUNT =
+      Types.NestedField.required(
+          6,
+          "position_delete_file_count",
+          Types.IntegerType.get(),
+          "Count of position delete files");
+  private static final Types.NestedField EQUALITY_DELETE_RECORD_COUNT =
+      Types.NestedField.required(
+          7,
+          "equality_delete_record_count",
+          Types.LongType.get(),
+          "Count of records in equality delete files");
+  private static final Types.NestedField EQUALITY_DELETE_FILE_COUNT =
+      Types.NestedField.required(
+          8,
+          "equality_delete_file_count",
+          Types.IntegerType.get(),
+          "Count of equality delete files");
+  private static final Types.NestedField LAST_UPDATED_AT =
+      Types.NestedField.optional(
+          9,
+          "last_updated_at",
+          Types.TimestampType.withZone(),
+          "Commit time of snapshot that last updated this partition");
+  private static final Types.NestedField LAST_UPDATED_SNAPSHOT_ID =
+      Types.NestedField.optional(
+          10,
+          "last_updated_snapshot_id",
+          Types.LongType.get(),
+          "Id of snapshot that last updated this partition");
+
   private final Schema schema;
 
   private final boolean unpartitionedTable;
@@ -50,47 +104,18 @@ public class PartitionsTable extends BaseMetadataTable {
 
     this.schema =
         new Schema(
-            Types.NestedField.required(1, "partition", 
Partitioning.partitionType(table)),
-            Types.NestedField.required(4, "spec_id", Types.IntegerType.get()),
-            Types.NestedField.required(
-                2, "record_count", Types.LongType.get(), "Count of records in 
data files"),
-            Types.NestedField.required(
-                3, "file_count", Types.IntegerType.get(), "Count of data 
files"),
-            Types.NestedField.required(
-                11,
-                "total_data_file_size_in_bytes",
-                Types.LongType.get(),
-                "Total size in bytes of data files"),
             Types.NestedField.required(
-                5,
-                "position_delete_record_count",
-                Types.LongType.get(),
-                "Count of records in position delete files"),
-            Types.NestedField.required(
-                6,
-                "position_delete_file_count",
-                Types.IntegerType.get(),
-                "Count of position delete files"),
-            Types.NestedField.required(
-                7,
-                "equality_delete_record_count",
-                Types.LongType.get(),
-                "Count of records in equality delete files"),
-            Types.NestedField.required(
-                8,
-                "equality_delete_file_count",
-                Types.IntegerType.get(),
-                "Count of equality delete files"),
-            Types.NestedField.optional(
-                9,
-                "last_updated_at",
-                Types.TimestampType.withZone(),
-                "Commit time of snapshot that last updated this partition"),
-            Types.NestedField.optional(
-                10,
-                "last_updated_snapshot_id",
-                Types.LongType.get(),
-                "Id of snapshot that last updated this partition"));
+                PARTITION_FIELD_ID, "partition", 
Partitioning.partitionType(table)),
+            SPEC_ID,
+            RECORD_COUNT,
+            FILE_COUNT,
+            TOTAL_DATA_FILE_SIZE_IN_BYTES,
+            POSITION_DELETE_RECORD_COUNT,
+            POSITION_DELETE_FILE_COUNT,
+            EQUALITY_DELETE_RECORD_COUNT,
+            EQUALITY_DELETE_FILE_COUNT,
+            LAST_UPDATED_AT,
+            LAST_UPDATED_SNAPSHOT_ID);
     this.unpartitionedTable = 
Partitioning.partitionType(table).fields().isEmpty();
   }
 
@@ -102,16 +127,18 @@ public class PartitionsTable extends BaseMetadataTable {
   @Override
   public Schema schema() {
     if (unpartitionedTable) {
-      return schema.select(
-          "record_count",
-          "file_count",
-          "total_data_file_size_in_bytes",
-          "position_delete_record_count",
-          "position_delete_file_count",
-          "equality_delete_record_count",
-          "equality_delete_file_count",
-          "last_updated_at",
-          "last_updated_snapshot_id");
+      return TypeUtil.select(
+          schema,
+          ImmutableSet.of(
+              RECORD_COUNT.fieldId(),
+              FILE_COUNT.fieldId(),
+              TOTAL_DATA_FILE_SIZE_IN_BYTES.fieldId(),
+              POSITION_DELETE_RECORD_COUNT.fieldId(),
+              POSITION_DELETE_FILE_COUNT.fieldId(),
+              EQUALITY_DELETE_RECORD_COUNT.fieldId(),
+              EQUALITY_DELETE_FILE_COUNT.fieldId(),
+              LAST_UPDATED_AT.fieldId(),
+              LAST_UPDATED_SNAPSHOT_ID.fieldId()));
     }
     return schema;
   }

Reply via email to