nastra commented on code in PR #14166: URL: https://github.com/apache/iceberg/pull/14166#discussion_r2374897201
########## core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java: ########## @@ -887,6 +887,103 @@ public void testTableCredential(String oauth2ServerUri) { oauth2ServerUri); } + @Test + public void testTableRefreshLoadsRefsOnly() { + RESTCatalogAdapter adapter = Mockito.spy(new RESTCatalogAdapter(backendCatalog)); + + RESTCatalog catalog = + new RESTCatalog(SessionCatalog.SessionContext.createEmpty(), (config) -> adapter); + catalog.initialize( + "test", + ImmutableMap.of( + CatalogProperties.URI, + "ignored", + CatalogProperties.FILE_IO_IMPL, + "org.apache.iceberg.inmemory.InMemoryFileIO", + // default loading to refs only + RESTCatalogProperties.SNAPSHOT_LOADING_MODE, + SnapshotMode.REFS.name())); + + if (requiresNamespaceCreate()) { + catalog.createNamespace(TABLE.namespace()); + } + + // Create a table with multiple snapshots + Table table = catalog.createTable(TABLE, SCHEMA); + table + .newFastAppend() + .appendFile( + DataFiles.builder(PartitionSpec.unpartitioned()) + .withPath("/path/to/data-a.parquet") + .withFileSizeInBytes(10) + .withRecordCount(2) + .build()) + .commit(); + + table + .newFastAppend() + .appendFile( + DataFiles.builder(PartitionSpec.unpartitioned()) + .withPath("/path/to/data-b.parquet") + .withFileSizeInBytes(10) + .withRecordCount(2) + .build()) + .commit(); + + ResourcePaths paths = ResourcePaths.forCatalogProperties(Maps.newHashMap()); + + // Respond with only referenced snapshots + Answer<?> refsAnswer = + invocation -> { + LoadTableResponse originalResponse = (LoadTableResponse) invocation.callRealMethod(); + TableMetadata refsMetadata = + TableMetadata.buildFrom(originalResponse.tableMetadata()) + .suppressHistoricalSnapshots() + .build(); + + // don't call snapshots() directly as that would cause to load all snapshots. Instead, + // make sure the snapshots field holds exactly 1 snapshot + assertThat(refsMetadata) + .extracting("snapshots") + .asInstanceOf(InstanceOfAssertFactories.list(Snapshot.class)) + .hasSize(1); + + return LoadTableResponse.builder() + .withTableMetadata(refsMetadata) + .addAllConfig(originalResponse.config()) + .build(); + }; + + Mockito.doAnswer(refsAnswer) + .when(adapter) + .execute( + reqMatcher(HTTPMethod.GET, paths.table(TABLE), Map.of(), Map.of("snapshots", "refs")), + eq(LoadTableResponse.class), + any(), + any()); + + Table refsTable = catalog.loadTable(TABLE); + refsTable.refresh(); Review Comment: > or in other works in case current.snapshotsLoaded is false I actually didn't mean to refer to this flag and we don't want to expose this, since this is internal information. What I meant is that it's not helpful to have ref loading in a place when the calling code potentially triggers an implicit call of `snapshots()`, which would do another request to load the remaining snapshots. That also means if there are cases where `snapshots()` isn't triggered and the table has lots and lots of snapshots, then this would be a good use case to think about adding ref loading mode there. I don't want us to add this feature to all places just for the sake of adding it but rather we want to add it where it's actually helpful and makes a difference in terms of loading times of a table -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org