This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 068a565c84 Allows limiting per tablet information retrieved (#5700) 068a565c84 is described below commit 068a565c8406b1195c8ea293b1044d28cc9d67e9 Author: Keith Turner <ktur...@apache.org> AuthorDate: Fri Aug 29 12:25:55 2025 -0400 Allows limiting per tablet information retrieved (#5700) When calling TableOperations.getTabletInformation() it currently retrieves a lot of tablet metadata. If this information is not of interest then a lot of time could be spent gathering information that is never used (especially for the per tablet file data). Modified TableOperations.getTabletInformation() to allow specifying a subset of tablet metadata to fetch. --- .../core/client/admin/TableOperations.java | 6 +- .../core/client/admin/TabletInformation.java | 38 +++++++ .../core/clientImpl/TableOperationsImpl.java | 30 ++++- .../core/clientImpl/TabletInformationImpl.java | 43 +++++--- .../shell/commands/ListTabletsCommandTest.java | 6 +- .../apache/accumulo/test/TableOperationsIT.java | 122 +++++++++++++++++++++ 6 files changed, 218 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java index ed169e9152..8639e30d55 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java +++ b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java @@ -1057,12 +1057,14 @@ public interface TableOperations { } /** + * @param fields can optionally narrow the data retrieved per tablet, which can speed up streaming + * over tablets. If this list is empty then all fields are fetched. * @return a stream of tablet information for tablets that fall in the specified range. The stream * may be backed by a scanner, so it's best to close the stream. * @since 4.0.0 */ - default Stream<TabletInformation> getTabletInformation(final String tableName, final Range range) - throws TableNotFoundException { + default Stream<TabletInformation> getTabletInformation(final String tableName, final Range range, + TabletInformation.Field... fields) throws TableNotFoundException { throw new UnsupportedOperationException(); } diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/TabletInformation.java b/core/src/main/java/org/apache/accumulo/core/client/admin/TabletInformation.java index 2c31c22255..adf0427231 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/admin/TabletInformation.java +++ b/core/src/main/java/org/apache/accumulo/core/client/admin/TabletInformation.java @@ -20,6 +20,7 @@ package org.apache.accumulo.core.client.admin; import java.util.Optional; +import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.TabletId; /** @@ -27,38 +28,66 @@ import org.apache.accumulo.core.data.TabletId; */ public interface TabletInformation { + /** + * Used to limit what information is obtained per tablet when calling + * {@link TableOperations#getTabletInformation(String, Range, Field...)} + * + * @since 4.0.0 + */ + enum Field { + LOCATION, FILES, AVAILABILITY, MERGEABILITY + } + /** * @return the TabletId for this tablet. */ TabletId getTabletId(); /** + * Requires {@link Field#FILES} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return the number of files in the tablet directory. */ int getNumFiles(); /** + * Requires {@link Field#FILES} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return the number of write-ahead logs associated with the tablet. */ int getNumWalLogs(); /** + * Requires {@link Field#FILES} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return an estimated number of entries in the tablet. */ long getEstimatedEntries(); /** + * Requires {@link Field#FILES} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return an estimated size of the tablet data on disk, which is likely the compressed size of * the data. */ long getEstimatedSize(); /** + * Requires {@link Field#LOCATION} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return the tablet hosting state. */ String getTabletState(); /** + * Requires {@link Field#LOCATION} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return the Location of the tablet as a String or empty if the location in the TabletMetadata * does not exist. When not empty, the String will be of the form * "{@code <location type>:<host>:<port>}", where the location type is one of @@ -67,16 +96,25 @@ public interface TabletInformation { Optional<String> getLocation(); /** + * Requires {@link Field#FILES} to be specified at acquisition otherwise an exception will be + * thrown. + * * @return the directory name of the tablet. */ String getTabletDir(); /** + * Requires {@link Field#AVAILABILITY} to be specified at acquisition otherwise an exception will + * be thrown. + * * @return the TabletAvailability object. */ TabletAvailability getTabletAvailability(); /** + * Requires {@link Field#MERGEABILITY} to be specified at acquisition otherwise an exception will + * be thrown. + * * @return the TabletMergeabilityInfo object * * @since 4.0.0 diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java index 356fb6d129..95b81a86cf 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java @@ -2254,17 +2254,35 @@ public class TableOperationsImpl extends TableOperationsHelper { } @Override - public Stream<TabletInformation> getTabletInformation(final String tableName, final Range range) - throws TableNotFoundException { + public Stream<TabletInformation> getTabletInformation(final String tableName, final Range range, + TabletInformation.Field... fields) throws TableNotFoundException { EXISTING_TABLE_NAME.validate(tableName); final Text scanRangeStart = (range.getStartKey() == null) ? null : range.getStartKey().getRow(); TableId tableId = context.getTableId(tableName); + List<TabletMetadata.ColumnType> columns = new ArrayList<>(); + EnumSet<TabletInformation.Field> fieldSet = + fields.length == 0 ? EnumSet.allOf(TabletInformation.Field.class) + : EnumSet.noneOf(TabletInformation.Field.class); + Collections.addAll(fieldSet, fields); + if (fieldSet.contains(TabletInformation.Field.FILES)) { + Collections.addAll(columns, DIR, FILES, LOGS); + } + if (fieldSet.contains(TabletInformation.Field.LOCATION)) { + Collections.addAll(columns, LOCATION, LAST, SUSPEND); + } + if (fieldSet.contains(TabletInformation.Field.AVAILABILITY)) { + Collections.addAll(columns, AVAILABILITY); + } + if (fieldSet.contains(TabletInformation.Field.MERGEABILITY)) { + Collections.addAll(columns, MERGEABILITY); + } + columns.add(PREV_ROW); + TabletsMetadata tabletsMetadata = context.getAmple().readTablets().forTable(tableId).overlapping(scanRangeStart, true, null) - .fetch(AVAILABILITY, LOCATION, DIR, PREV_ROW, FILES, LAST, LOGS, SUSPEND, MERGEABILITY) - .checkConsistency().build(); + .fetch(columns.toArray(new TabletMetadata.ColumnType[0])).checkConsistency().build(); Set<TServerInstance> liveTserverSet = TabletMetadata.getLiveTServers(context); @@ -2285,8 +2303,8 @@ public class TableOperationsImpl extends TableOperationsHelper { } }).takeWhile(tm -> tm.getPrevEndRow() == null || !range.afterEndKey(new Key(tm.getPrevEndRow()).followingKey(PartialKey.ROW))) - .map(tm -> new TabletInformationImpl(tm, TabletState.compute(tm, liveTserverSet).toString(), - currentTime)); + .map(tm -> new TabletInformationImpl(tm, + () -> TabletState.compute(tm, liveTserverSet).toString(), currentTime)); } } diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletInformationImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletInformationImpl.java index b72c8a0a78..10c86c172c 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletInformationImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletInformationImpl.java @@ -32,23 +32,36 @@ import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location; +import com.google.common.base.Suppliers; + public class TabletInformationImpl implements TabletInformation { private final TabletMetadata tabletMetadata; - private long estimatedSize; - private long estimatedEntries; - private final String tabletState; + + private final Supplier<String> tabletState; private final Supplier<Duration> currentTime; + private final Supplier<FileInfo> fileInfo; + + private class FileInfo { + private final long estimatedSize; + private final long estimatedEntries; + + FileInfo() { + long estimatedEntries = 0L; + long estimatedSize = 0L; + for (DataFileValue dfv : tabletMetadata.getFilesMap().values()) { + estimatedEntries += dfv.getNumEntries(); + estimatedSize += dfv.getSize(); + } + this.estimatedEntries = estimatedEntries; + this.estimatedSize = estimatedSize; + } + } - public TabletInformationImpl(TabletMetadata tabletMetadata, String tabletState, + public TabletInformationImpl(TabletMetadata tabletMetadata, Supplier<String> tabletState, Supplier<Duration> currentTime) { this.tabletMetadata = tabletMetadata; - estimatedEntries = 0L; - estimatedSize = 0L; - for (DataFileValue dfv : tabletMetadata.getFilesMap().values()) { - estimatedEntries += dfv.getNumEntries(); - estimatedSize += dfv.getSize(); - } + this.fileInfo = Suppliers.memoize(FileInfo::new); this.tabletState = tabletState; this.currentTime = Objects.requireNonNull(currentTime); } @@ -70,17 +83,17 @@ public class TabletInformationImpl implements TabletInformation { @Override public long getEstimatedEntries() { - return this.estimatedEntries; + return this.fileInfo.get().estimatedEntries; } @Override public long getEstimatedSize() { - return estimatedSize; + return fileInfo.get().estimatedSize; } @Override public String getTabletState() { - return tabletState; + return tabletState.get(); } @Override @@ -107,8 +120,6 @@ public class TabletInformationImpl implements TabletInformation { @Override public String toString() { - return "TabletInformationImpl{tabletMetadata=" + tabletMetadata + ", estimatedSize=" - + estimatedSize + ", estimatedEntries=" + estimatedEntries + ", tabletState='" + tabletState - + '\'' + '}'; + return "TabletInformationImpl{tabletMetadata=" + tabletMetadata + '}'; } } diff --git a/shell/src/test/java/org/apache/accumulo/shell/commands/ListTabletsCommandTest.java b/shell/src/test/java/org/apache/accumulo/shell/commands/ListTabletsCommandTest.java index a433b0f937..b7bd5f1f66 100644 --- a/shell/src/test/java/org/apache/accumulo/shell/commands/ListTabletsCommandTest.java +++ b/shell/src/test/java/org/apache/accumulo/shell/commands/ListTabletsCommandTest.java @@ -174,9 +174,9 @@ public class ListTabletsCommandTest { TabletInformationImpl[] tabletInformation = new TabletInformationImpl[3]; Supplier<Duration> currentTime = () -> Duration.ofHours(1); - tabletInformation[0] = new TabletInformationImpl(tm1, "HOSTED", currentTime); - tabletInformation[1] = new TabletInformationImpl(tm2, "HOSTED", currentTime); - tabletInformation[2] = new TabletInformationImpl(tm3, "UNASSIGNED", currentTime); + tabletInformation[0] = new TabletInformationImpl(tm1, () -> "HOSTED", currentTime); + tabletInformation[1] = new TabletInformationImpl(tm2, () -> "HOSTED", currentTime); + tabletInformation[2] = new TabletInformationImpl(tm3, () -> "UNASSIGNED", currentTime); AccumuloClient client = EasyMock.createMock(AccumuloClient.class); ClientContext context = EasyMock.createMock(ClientContext.class); diff --git a/test/src/main/java/org/apache/accumulo/test/TableOperationsIT.java b/test/src/main/java/org/apache/accumulo/test/TableOperationsIT.java index 3e91e67b3e..e583659283 100644 --- a/test/src/main/java/org/apache/accumulo/test/TableOperationsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/TableOperationsIT.java @@ -21,6 +21,7 @@ package org.apache.accumulo.test; import static java.util.concurrent.TimeUnit.SECONDS; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -917,4 +918,125 @@ public class TableOperationsIT extends AccumuloClusterHarness { assertEquals(1000, hash.size()); } + @Test + public void testGetTabletInformation() throws Exception { + String tableName = getUniqueNames(1)[0]; + + try { + SortedSet<Text> splits = new TreeSet<>(); + for (int i = 1; i < 9; i++) { + splits.add(new Text(i + "")); + } + accumuloClient.tableOperations().create(tableName, + new NewTableConfiguration().withSplits(splits)); + try (var writer = accumuloClient.createBatchWriter(tableName)) { + for (int i = 1; i <= 9; i++) { + var m = new Mutation("" + i); + m.at().family("f").qualifier("q").put("" + i); + writer.addMutation(m); + } + } + + accumuloClient.tableOperations().flush(tableName, null, null, true); + + var tableId = TableId.of(accumuloClient.tableOperations().tableIdMap().get(tableName)); + + try (var tablets = accumuloClient.tableOperations().getTabletInformation(tableName, + new Range(), TabletInformation.Field.LOCATION)) { + var tabletList = tablets.collect(Collectors.toList()); + assertEquals(9, tabletList.size()); + tabletList.forEach(ti -> { + assertNotNull(ti.getLocation()); + assertEquals(tableId, ti.getTabletId().getTable()); + assertEquals("HOSTED", ti.getTabletState()); + assertThrows(IllegalStateException.class, ti::getNumFiles); + assertThrows(IllegalStateException.class, ti::getEstimatedEntries); + assertThrows(IllegalStateException.class, ti::getEstimatedSize); + assertThrows(IllegalStateException.class, ti::getNumWalLogs); + assertThrows(IllegalStateException.class, ti::getTabletDir); + assertThrows(IllegalStateException.class, ti::getTabletMergeabilityInfo); + assertThrows(IllegalStateException.class, ti::getTabletAvailability); + }); + } + + try (var tablets = accumuloClient.tableOperations().getTabletInformation(tableName, + new Range(), TabletInformation.Field.FILES)) { + var tabletList = tablets.collect(Collectors.toList()); + assertEquals(9, tabletList.size()); + tabletList.forEach(ti -> { + assertEquals(tableId, ti.getTabletId().getTable()); + assertThrows(IllegalStateException.class, ti::getLocation); + assertThrows(IllegalStateException.class, ti::getTabletState); + assertEquals(1, ti.getNumFiles()); + assertEquals(1, ti.getEstimatedEntries()); + assertTrue(ti.getEstimatedSize() > 0); + assertEquals(0, ti.getNumWalLogs()); + assertNotNull(ti.getTabletDir()); + assertThrows(IllegalStateException.class, ti::getTabletMergeabilityInfo); + assertThrows(IllegalStateException.class, ti::getTabletAvailability); + }); + } + + try (var tablets = accumuloClient.tableOperations().getTabletInformation(tableName, + new Range(), TabletInformation.Field.FILES, TabletInformation.Field.LOCATION)) { + var tabletList = tablets.collect(Collectors.toList()); + assertEquals(9, tabletList.size()); + tabletList.forEach(ti -> { + assertEquals(tableId, ti.getTabletId().getTable()); + assertEquals(tableId, ti.getTabletId().getTable()); + assertEquals("HOSTED", ti.getTabletState()); + assertEquals(1, ti.getNumFiles()); + assertEquals(1, ti.getEstimatedEntries()); + assertTrue(ti.getEstimatedSize() > 0); + assertEquals(0, ti.getNumWalLogs()); + assertNotNull(ti.getTabletDir()); + assertThrows(IllegalStateException.class, ti::getTabletMergeabilityInfo); + assertThrows(IllegalStateException.class, ti::getTabletAvailability); + }); + } + + try (var tablets = accumuloClient.tableOperations().getTabletInformation(tableName, + new Range(), TabletInformation.Field.AVAILABILITY)) { + var tabletList = tablets.collect(Collectors.toList()); + assertEquals(9, tabletList.size()); + tabletList.forEach(ti -> { + assertEquals(tableId, ti.getTabletId().getTable()); + assertThrows(IllegalStateException.class, ti::getLocation); + assertThrows(IllegalStateException.class, ti::getTabletState); + assertThrows(IllegalStateException.class, ti::getNumFiles); + assertThrows(IllegalStateException.class, ti::getEstimatedEntries); + assertThrows(IllegalStateException.class, ti::getEstimatedSize); + assertThrows(IllegalStateException.class, ti::getNumWalLogs); + assertThrows(IllegalStateException.class, ti::getTabletDir); + assertThrows(IllegalStateException.class, ti::getTabletMergeabilityInfo); + assertEquals(TabletAvailability.ONDEMAND, ti.getTabletAvailability()); + }); + } + + try (var tablets = accumuloClient.tableOperations().getTabletInformation(tableName, + new Range(), TabletInformation.Field.MERGEABILITY)) { + var tabletList = tablets.collect(Collectors.toList()); + assertEquals(9, tabletList.size()); + tabletList.forEach(ti -> { + assertEquals(tableId, ti.getTabletId().getTable()); + assertThrows(IllegalStateException.class, ti::getLocation); + assertThrows(IllegalStateException.class, ti::getTabletState); + assertThrows(IllegalStateException.class, ti::getNumFiles); + assertThrows(IllegalStateException.class, ti::getEstimatedEntries); + assertThrows(IllegalStateException.class, ti::getEstimatedSize); + assertThrows(IllegalStateException.class, ti::getNumWalLogs); + assertThrows(IllegalStateException.class, ti::getTabletDir); + if (ti.getTabletId().getEndRow() == null) { + assertFalse(ti.getTabletMergeabilityInfo().getTabletMergeability().isNever()); + } else { + assertTrue(ti.getTabletMergeabilityInfo().getTabletMergeability().isNever()); + } + assertThrows(IllegalStateException.class, ti::getTabletAvailability); + }); + } + + } finally { + accumuloClient.tableOperations().delete(tableName); + } + } }