This is an automated email from the ASF dual-hosted git repository.

dlmarion pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 53e47c13bd Send error back from TabletManagementIterator on invalid 
metadata (#5668)
53e47c13bd is described below

commit 53e47c13bd9ec74bc6d73332838d6a88821231ca
Author: Dave Marion <[email protected]>
AuthorDate: Wed Jul 23 09:29:18 2025 -0400

    Send error back from TabletManagementIterator on invalid metadata (#5668)
    
    Modified the TabletManagementIterator to invoke getPrevEndRow()
    on the constructed TabletMetadata object to catch the raised
    IllegalStateException and return it to the Manager. The existing
    try/catch around computeTabletManagementActions would likely
    throw the IllegalStateException when tm.getExtent is called
    when trying to log an error, preventing the error from being
    returned to the Manager.
    
    There is an existing test case, TabletMetadataTest.testAbsentPrevRow,
    which demonstrates that an IllegalStateException is raised when
    TabletMetadata.convertRow is called on a set of keys that don't
    contain a prevEndRow.
    
    Closes #5658
---
 .../core/manager/state/TabletManagement.java       |   7 +-
 .../manager/state/TabletManagementIterator.java    |  48 ++++--
 .../manager/state/TabletManagementScanner.java     |  13 +-
 .../server/manager/state/TabletManagementTest.java |   6 +-
 .../accumulo/manager/TabletGroupWatcher.java       |   9 +-
 .../functional/TabletManagementIteratorIT.java     | 164 ++++++++++++++++-----
 6 files changed, 182 insertions(+), 65 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/manager/state/TabletManagement.java
 
b/core/src/main/java/org/apache/accumulo/core/manager/state/TabletManagement.java
index 7b13e76b7f..b240f21095 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/manager/state/TabletManagement.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/manager/state/TabletManagement.java
@@ -67,9 +67,9 @@ public class TabletManagement {
   }
 
   public static void addError(final BiConsumer<Key,Value> bic, final Text row,
-      final Exception error) {
+      final String errorMessage) {
     final Key errorKey = new Key(row, ERROR_COLUMN_NAME, EMPTY);
-    final Value errorValue = new Value(error.getMessage());
+    final Value errorValue = new Value(errorMessage);
     bic.accept(errorKey, errorValue);
   }
 
@@ -94,6 +94,9 @@ public class TabletManagement {
     Value errorValue = decodedRow.remove(new Key(row, ERROR_COLUMN_NAME, 
EMPTY));
     if (errorValue != null) {
       this.errorMessage = errorValue.toString();
+      this.actions = null;
+      this.tabletMetadata = null;
+      return;
     } else {
       this.errorMessage = null;
     }
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java
 
b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java
index e61c36d181..4a0fdd0ef2 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java
@@ -42,6 +42,7 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.TableId;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.iterators.IteratorEnvironment;
 import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.iterators.user.WholeRowIterator;
@@ -235,26 +236,45 @@ public class TabletManagementIterator extends 
WholeRowIterator {
     };
 
     final Set<ManagementAction> actions = new HashSet<>();
-    final TabletMetadata tm =
-        TabletMetadata.convertRow(kvIter, TabletManagement.CONFIGURED_COLUMNS, 
false, true);
-
-    Exception error = null;
+    String errorMsg = null;
+    TabletMetadata tm = null;
+    KeyExtent extent = null;
+    final String row = keys.get(0) == null ? "no key" : 
keys.get(0).getRow().toString();
     try {
-      LOG.trace("Evaluating extent: {}", tm);
-      computeTabletManagementActions(tm, actions);
-    } catch (Exception e) {
-      LOG.error("Error computing tablet management actions for extent: {}", 
tm.getExtent(), e);
-      error = e;
+      tm = TabletMetadata.convertRow(kvIter, 
TabletManagement.CONFIGURED_COLUMNS, false, true);
+    } catch (RuntimeException e) {
+      LOG.error("Failed to convert tablet metadata at row: {}", row, e);
+      errorMsg = "Failed to convert tablet metadata at row:" + row + ", error: 
" + e.getMessage();
+    }
+    if (errorMsg == null) {
+      try {
+        // Validate that a minimum set of keys were seen to create a valid 
tablet
+        extent = tm.getExtent();
+      } catch (IllegalStateException e) {
+        LOG.error("Irregular tablet metadata encountered: {}", tm, e);
+        errorMsg =
+            "Irregular tablet metadata encountered at row: " + row + ", error: 
" + e.getMessage();
+      }
+    }
+    if (errorMsg == null) {
+      try {
+        LOG.trace("Evaluating extent: {}", tm);
+        computeTabletManagementActions(tm, actions);
+      } catch (Exception e) {
+        LOG.error("Error computing tablet management actions for extent: {}", 
extent, e);
+        errorMsg = "Error computing tablet management actions for extent: " + 
extent.toString()
+            + ", error: " + e.getMessage();
+      }
     }
 
-    if (!actions.isEmpty() || error != null) {
-      if (error != null) {
+    if (!actions.isEmpty() || errorMsg != null) {
+      if (errorMsg != null) {
         // Insert the error into K,V pair representing
         // the tablet metadata.
         TabletManagement.addError((k, v) -> {
           keys.add(k);
           values.add(v);
-        }, currentRow, error);
+        }, currentRow, errorMsg);
       } else if (!actions.isEmpty()) {
         // If we simply returned here, then the client would get the encoded 
K,V
         // from the WholeRowIterator. However, it would not know the reason(s) 
why
@@ -269,11 +289,11 @@ public class TabletManagementIterator extends 
WholeRowIterator {
       // This key is being created exactly the same way as the whole row 
iterator creates keys.
       // This is important for ensuring that seek works as expected in the 
continue case. See
       // WholeRowIterator seek function for details, it looks for keys w/o 
columns.
-      LOG.trace("Returning extent {} with reasons: {}", tm.getExtent(), 
actions);
+      LOG.trace("Returning extent {} with reasons: {}, error: {}", extent, 
actions, errorMsg);
       return true;
     }
 
-    LOG.trace("No reason to return extent {}, continuing", tm.getExtent());
+    LOG.trace("No reason to return extent {}, continuing", extent);
     return false;
   }
 
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java
 
b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java
index 6eaec59931..7be790e269 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementScanner.java
@@ -127,10 +127,15 @@ public class TabletManagementScanner implements 
ClosableIterator<TabletManagemen
     Entry<Key,Value> e = iter.next();
     try {
       TabletManagement tm = TabletManagementIterator.decode(e);
-      log.trace(
-          "Returning metadata tablet, extent: {}, tabletAvailability: {}, 
actions: {}, error: {}",
-          tm.getTabletMetadata().getExtent(), 
tm.getTabletMetadata().getTabletAvailability(),
-          tm.getActions(), tm.getErrorMessage());
+      if (log.isTraceEnabled()) {
+        if (tm.getErrorMessage() == null) {
+          log.trace("Returning metadata tablet, extent: {}, 
tabletAvailability: {}, actions: {}",
+              tm.getTabletMetadata().getExtent(), 
tm.getTabletMetadata().getTabletAvailability(),
+              tm.getActions());
+        } else {
+          log.trace("Returning metadata tablet error: {}", 
tm.getErrorMessage());
+        }
+      }
       return tm;
     } catch (IOException e1) {
       throw new RuntimeException("Error creating TabletMetadata object", e1);
diff --git 
a/server/base/src/test/java/org/apache/accumulo/server/manager/state/TabletManagementTest.java
 
b/server/base/src/test/java/org/apache/accumulo/server/manager/state/TabletManagementTest.java
index 8347353f60..0bab62dfbc 100644
--- 
a/server/base/src/test/java/org/apache/accumulo/server/manager/state/TabletManagementTest.java
+++ 
b/server/base/src/test/java/org/apache/accumulo/server/manager/state/TabletManagementTest.java
@@ -22,6 +22,7 @@ import static 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSec
 import static 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_COLUMN;
 import static 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.ArrayList;
@@ -155,7 +156,7 @@ public class TabletManagementTest {
     final SortedMap<Key,Value> entries = createMetadataEntryKV(extent);
 
     TabletManagement.addError(entries::put, entries.firstKey().getRow(),
-        new UnsupportedOperationException("Not supported."));
+        new UnsupportedOperationException("Not supported.").getMessage());
     Key key = entries.firstKey();
     Value val = WholeRowIterator.encodeRow(new ArrayList<>(entries.keySet()),
         new ArrayList<>(entries.values()));
@@ -166,8 +167,7 @@ public class TabletManagementTest {
 
     TabletManagement tmi = new TabletManagement(key, val, true);
     TabletMetadata tabletMetadata = tmi.getTabletMetadata();
-    assertEquals(entries, tabletMetadata.getKeyValues().stream().collect(
-        Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (a, b) -> b, 
TreeMap::new)));
+    assertNull(tabletMetadata);
     assertEquals("Not supported.", tmi.getErrorMessage());
   }
 
diff --git 
a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
 
b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 78abd84fed..0658262f56 100644
--- 
a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ 
b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -484,20 +484,17 @@ abstract class TabletGroupWatcher extends 
AccumuloDaemonThread {
         throw new IllegalStateException("State store returned a null 
ManagerTabletInfo object");
       }
 
-      final TabletMetadata tm = mti.getTabletMetadata();
-
       final String mtiError = mti.getErrorMessage();
       if (mtiError != null) {
-        // An error happened on the TabletServer in the 
TabletManagementIterator
-        // when trying to process this extent.
         LOG.warn(
-            "Error on TabletServer trying to get Tablet management information 
for extent: {}. Error message: {}",
-            tm.getExtent(), mtiError);
+            "Error on TabletServer trying to get Tablet management information 
for metadata tablet. Error message: {}",
+            mtiError);
         this.metrics.incrementTabletGroupWatcherError(this.store.getLevel());
         tableMgmtStats.tabletsWithErrors++;
         continue;
       }
 
+      final TabletMetadata tm = mti.getTabletMetadata();
       final TableId tableId = tm.getTableId();
       // ignore entries for tables that do not exist in zookeeper
       if (manager.getTableManager().getTableState(tableId) == 
TableState.UNKNOWN) {
diff --git 
a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java
 
b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java
index ffb8a5db5b..666c0e5149 100644
--- 
a/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java
+++ 
b/test/src/main/java/org/apache/accumulo/test/functional/TabletManagementIteratorIT.java
@@ -25,6 +25,7 @@ import static 
org.apache.accumulo.core.manager.state.TabletManagement.Management
 import static 
org.apache.accumulo.core.manager.state.TabletManagement.ManagementAction.NEEDS_VOLUME_REPLACEMENT;
 import static org.apache.accumulo.harness.AccumuloITBase.MINI_CLUSTER_ONLY;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
@@ -79,6 +80,7 @@ import org.apache.accumulo.core.metadata.StoredTabletFile;
 import org.apache.accumulo.core.metadata.SystemTables;
 import org.apache.accumulo.core.metadata.TServerInstance;
 import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.Ample.TabletMutator;
 import org.apache.accumulo.core.metadata.schema.DataFileValue;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
@@ -87,6 +89,7 @@ import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.Da
 import 
org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType;
+import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location;
 import org.apache.accumulo.core.metadata.schema.TabletOperationId;
 import org.apache.accumulo.core.metadata.schema.TabletOperationType;
 import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
@@ -99,6 +102,7 @@ import 
org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
 import org.apache.accumulo.server.manager.LiveTServerSet;
 import org.apache.accumulo.server.manager.state.TabletManagementIterator;
 import org.apache.accumulo.server.manager.state.TabletManagementParameters;
+import org.apache.accumulo.server.metadata.TabletsMutatorImpl;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
@@ -172,6 +176,7 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       copyTable(client, SystemTables.METADATA.tableName(), metaCopy1);
 
       var tableId1 = getServerContext().getTableId(t1);
+      var tableId2 = getServerContext().getTableId(t2);
       var tableId3 = getServerContext().getTableId(t3);
       var tableId4 = getServerContext().getTableId(t4);
 
@@ -186,15 +191,16 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
 
       TabletManagementParameters tabletMgmtParams = createParameters(client);
       Map<KeyExtent,Set<TabletManagement.ManagementAction>> tabletsInFlux =
-          findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams);
+          findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams, 
false);
       while (!tabletsInFlux.isEmpty()) {
         log.debug("Waiting for {} tablets for {}", tabletsInFlux, metaCopy1);
         UtilWaitThread.sleep(500);
         copyTable(client, SystemTables.METADATA.tableName(), metaCopy1);
-        tabletsInFlux = findTabletsNeedingAttention(client, metaCopy1, 
tabletMgmtParams);
+        tabletsInFlux = findTabletsNeedingAttention(client, metaCopy1, 
tabletMgmtParams, false);
       }
       expected = Map.of();
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy1, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams, 
false),
           "No tables should need attention");
 
       // The metadata table stabilized and metaCopy1 contains a copy suitable 
for testing. Before
@@ -212,7 +218,8 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       tabletMgmtParams = createParameters(client);
       expected = Map.of(endR1, Set.of(NEEDS_LOCATION_UPDATE), prevR1, 
Set.of(NEEDS_LOCATION_UPDATE),
           endR3, Set.of(NEEDS_LOCATION_UPDATE), prevR3, 
Set.of(NEEDS_LOCATION_UPDATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy1, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams, 
false),
           "Should have four tablets with hosting availability changes");
 
       // test continue scan functionality, this test needs a table and tablet 
mgmt params that will
@@ -223,36 +230,42 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       removeLocation(client, metaCopy1, t3);
       expected =
           Map.of(endR1, Set.of(NEEDS_LOCATION_UPDATE), prevR1, 
Set.of(NEEDS_LOCATION_UPDATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy1, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams, 
false),
           "Should have two tablets without a loc");
 
       // Test setting the operation id on one of the tablets in table t1. 
Table t1 has two tablets
       // w/o a location. Only one should need attention because of the 
operation id.
       setOperationId(client, metaCopy1, t1, new Text("some split"), 
TabletOperationType.SPLITTING);
       expected = Map.of(prevR1, Set.of(NEEDS_LOCATION_UPDATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy1, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy1, tabletMgmtParams, 
false),
           "Should have tablets needing attention because of operation id");
 
       // test the cases where the assignment is to a dead tserver
       reassignLocation(client, metaCopy2, t3);
       expected = Map.of(endR3, Set.of(NEEDS_LOCATION_UPDATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy2, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy2, tabletMgmtParams, 
false),
           "Only 1 of 2 tablets in table t1 should be returned");
 
       // Test the recovery cases
       createLogEntry(client, metaCopy5, t1);
       setTabletAvailability(client, metaCopy5, t1, 
TabletAvailability.UNHOSTED.name());
       expected = Map.of(endR1, Set.of(NEEDS_LOCATION_UPDATE, NEEDS_RECOVERY));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy5, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy5, tabletMgmtParams, 
false),
           "Only 1 of 2 tablets in table t1 should be returned");
       setTabletAvailability(client, metaCopy5, t1, 
TabletAvailability.ONDEMAND.name());
       expected = Map.of(endR1, Set.of(NEEDS_LOCATION_UPDATE, NEEDS_RECOVERY));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy5, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy5, tabletMgmtParams, 
false),
           "Only 1 of 2 tablets in table t1 should be returned");
       setTabletAvailability(client, metaCopy5, t1, 
TabletAvailability.HOSTED.name());
       expected = Map.of(endR1, Set.of(NEEDS_LOCATION_UPDATE, NEEDS_RECOVERY), 
prevR1,
           Set.of(NEEDS_LOCATION_UPDATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy5, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy5, tabletMgmtParams, 
false),
           "2 tablets in table t1 should be returned");
 
       // Remove location and set merge operation id on both tablets
@@ -261,16 +274,19 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       removeLocation(client, metaCopy4, t4);
       expected =
           Map.of(endR4, Set.of(NEEDS_LOCATION_UPDATE), prevR4, 
Set.of(NEEDS_LOCATION_UPDATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Tablets have no location and a tablet availability of hosted, so 
they should need attention");
 
       // Test MERGING and SPLITTING do not need attention with no location or 
wals
       setOperationId(client, metaCopy4, t4, null, TabletOperationType.MERGING);
       expected = Map.of();
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Should have no tablets needing attention for merge as they have no 
location");
       setOperationId(client, metaCopy4, t4, null, 
TabletOperationType.SPLITTING);
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Should have no tablets needing attention for merge as they have no 
location");
 
       // Create a log entry for one of the tablets, this tablet will now need 
attention
@@ -278,24 +294,28 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       setOperationId(client, metaCopy4, t4, null, TabletOperationType.MERGING);
       createLogEntry(client, metaCopy4, t4);
       expected = Map.of(endR4, Set.of(NEEDS_LOCATION_UPDATE, NEEDS_RECOVERY));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Should have a tablet needing attention because of wals");
       // Switch op to SPLITTING which should also need attention like MERGING
       setOperationId(client, metaCopy4, t4, null, 
TabletOperationType.SPLITTING);
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Should have a tablet needing attention because of wals");
 
       // Switch op to delete, no tablets should need attention even with WALs
       setOperationId(client, metaCopy4, t4, null, 
TabletOperationType.DELETING);
       expected = Map.of();
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Should have no tablets needing attention for delete");
 
       // test the bad tablet location state case (inconsistent metadata)
       tabletMgmtParams = createParameters(client);
       addDuplicateLocation(client, metaCopy3, t3);
       expected = Map.of(prevR3, Set.of(NEEDS_LOCATION_UPDATE, BAD_STATE));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy3, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy3, tabletMgmtParams, 
false),
           "Should have 1 tablet that needs a metadata repair");
 
       // test the volume replacements case. Need to insert some files into
@@ -306,20 +326,23 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
           Map.of(new Path("file:/vol1/accumulo/inst_id"), new 
Path("file:/vol2/accumulo/inst_id"));
       tabletMgmtParams = createParameters(client, replacements);
       expected = Map.of(prevR4, Set.of(NEEDS_VOLUME_REPLACEMENT));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy4, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy4, tabletMgmtParams, 
false),
           "Should have one tablet that needs a volume replacement");
 
       // In preparation for split an offline testing ensure nothing needs 
attention
       tabletMgmtParams = createParameters(client);
       addFiles(client, metaCopy6, t4);
       expected = Map.of();
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy6, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy6, tabletMgmtParams, 
false),
           "No tablets should need attention");
       // Lower the split threshold for the table, should cause the files added 
to need attention.
       client.tableOperations().setProperty(tables[3], 
Property.TABLE_SPLIT_THRESHOLD.getKey(),
           "1K");
       expected = Map.of(prevR4, Set.of(NEEDS_SPLITTING));
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy6, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy6, tabletMgmtParams, 
false),
           "Should have one tablet that needs splitting");
 
       // Take the table offline which should prevent the tablet from being 
returned for needing to
@@ -327,9 +350,81 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       client.tableOperations().offline(tables[3], false);
       tabletMgmtParams = createParameters(client);
       expected = Map.of();
-      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy6, 
tabletMgmtParams),
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy6, tabletMgmtParams, 
false),
           "No tablets should need attention");
 
+      // Bring the table back online to re-confirm that it needs splitting. 
Introduce some errors
+      // into the metadata table.
+      client.tableOperations().online(tables[3]);
+      tabletMgmtParams = createParameters(client);
+      expected = Map.of(prevR4, Set.of(NEEDS_SPLITTING));
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy6, tabletMgmtParams, 
false),
+          "Should have one tablet that needs splitting");
+
+      // insert an errant entry into the metadata between tables 1 and 2.
+      TableId badTableId = TableId.of(tableId2.canonical() + "1");
+      try (TabletsMutatorImpl mut = new TabletsMutatorImpl(getServerContext(), 
(dl) -> metaCopy6)) {
+        KeyExtent nonExistantTable = new KeyExtent(badTableId, null, null);
+        TabletMutator tm = mut.mutateTablet(nonExistantTable);
+        tm.putLocation(Location.current("fakeServer", "fakeSession"));
+        tm.automaticallyPutServerLock(false);
+        tm.mutate();
+      }
+      IllegalStateException ise = assertThrows(IllegalStateException.class,
+          () -> findTabletsNeedingAttention(client, metaCopy6, 
createParameters(client), false));
+      assertTrue(ise.getMessage().startsWith("Irregular tablet metadata 
encountered at row: "));
+      assertTrue(ise.getMessage()
+          .endsWith("No prev endrow seen.  tableId: " + badTableId + " endrow: 
null"));
+
+      // with errors suppressed, should return prior state
+      tabletMgmtParams = createParameters(client);
+      expected = Map.of(prevR4, Set.of(NEEDS_SPLITTING));
+      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy6, 
tabletMgmtParams, true),
+          "Should have one tablet that needs splitting");
+
+      // Remove the errant entry
+      try (TabletsMutatorImpl mut = new TabletsMutatorImpl(getServerContext(), 
(dl) -> metaCopy6)) {
+        KeyExtent nonExistantTable = new KeyExtent(badTableId, null, null);
+        TabletMutator tm = mut.mutateTablet(nonExistantTable);
+        tm.deleteLocation(Location.current("fakeServer", "fakeSession"));
+        tm.automaticallyPutServerLock(false);
+        tm.mutate();
+      }
+      // should return prior state without error suppression
+      tabletMgmtParams = createParameters(client);
+      expected = Map.of(prevR4, Set.of(NEEDS_SPLITTING));
+      assertEquals(expected,
+          findTabletsNeedingAttention(client, metaCopy6, tabletMgmtParams, 
false),
+          "Should have one tablet that needs splitting");
+
+      // insert file bad json into a good row
+      try (BatchWriter bw = client.createBatchWriter(metaCopy6)) {
+        Mutation mut = new Mutation(prevR1.toMetaRow());
+        Path p = new Path(
+            "hdfs://localhost:8020/accumulo/tables/" + tableId1 + 
"/default_tablet/F0000070.rf");
+        Range r = new Range();
+        StoredTabletFile path = StoredTabletFile.of(p, r);
+        DataFileValue dfv = new DataFileValue(10, 10, 10);
+        mut.put(DataFileColumnFamily.NAME, new 
Text(path.getMetadataText().toString().substring(5)),
+            new Value(dfv.encode()));
+        bw.addMutation(mut);
+      }
+
+      // check that error is thrown
+      tabletMgmtParams = createParameters(client);
+      expected = Map.of(prevR4, Set.of(NEEDS_SPLITTING));
+      ise = assertThrows(IllegalStateException.class,
+          () -> findTabletsNeedingAttention(client, metaCopy6, 
createParameters(client), false));
+      assertTrue(ise.getMessage().contains("Expected BEGIN_OBJECT but was 
STRING"));
+
+      // with errors suppressed, should return prior state
+      tabletMgmtParams = createParameters(client);
+      expected = Map.of(prevR4, Set.of(NEEDS_SPLITTING));
+      assertEquals(expected, findTabletsNeedingAttention(client, metaCopy6, 
tabletMgmtParams, true),
+          "Should have one tablet that needs splitting");
+
       // clean up
       dropTables(client, t1, t2, t3, t4, metaCopy1, metaCopy2, metaCopy3, 
metaCopy4, metaCopy5,
           metaCopy6);
@@ -378,19 +473,6 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
     try (BatchWriter bw = client.createBatchWriter(table)) {
       bw.addMutation(m);
     }
-    try {
-      client.createScanner(table).iterator()
-          .forEachRemaining(e -> System.out.println(e.getKey() + "-> " + 
e.getValue()));
-    } catch (TableNotFoundException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    } catch (AccumuloSecurityException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    } catch (AccumuloException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    }
   }
 
   private void reassignLocation(AccumuloClient client, String table, String 
tableNameToModify)
@@ -449,8 +531,8 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
   }
 
   private Map<KeyExtent,Set<TabletManagement.ManagementAction>> 
findTabletsNeedingAttention(
-      AccumuloClient client, String table, TabletManagementParameters 
tabletMgmtParams)
-      throws TableNotFoundException, IOException {
+      AccumuloClient client, String table, TabletManagementParameters 
tabletMgmtParams,
+      boolean suppressErrors) throws TableNotFoundException, IOException {
     Map<KeyExtent,Set<TabletManagement.ManagementAction>> results = new 
HashMap<>();
     List<KeyExtent> resultList = new ArrayList<>();
     try (Scanner scanner = client.createScanner(table, Authorizations.EMPTY)) {
@@ -460,6 +542,16 @@ public class TabletManagementIteratorIT extends 
AccumuloClusterHarness {
       for (Entry<Key,Value> e : scanner) {
         if (e != null) {
           TabletManagement mti = TabletManagementIterator.decode(e);
+
+          if (mti.getErrorMessage() != null) {
+            if (suppressErrors) {
+              // just skip this tablet metadata entry
+              continue;
+            } else {
+              throw new IllegalStateException(mti.getErrorMessage());
+            }
+          }
+
           results.put(mti.getTabletMetadata().getExtent(), mti.getActions());
           log.debug("Found tablets that changed state: {}", 
mti.getTabletMetadata().getExtent());
           log.debug("actions : {}", mti.getActions());

Reply via email to