This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch elasticity in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/elasticity by this push: new cebef4bdbc cleaned up a few TODOs in the split code (#4401) cebef4bdbc is described below commit cebef4bdbce59c9f33da8409d64b590b59166195 Author: Keith Turner <ktur...@apache.org> AuthorDate: Wed Mar 20 18:26:20 2024 -0400 cleaned up a few TODOs in the split code (#4401) Removed TODOs in the code related to #3415, #3709, #3412. These todos were done or had issues opened. Added some best effot checks to avoid doing splits if a table is offline. The checks have race conditions as the table lock is not acquired by the split fate op. However as outlined in a comment on #3412 the offline code that waits could be made more comprehensive to account for this. One TODO was handled by using `Ample.RejectionHandler.acceptAbsentTablet()` which likely did not exists when the TODO was written. --- .../accumulo/manager/FateServiceHandler.java | 6 ------ .../manager/tableOps/split/FindSplits.java | 7 +++++++ .../accumulo/manager/tableOps/split/PreSplit.java | 20 ++++++++++++------- .../manager/tableOps/split/UpdateTablets.java | 23 +++++++--------------- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java b/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java index 5c2ae952d0..3e45faca23 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/FateServiceHandler.java @@ -719,10 +719,6 @@ class FateServiceHandler implements FateService.Iface { case TABLE_SPLIT: { TableOperation tableOp = TableOperation.SPLIT; - // ELASTICITY_TODO this does not check if table is offline for now, that is usually done in - // FATE operation with a table lock. Deferring that check for now as its possible tablet - // locks may not be needed. - int SPLIT_OFFSET = 3; // offset where split data begins in arguments list if (arguments.size() < (SPLIT_OFFSET + 1)) { throw new ThriftTableOperationException(null, null, tableOp, @@ -752,8 +748,6 @@ class FateServiceHandler implements FateService.Iface { endRow = endRow.getLength() == 0 ? null : endRow; prevEndRow = prevEndRow.getLength() == 0 ? null : prevEndRow; - // ELASTICITY_TODO create table stores splits in a file, maybe this operation should do the - // same SortedSet<Text> splits = arguments.subList(SPLIT_OFFSET, arguments.size()).stream() .map(ByteBufferUtil::toText).collect(Collectors.toCollection(TreeSet::new)); diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/FindSplits.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/FindSplits.java index 7e3acd8d7f..f72d26cb05 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/FindSplits.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/FindSplits.java @@ -28,6 +28,7 @@ import java.util.function.Consumer; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.fate.FateId; import org.apache.accumulo.core.fate.Repo; +import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.metadata.schema.Ample.ConditionalResult; import org.apache.accumulo.core.metadata.schema.Ample.ConditionalResult.Status; import org.apache.accumulo.core.metadata.schema.UnSplittableMetadata; @@ -88,6 +89,12 @@ public class FindSplits extends ManagerRepo { return null; } + if (manager.getContext().getTableState(extent.tableId()) != TableState.ONLINE) { + // The table is offline, do not bother finding splits + log.debug("Not splitting {} because the table is not online", tabletMetadata.getExtent()); + return null; + } + SortedSet<Text> splits = SplitUtils.findSplits(manager.getContext(), tabletMetadata); if (extent.endRow() != null) { diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java index d956821d18..2bf5007d8c 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/PreSplit.java @@ -30,9 +30,13 @@ import java.util.Objects; import java.util.Optional; import java.util.SortedSet; +import org.apache.accumulo.core.clientImpl.AcceptableThriftTableOperationException; +import org.apache.accumulo.core.clientImpl.thrift.TableOperation; +import org.apache.accumulo.core.clientImpl.thrift.TableOperationExceptionType; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.fate.FateId; import org.apache.accumulo.core.fate.Repo; +import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.Ample.ConditionalResult.Status; import org.apache.accumulo.core.metadata.schema.TabletMetadata; @@ -63,15 +67,8 @@ public class PreSplit extends ManagerRepo { @Override public long isReady(FateId fateId, Manager manager) throws Exception { - - // ELASTICITY_TODO intentionally not getting the table lock because not sure if its needed, - // revist later when more operations are moved out of tablet server - var opid = TabletOperationId.from(TabletOperationType.SPLITTING, fateId); - // ELASTICITY_TODO write IT that spins up 100 threads that all try to add a diff split to - // the same tablet. - // ELASTICITY_TODO does FATE prioritize running Fate txs that have already started? If not would // be good to look into this so we can finish things that are started before running new txs // that have not completed their first step. Once splits starts running, would like it to move @@ -97,6 +94,15 @@ public class PreSplit extends ManagerRepo { return 1000; } } else { + // do not set the operation id on the tablet if the table is offline + if (manager.getContext().getTableState(splitInfo.getOriginal().tableId()) + != TableState.ONLINE) { + throw new AcceptableThriftTableOperationException( + splitInfo.getOriginal().tableId().canonical(), null, TableOperation.SPLIT, + TableOperationExceptionType.OFFLINE, + "Unable to split tablet because the table is offline"); + } + try (var tabletsMutator = manager.getContext().getAmple().conditionallyMutateTablets()) { tabletsMutator.mutateTablet(splitInfo.getOriginal()).requireAbsentOperation() diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java index 808ebf8b3a..98623f75c4 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/split/UpdateTablets.java @@ -32,6 +32,7 @@ import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.fate.FateId; import org.apache.accumulo.core.fate.Repo; import org.apache.accumulo.core.metadata.StoredTabletFile; +import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.Ample.ConditionalResult.Status; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.TabletMetadata; @@ -288,25 +289,15 @@ public class UpdateTablets extends ManagerRepo { log.debug("{} deleting unsplittable metadata from {} because of split", fateId, newExtent); } - mutator.submit(tm -> false); + // if the tablet no longer exists (because changed prev end row, then the update was + // successful. + mutator.submit(Ample.RejectionHandler.acceptAbsentTablet()); var result = tabletsMutator.process().get(splitInfo.getOriginal()); - if (result.getStatus() != Status.ACCEPTED) { - // Can not use Ample's built in code for checking rejected because we are changing the prev - // end row and Ample would try to read the old tablet, so must check it manually. - - var tabletMeta = manager.getContext().getAmple().readTablet(newExtent); - - if (tabletMeta == null || !tabletMeta.getOperationId().equals(opid) - || tabletMeta.getLocation() != null) { - throw new IllegalStateException("Failed to update existing tablet in split " - + splitInfo.getOriginal() + " " + result.getStatus() + " " + result.getExtent() + " " - + (tabletMeta == null ? null : tabletMeta.getLocation())); - } else { - // ELASTICITY_TODO - } - } + Preconditions.checkState(result.getStatus() == Status.ACCEPTED, + "Failed to update existing tablet in split %s %s %s", fateId, splitInfo.getOriginal(), + result.getExtent()); } } }