liangyepianzhou commented on code in PR #18273: URL: https://github.com/apache/pulsar/pull/18273#discussion_r1095338443
########## pulsar-broker/src/main/java/org/apache/pulsar/broker/transaction/buffer/impl/SnapshotSegmentAbortedTxnProcessorImpl.java: ########## @@ -0,0 +1,721 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.broker.transaction.buffer.impl; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.mledger.AsyncCallbacks; +import org.apache.bookkeeper.mledger.Entry; +import org.apache.bookkeeper.mledger.ManagedLedgerException; +import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl; +import org.apache.bookkeeper.mledger.impl.PositionImpl; +import org.apache.bookkeeper.mledger.impl.ReadOnlyManagedLedgerImpl; +import 
org.apache.commons.collections4.map.LinkedMap; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pulsar.broker.service.BrokerServiceException; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.broker.systopic.SystemTopicClient; +import org.apache.pulsar.broker.transaction.buffer.AbortedTxnProcessor; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndex; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexes; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexesMetadata; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotSegment; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TxnIDData; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.transaction.TxnID; +import org.apache.pulsar.client.impl.MessageIdImpl; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.naming.SystemTopicNames; +import org.apache.pulsar.common.naming.TopicDomain; +import org.apache.pulsar.common.naming.TopicName; +import org.apache.pulsar.common.protocol.Commands; +import org.apache.pulsar.common.util.FutureUtil; + +@Slf4j +public class SnapshotSegmentAbortedTxnProcessorImpl implements AbortedTxnProcessor { + + /** + * Stores the unsealed aborted transaction IDs, whose size is always less than the snapshotSegmentCapacity. + * It will be persisted as a snapshot segment when its size reaches the configured capacity. + */ + private LinkedList<TxnID> unsealedTxnIds; + + /** + * The map is used to clear the aborted transaction IDs persisted in the expired ledger. + * <p> + * The key PositionImpl {@link PositionImpl} is the persistent position of + * the latest transaction of a segment. 
+ * The value TxnID {@link TxnID} is the latest Transaction ID in a segment. + * </p> + * + * <p> + * If the position is expired, the processor can get the corresponding latest + * transaction ID in this map. And then the processor can clear all the + * transaction IDs in the aborts {@link SnapshotSegmentAbortedTxnProcessorImpl#aborts} + * that are lower than the transaction ID. + * And then the processor can delete the segments persistently according to + * the positions. + * </p> + */ + private final LinkedMap<PositionImpl, TxnID> segmentIndex = new LinkedMap<>(); + + /** + * This map is used to check whether a transaction is an aborted transaction. + * <p> + * The transaction IDs are appended in order, so the processor can delete expired + * transaction IDs according to the latest expired transaction IDs in segmentIndex + * {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex}. + * </p> + */ + private final LinkedMap<TxnID, TxnID> aborts = new LinkedMap<>(); + /** + * This map stores the indexes of the snapshot segment. + * <p> + * The key is the persistent position of the marker of the last transaction in the segment. + * The value TransactionBufferSnapshotIndex {@link TransactionBufferSnapshotIndex} is the + * indexes of the snapshot segment. + * </p> + */ + private final LinkedMap<PositionImpl, TransactionBufferSnapshotIndex> indexes = new LinkedMap<>(); + + private final PersistentTopic topic; + + private volatile long lastSnapshotTimestamps; + + /** + * The number of the aborted transaction IDs in a segment. + * This is calculated according to the configured memory size. + */ + private final int snapshotSegmentCapacity; + /** + * Responsible for executing the persistent tasks. + * <p>Including:</p> + * <p> Update segment index.</p> + * <p> Write snapshot segment.</p> + * <p> Delete snapshot segment.</p> + * <p> Clear all snapshot segment. 
</p> + */ + private final PersistentWorker persistentWorker; + + private static final String SNAPSHOT_PREFIX = "multiple-"; + + public SnapshotSegmentAbortedTxnProcessorImpl(PersistentTopic topic) { + this.topic = topic; + this.persistentWorker = new PersistentWorker(topic); + /* + Calculate the segment capacity according to its size configuration. + <p> + The empty transaction segment size is 5. + Adding an empty linkedList, the size increase to 6. + Add the topic name the size increase to the 7 + topic.getName().length(). + Add the aborted transaction IDs, the size increase to 8 + + topic.getName().length() + 3 * aborted transaction ID size. + </p> + */ + this.snapshotSegmentCapacity = (topic.getBrokerService().getPulsar() + .getConfiguration().getTransactionBufferSnapshotSegmentSize() - 8 - topic.getName().length()) / 3; + this.unsealedTxnIds = new LinkedList<>(); + } + + @Override + public void putAbortedTxnAndPosition(TxnID txnID, PositionImpl position) { + unsealedTxnIds.add(txnID); + aborts.put(txnID, txnID); + /* + The size of lastAbortedTxns reaches the configuration of the size of snapshot segment. + Append a task to persist the segment with the aborted transaction IDs and the latest + transaction mark persistent position passed by param. + */ + if (unsealedTxnIds.size() >= snapshotSegmentCapacity) { + LinkedList<TxnID> abortedSegment = unsealedTxnIds; + segmentIndex.put(position, txnID); + persistentWorker.appendTask(PersistentWorker.OperationType.WriteSegment, () -> + persistentWorker.takeSnapshotSegmentAsync(abortedSegment, position)); + this.unsealedTxnIds = new LinkedList<>(); + } + } + + @Override + public boolean checkAbortedTransaction(TxnID txnID) { + return aborts.containsKey(txnID); + } + + /** + * Check whether the position in segmentIndex {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex} + * is expired. If the position does not exist in the original topic, the corresponding transaction is an invalid + * transaction. 
And the corresponding segment is invalid, too. The transaction IDs before the transaction ID + * in the aborts are invalid, too. + */ + @Override + public void trimExpiredAbortedTxns() { + //Checking whether some segments have expired. + List<PositionImpl> positionsNeedToDelete = new ArrayList<>(); + while (!segmentIndex.isEmpty() && !((ManagedLedgerImpl) topic.getManagedLedger()) + .ledgerExists(segmentIndex.firstKey().getLedgerId())) { + if (log.isDebugEnabled()) { + log.debug("[{}] Topic transaction buffer clear aborted transactions, maxReadPosition : {}", + topic.getName(), segmentIndex.firstKey()); + } + PositionImpl positionNeedToDelete = segmentIndex.firstKey(); + positionsNeedToDelete.add(positionNeedToDelete); + + TxnID theLatestDeletedTxnID = segmentIndex.remove(0); + while (!aborts.firstKey().equals(theLatestDeletedTxnID)) { + aborts.remove(0); + } + aborts.remove(0); + } + //Batch delete the expired segment + if (!positionsNeedToDelete.isEmpty()) { + persistentWorker.appendTask(PersistentWorker.OperationType.DeleteSegment, + () -> persistentWorker.deleteSnapshotSegment(positionsNeedToDelete)); + } + } + + private String buildKey(long sequenceId) { + return SNAPSHOT_PREFIX + sequenceId + "-" + this.topic.getName(); + } + + @Override + public CompletableFuture<Void> takeAbortedTxnsSnapshot(PositionImpl maxReadPosition) { + //Store the latest aborted transaction IDs in unsealedTxnIDs and the corresponding latest max read position. 
+ TransactionBufferSnapshotIndexesMetadata metadata = new TransactionBufferSnapshotIndexesMetadata( + maxReadPosition.getLedgerId(), maxReadPosition.getEntryId(), + convertTypeToTxnIDData(unsealedTxnIds)); + return persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () -> persistentWorker + .updateSnapshotIndex(metadata, indexes.values().stream().toList())); + } + + @Override + public CompletableFuture<PositionImpl> recoverFromSnapshot() { + return topic.getBrokerService().getPulsar().getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotIndexService() + .createReader(TopicName.get(topic.getName())).thenComposeAsync(reader -> { + PositionImpl startReadCursorPosition = null; + TransactionBufferSnapshotIndexes persistentSnapshotIndexes = null; + boolean hasIndex = false; + try { + /* + Read the transaction snapshot segment index. + <p> + The processor can get the sequence ID, unsealed transaction IDs, + segment index list and max read position in the snapshot segment index. + Then we can traverse the index list to read all aborted transaction IDs + in segments to aborts. 
+ </p> + */ + while (reader.hasMoreEvents()) { + Message<TransactionBufferSnapshotIndexes> message = reader.readNextAsync() + .get(getSystemClientOperationTimeoutMs(), TimeUnit.MILLISECONDS); + if (topic.getName().equals(message.getKey())) { + TransactionBufferSnapshotIndexes transactionBufferSnapshotIndexes = message.getValue(); + if (transactionBufferSnapshotIndexes != null) { + hasIndex = true; + persistentSnapshotIndexes = transactionBufferSnapshotIndexes; + startReadCursorPosition = PositionImpl.get( + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionLedgerId(), + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionEntryId()); + } + } + } + } catch (TimeoutException ex) { + Throwable t = FutureUtil.unwrapCompletionException(ex); + String errorMessage = String.format("[%s] Transaction buffer recover fail by read " + + "transactionBufferSnapshot timeout!", topic.getName()); + log.error(errorMessage, t); + return FutureUtil.failedFuture( + new BrokerServiceException.ServiceUnitNotReadyException(errorMessage, t)); + } catch (Exception ex) { + log.error("[{}] Transaction buffer recover fail when read " + + "transactionBufferSnapshot!", topic.getName(), ex); + return FutureUtil.failedFuture(ex); + } finally { + closeReader(reader); + } + PositionImpl finalStartReadCursorPosition = startReadCursorPosition; + TransactionBufferSnapshotIndexes finalPersistentSnapshotIndexes = persistentSnapshotIndexes; + if (!hasIndex) { + return CompletableFuture.completedFuture(null); + } else { + this.unsealedTxnIds = convertTypeToTxnID(persistentSnapshotIndexes + .getSnapshot().getAborts()); + } + //Read snapshot segment to recover aborts. 
+ ArrayList<CompletableFuture<Void>> completableFutures = new ArrayList<>(); + CompletableFuture<Void> openManagedLedgerFuture = new CompletableFuture<>(); + AtomicBoolean hasInvalidIndex = new AtomicBoolean(false); + AsyncCallbacks.OpenReadOnlyManagedLedgerCallback callback = new AsyncCallbacks + .OpenReadOnlyManagedLedgerCallback() { + @Override + public void openReadOnlyManagedLedgerComplete(ReadOnlyManagedLedgerImpl readOnlyManagedLedger, + Object ctx) { + finalPersistentSnapshotIndexes.getIndexList().forEach(index -> { + CompletableFuture<Void> handleSegmentFuture = new CompletableFuture<>(); + completableFutures.add(handleSegmentFuture); + readOnlyManagedLedger.asyncReadEntry( + new PositionImpl(index.getSegmentLedgerID(), + index.getSegmentEntryID()), + new AsyncCallbacks.ReadEntryCallback() { + @Override + public void readEntryComplete(Entry entry, Object ctx) { + handleSnapshotSegmentEntry(entry); + indexes.put(new PositionImpl( + index.abortedMarkLedgerID, + index.abortedMarkEntryID), + index); + entry.release(); + handleSegmentFuture.complete(null); + } + + @Override + public void readEntryFailed(ManagedLedgerException exception, Object ctx) { + if (exception instanceof ManagedLedgerException Review Comment: The ledger of the aborted mark is deleted meaning the index is invalid. So we may delete the check for the exception. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
