hachikuji commented on a change in pull request #10974: URL: https://github.com/apache/kafka/pull/10974#discussion_r664156720
########## File path: tools/src/main/java/org/apache/kafka/tools/TransactionsCommand.java ########## @@ -461,6 +469,417 @@ public void execute(Admin admin, Namespace ns, PrintStream out) throws Exception } } + static class FindHangingTransactionsCommand extends TransactionsCommand { + private static final int MAX_BATCH_SIZE = 500; + + static final String[] HEADERS = new String[] { + "Topic", + "Partition", + "ProducerId", + "ProducerEpoch", + "CoordinatorEpoch", + "StartOffset", + "LastTimestamp", + "Duration(min)" + }; + + FindHangingTransactionsCommand(Time time) { + super(time); + } + + @Override + String name() { + return "find-hanging"; + } + + @Override + void addSubparser(Subparsers subparsers) { + Subparser subparser = subparsers.addParser(name()) + .help("find hanging transactions"); + + subparser.addArgument("--broker-id") + .help("broker id to search for hanging transactions") + .action(store()) + .type(Integer.class) + .required(false); + + subparser.addArgument("--max-transaction-timeout") + .help("maximum transaction timeout in minutes to limit the scope of the search") + .action(store()) + .type(Integer.class) + .setDefault(15) + .required(false); + + subparser.addArgument("--topic") + .help("topic name to limit search to") + .action(store()) + .type(String.class) + .required(false); + + subparser.addArgument("--partition") + .help("partition number") + .action(store()) + .type(Integer.class) + .required(false); + } + + @Override + void execute(Admin admin, Namespace ns, PrintStream out) throws Exception { + Optional<Integer> brokerId = Optional.ofNullable(ns.getInt("broker_id")); + Optional<String> topic = Optional.ofNullable(ns.getString("topic")); + + if (!topic.isPresent() && !brokerId.isPresent()) { + printErrorAndExit("The `find-hanging` command requires either --topic " + + "or --broker-id to limit the scope of the search"); + return; + } + + Optional<Integer> partition = Optional.ofNullable(ns.getInt("partition")); + if (partition.isPresent() && !topic.isPresent()) { + printErrorAndExit("The --partition argument requires --topic to be provided"); + return; + } + + long maxTransactionTimeoutMs = TimeUnit.MINUTES.toMillis( + ns.getInt("max_transaction_timeout")); + + List<TopicPartition> topicPartitions = collectTopicPartitionsToSearch( + admin, + topic, + partition, + brokerId + ); + + List<OpenTransaction> candidates = collectCandidateOpenTransactions( + admin, + brokerId, + maxTransactionTimeoutMs, + topicPartitions + ); + + if (candidates.isEmpty()) { + printHangingTransactions(Collections.emptyList(), out); + } else { + Map<Long, List<OpenTransaction>> openTransactionsByProducerId = groupByProducerId(candidates); + + Map<Long, String> transactionalIds = lookupTransactionalIds( + admin, + openTransactionsByProducerId.keySet() + ); + + Map<String, TransactionDescription> descriptions = describeTransactions( + admin, + transactionalIds.values() + ); + + List<OpenTransaction> hangingTransactions = filterHangingTransactions( + openTransactionsByProducerId, + transactionalIds, + descriptions + ); + + printHangingTransactions(hangingTransactions, out); + } + } + + private List<TopicPartition> collectTopicPartitionsToSearch( + Admin admin, + Optional<String> topic, + Optional<Integer> partition, + Optional<Integer> brokerId + ) throws Exception { + final List<String> topics; + + if (topic.isPresent()) { + if (partition.isPresent()) { + return Collections.singletonList(new TopicPartition(topic.get(), partition.get())); + } else { + topics = Collections.singletonList(topic.get()); + } + } else { + topics = listTopics(admin); + } + + return findTopicPartitions( + admin, + brokerId, + topics + ); + } + + private List<OpenTransaction> filterHangingTransactions( + Map<Long, List<OpenTransaction>> openTransactionsByProducerId, + Map<Long, String> transactionalIds, + Map<String, TransactionDescription> descriptions + ) { + List<OpenTransaction> hangingTransactions = new ArrayList<>(); + + openTransactionsByProducerId.forEach((producerId, openTransactions) -> { + String transactionalId = transactionalIds.get(producerId); + if (transactionalId == null) { + // If we could not find the transactionalId corresponding to the + // producerId of an open transaction, then the transaction is hanging. + hangingTransactions.addAll(openTransactions); + } else { + // Otherwise, we need to check the current transaction state + TransactionDescription description = descriptions.get(transactionalId); + if (description == null) { + hangingTransactions.addAll(openTransactions); + } else { + for (OpenTransaction openTransaction : openTransactions) { + if (description.producerEpoch() > openTransaction.producerState.producerEpoch() + || !description.topicPartitions().contains(openTransaction.topicPartition)) { Review comment: Thanks, added a comment. I decided to remove the epoch check. The producer epoch could be bumped as a result of a transaction timeout, so the check did not seem quite right. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org