chia7712 commented on code in PR #19793:
URL: https://github.com/apache/kafka/pull/19793#discussion_r2837830578


##########
server/src/main/java/org/apache/kafka/server/purgatory/DelayedProduce.java:
##########
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.purgatory;
+
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
+import org.apache.kafka.server.metrics.KafkaMetricsGroup;
+
+import com.yammer.metrics.core.Meter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+/**
+ * A delayed produce operation that can be created by the replica manager and 
watched
+ * in the produce operation purgatory
+ */
+public class DelayedProduce extends DelayedOperation {
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(DelayedProduce.class);
+
+    // Changing the package or class name may cause incompatibility with 
existing code and metrics configuration
+    private static final String METRICS_PACKAGE = "kafka.server";
+    private static final String METRICS_CLASS_NAME = "DelayedProduceMetrics";
+    private static final KafkaMetricsGroup METRICS_GROUP = new 
KafkaMetricsGroup(METRICS_PACKAGE, METRICS_CLASS_NAME);
+    private static final Meter AGGREGATE_EXPIRATION_METER = 
METRICS_GROUP.newMeter("ExpiresPerSec", "requests", TimeUnit.SECONDS);
+    private static final ConcurrentHashMap<TopicPartition, Meter> 
PARTITION_EXPIRATION_METERS = new ConcurrentHashMap<>();
+
+    public static final class ProducePartitionStatus {
+        private final long requiredOffset;
+        private final PartitionResponse responseStatus;
+
+        private volatile boolean acksPending;
+
+        public ProducePartitionStatus(long requiredOffset, PartitionResponse 
responseStatus) {
+            this.requiredOffset = requiredOffset;
+            this.responseStatus = responseStatus;
+        }
+
+        public PartitionResponse responseStatus() {
+            return responseStatus;
+        }
+
+        public void setAcksPending(boolean acksPending) {
+            this.acksPending = acksPending;
+        }
+
+        @Override
+        public String toString() {
+            return String.format(
+                    "[acksPending: %s, error: %s, startOffset: %s, 
requiredOffset: %d]",
+                    acksPending,
+                    responseStatus.error.code(),
+                    responseStatus.baseOffset,
+                    requiredOffset
+            );
+        }
+
+        @Override

Review Comment:
   I think `equal` and `hashCode` is not required, and this is a mutable object 
so providing those methods are not safe



##########
server/src/main/java/org/apache/kafka/server/purgatory/DelayedProduce.java:
##########
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.purgatory;
+
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
+import org.apache.kafka.server.metrics.KafkaMetricsGroup;
+
+import com.yammer.metrics.core.Meter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+/**
+ * A delayed produce operation that can be created by the replica manager and 
watched
+ * in the produce operation purgatory
+ */
+public class DelayedProduce extends DelayedOperation {
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(DelayedProduce.class);
+
+    // Changing the package or class name may cause incompatibility with 
existing code and metrics configuration
+    private static final String METRICS_PACKAGE = "kafka.server";
+    private static final String METRICS_CLASS_NAME = "DelayedProduceMetrics";
+    private static final KafkaMetricsGroup METRICS_GROUP = new 
KafkaMetricsGroup(METRICS_PACKAGE, METRICS_CLASS_NAME);
+    private static final Meter AGGREGATE_EXPIRATION_METER = 
METRICS_GROUP.newMeter("ExpiresPerSec", "requests", TimeUnit.SECONDS);
+    private static final ConcurrentHashMap<TopicPartition, Meter> 
PARTITION_EXPIRATION_METERS = new ConcurrentHashMap<>();
+
+    public static final class ProducePartitionStatus {
+        private final long requiredOffset;
+        private final PartitionResponse responseStatus;
+
+        private volatile boolean acksPending;
+
+        public ProducePartitionStatus(long requiredOffset, PartitionResponse 
responseStatus) {
+            this.requiredOffset = requiredOffset;
+            this.responseStatus = responseStatus;
+        }
+
+        public PartitionResponse responseStatus() {
+            return responseStatus;
+        }
+
+        public void setAcksPending(boolean acksPending) {
+            this.acksPending = acksPending;
+        }
+
+        @Override
+        public String toString() {
+            return String.format(
+                    "[acksPending: %s, error: %s, startOffset: %s, 
requiredOffset: %d]",
+                    acksPending,
+                    responseStatus.error.code(),
+                    responseStatus.baseOffset,
+                    requiredOffset
+            );
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            final ProducePartitionStatus that = (ProducePartitionStatus) o;
+            return requiredOffset == that.requiredOffset && acksPending == 
that.acksPending && Objects.equals(responseStatus, that.responseStatus);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(requiredOffset, responseStatus, acksPending);
+        }
+    }
+
+    @FunctionalInterface
+    public interface PartitionStatusValidator {
+        /**
+         * Validates the status of a partition and its replicas to determine
+         * if a delayed produce operation can be completed.
+         *
+         * @param topicPartition The partition to check.
+         * @param requiredOffset The offset that replicas must reach.
+         * @return An entry where the key is a Boolean (hasEnoughReplicas)
+         * and the value is the Error code.
+         */
+        Map.Entry<Boolean, Errors> validate(TopicPartition topicPartition, 
long requiredOffset);

Review Comment:
   Sorry, this is a bit hard to follow. Would you mind refactoring it using 
record class for better readability?



##########
server/src/main/java/org/apache/kafka/server/purgatory/DelayedProduce.java:
##########
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.purgatory;
+
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
+import org.apache.kafka.server.metrics.KafkaMetricsGroup;
+
+import com.yammer.metrics.core.Meter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+/**
+ * A delayed produce operation that can be created by the replica manager and 
watched
+ * in the produce operation purgatory
+ */
+public class DelayedProduce extends DelayedOperation {
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(DelayedProduce.class);
+
+    // Changing the package or class name may cause incompatibility with 
existing code and metrics configuration
+    private static final String METRICS_PACKAGE = "kafka.server";
+    private static final String METRICS_CLASS_NAME = "DelayedProduceMetrics";
+    private static final KafkaMetricsGroup METRICS_GROUP = new 
KafkaMetricsGroup(METRICS_PACKAGE, METRICS_CLASS_NAME);
+    private static final Meter AGGREGATE_EXPIRATION_METER = 
METRICS_GROUP.newMeter("ExpiresPerSec", "requests", TimeUnit.SECONDS);
+    private static final ConcurrentHashMap<TopicPartition, Meter> 
PARTITION_EXPIRATION_METERS = new ConcurrentHashMap<>();
+
+    public static final class ProducePartitionStatus {
+        private final long requiredOffset;
+        private final PartitionResponse responseStatus;
+
+        private volatile boolean acksPending;
+
+        public ProducePartitionStatus(long requiredOffset, PartitionResponse 
responseStatus) {
+            this.requiredOffset = requiredOffset;
+            this.responseStatus = responseStatus;
+        }
+
+        public PartitionResponse responseStatus() {
+            return responseStatus;
+        }
+
+        public void setAcksPending(boolean acksPending) {

Review Comment:
   Could we change the scope from public to private? It should not be 
accessible publicly



##########
server/src/main/java/org/apache/kafka/server/purgatory/DelayedProduce.java:
##########
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.purgatory;
+
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
+import org.apache.kafka.server.metrics.KafkaMetricsGroup;
+
+import com.yammer.metrics.core.Meter;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+/**
+ * A delayed produce operation that can be created by the replica manager and 
watched
+ * in the produce operation purgatory
+ */
+public class DelayedProduce extends DelayedOperation {
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(DelayedProduce.class);
+
+    // Changing the package or class name may cause incompatibility with 
existing code and metrics configuration
+    private static final String METRICS_PACKAGE = "kafka.server";
+    private static final String METRICS_CLASS_NAME = "DelayedProduceMetrics";
+    private static final KafkaMetricsGroup METRICS_GROUP = new 
KafkaMetricsGroup(METRICS_PACKAGE, METRICS_CLASS_NAME);
+    private static final Meter AGGREGATE_EXPIRATION_METER = 
METRICS_GROUP.newMeter("ExpiresPerSec", "requests", TimeUnit.SECONDS);
+    private static final ConcurrentHashMap<TopicPartition, Meter> 
PARTITION_EXPIRATION_METERS = new ConcurrentHashMap<>();
+
+    public static final class ProducePartitionStatus {
+        private final long requiredOffset;
+        private final PartitionResponse responseStatus;
+
+        private volatile boolean acksPending;
+
+        public ProducePartitionStatus(long requiredOffset, PartitionResponse 
responseStatus) {
+            this.requiredOffset = requiredOffset;
+            this.responseStatus = responseStatus;
+        }
+
+        public PartitionResponse responseStatus() {
+            return responseStatus;
+        }
+
+        public void setAcksPending(boolean acksPending) {
+            this.acksPending = acksPending;
+        }
+
+        @Override
+        public String toString() {
+            return String.format(
+                    "[acksPending: %s, error: %s, startOffset: %s, 
requiredOffset: %d]",
+                    acksPending,
+                    responseStatus.error.code(),
+                    responseStatus.baseOffset,
+                    requiredOffset
+            );
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            final ProducePartitionStatus that = (ProducePartitionStatus) o;
+            return requiredOffset == that.requiredOffset && acksPending == 
that.acksPending && Objects.equals(responseStatus, that.responseStatus);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(requiredOffset, responseStatus, acksPending);
+        }
+    }
+
+    @FunctionalInterface
+    public interface PartitionStatusValidator {
+        /**
+         * Validates the status of a partition and its replicas to determine
+         * if a delayed produce operation can be completed.
+         *
+         * @param topicPartition The partition to check.
+         * @param requiredOffset The offset that replicas must reach.
+         * @return An entry where the key is a Boolean (hasEnoughReplicas)
+         * and the value is the Error code.
+         */
+        Map.Entry<Boolean, Errors> validate(TopicPartition topicPartition, 
long requiredOffset);
+    }
+
+    private final Map<TopicIdPartition, ProducePartitionStatus> produceStatus;
+    private final PartitionStatusValidator statusValidator;
+    private final Consumer<Map<TopicIdPartition, PartitionResponse>> 
responseCallback;
+
+    public DelayedProduce(long delayMs,
+                          Map<TopicIdPartition, ProducePartitionStatus> 
produceStatus,
+                          PartitionStatusValidator statusValidator,
+                          Consumer<Map<TopicIdPartition, PartitionResponse>> 
responseCallback) {
+        super(delayMs);
+
+        this.produceStatus = produceStatus;
+        this.statusValidator = statusValidator;
+        this.responseCallback = responseCallback;
+
+        // first update the acks pending variable according to the error code
+        produceStatus.forEach((topicPartition, status) -> {
+            if (status.responseStatus.error == Errors.NONE) {
+                // Timeout error state will be cleared when required acks are 
received
+                status.acksPending = true;
+                status.responseStatus.error = Errors.REQUEST_TIMED_OUT;
+            } else {
+                status.acksPending = false;
+            }
+
+            LOGGER.trace("Initial partition status for {} is {}", 
topicPartition, status);
+        });
+    }
+
+    /**
+     * The delayed produce operation can be completed if every partition
+     * it produces to is satisfied by one of the following:
+     *
+     * Case A: Replica not assigned to partition
+     * Case B: Replica is no longer the leader of this partition
+     * Case C: This broker is the leader:
+     *   C.1 - If there was a local error thrown while checking if at least 
requiredAcks
+     *         replicas have caught up to this operation: set an error in 
response
+     *   C.2 - Otherwise, set the response with no error.
+     *
+     * These cases were originally validated by some methods in the 
ReplicaManager.
+     * However, since DelayedProduce has been moved to the server module, it 
cannot directly access the ReplicaManager.
+     * Therefore, these validations have been delegated to the method within 
`ReplicaManager#maybeAddDelayedProduce()`.
+     */
+    @Override
+    public boolean tryComplete() {
+        // check for each partition if it still has pending acks
+        produceStatus.forEach((topicIdPartition, status) -> {
+            LOGGER.trace("Checking produce satisfaction for {}, current status 
{}", topicIdPartition, status);
+            // skip those partitions that have already been satisfied
+            if (status.acksPending) {
+                // Delegate to `ReplicaManager#maybeAddDelayedProduce`
+                // Validate Cases A, B, or C
+                Map.Entry<Boolean, Errors> result = 
statusValidator.validate(topicIdPartition.topicPartition(), 
status.requiredOffset);
+
+                // Update the partition status to reflect Case A, B, or C:
+                boolean hasEnough = result.getKey();
+                Errors errors = result.getValue();
+                if (errors != Errors.NONE || hasEnough) {
+                    status.setAcksPending(false);
+                    status.responseStatus.error = errors;
+                }
+            }
+        });
+
+        // check if every partition has satisfied at least one of case A, B or 
C
+        boolean anyPending = false;
+        for (ProducePartitionStatus status : produceStatus.values()) {
+            if (status.acksPending) {
+                anyPending = true;
+                break;
+            }
+        }
+        if (!anyPending) {
+            return forceComplete();
+        }
+
+        return false;
+    }
+
+    @Override
+    public void onExpiration() {
+        produceStatus.forEach((topicIdPartition, status) -> {
+            if (status.acksPending) {
+                LOGGER.debug("Expiring produce request for partition {} with 
status {}", topicIdPartition, status);
+                recordExpiration(topicIdPartition.topicPartition());
+            }
+        });
+    }
+
+    /**
+     * Upon completion, return the current response status along with the 
error code per partition
+     */
+    @Override
+    public void onComplete() {
+        Map<TopicIdPartition, PartitionResponse> responseStatus =
+                produceStatus.entrySet().stream()
+                        .collect(Collectors.toMap(Map.Entry::getKey, entry -> 
entry.getValue().responseStatus()));
+
+        responseCallback.accept(responseStatus);
+    }
+
+    private static void recordExpiration(TopicPartition partition) {
+        AGGREGATE_EXPIRATION_METER.mark();
+        PARTITION_EXPIRATION_METERS.computeIfAbsent(partition,

Review Comment:
   open https://issues.apache.org/jira/browse/KAFKA-20204 to handle the removal 
for the non-existent partitions



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to