heesung-sn commented on code in PR #20948:
URL: https://github.com/apache/pulsar/pull/20948#discussion_r1292187842
##########
pulsar-broker/src/main/java/org/apache/pulsar/broker/service/persistent/MessageDeduplication.java:
##########
@@ -323,26 +323,29 @@ public MessageDupStatus isDuplicate(PublishContext
publishContext, ByteBuf heade
String producerName = publishContext.getProducerName();
long sequenceId = publishContext.getSequenceId();
+ headersAndPayload.markReaderIndex();
+ MessageMetadata msgMetadata =
Commands.parseMessageMetadata(headersAndPayload);
+ headersAndPayload.resetReaderIndex();
long highestSequenceId =
Math.max(publishContext.getHighestSequenceId(), sequenceId);
if (producerName.startsWith(replicatorPrefix)) {
// Message is coming from replication, we need to use the original
producer name and sequence id
// for the purpose of deduplication and not rely on the
"replicator" name.
- int readerIndex = headersAndPayload.readerIndex();
- MessageMetadata md =
Commands.parseMessageMetadata(headersAndPayload);
- producerName = md.getProducerName();
- sequenceId = md.getSequenceId();
- highestSequenceId = Math.max(md.getHighestSequenceId(),
sequenceId);
+ producerName = msgMetadata.getProducerName();
+ sequenceId = msgMetadata.getSequenceId();
+ highestSequenceId = Math.max(msgMetadata.getHighestSequenceId(),
sequenceId);
publishContext.setOriginalProducerName(producerName);
publishContext.setOriginalSequenceId(sequenceId);
publishContext.setOriginalHighestSequenceId(highestSequenceId);
- headersAndPayload.readerIndex(readerIndex);
}
-
+ long chunkID = msgMetadata.hasChunkId() ? msgMetadata.getChunkId() : 0;
// Synchronize the get() and subsequent put() on the map. This would
only be relevant if the producer
// disconnects and re-connects very quickly. At that point the call
can be coming from a different thread
synchronized (highestSequencedPushed) {
Long lastSequenceIdPushed =
highestSequencedPushed.get(producerName);
- if (lastSequenceIdPushed != null && sequenceId <=
lastSequenceIdPushed) {
+ // All chunks of a message use the same message metadata and
sequence ID,
+ // so it's expected for sequenceId == lastSequenceIdPushed when
the chunk ID > 0.
+ if (lastSequenceIdPushed != null && (chunkID > 0 ? sequenceId <
lastSequenceIdPushed
+ : sequenceId <= lastSequenceIdPushed)) {
Review Comment:
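What about this case (message-chunk-sequenceId):
M1-c1-s1
M1-c2-s1
M1-c2-s1 // same chunk sent again; shouldn't we dedup this?
With the new condition, a chunk with chunkID > 0 is only treated as a duplicate when
sequenceId < lastSequenceIdPushed, so this repeated chunk (sequenceId == lastSequenceIdPushed)
would not be deduplicated.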
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]