mimaison commented on a change in pull request #8730: URL: https://github.com/apache/kafka/pull/8730#discussion_r463097918
########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorsIntegrationTest.java ########## @@ -367,14 +406,37 @@ public void testOneWayReplicationWithAutorOffsetSync1() throws InterruptedExcept time.sleep(5000); // create a consumer at backup cluster with same consumer group Id to consume old and new topic - consumer = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( - "group.id", "consumer-group-1"), "primary.test-topic-1", "primary.test-topic-2"); + consumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, "primary.test-topic-1", "primary.test-topic-2"); records = consumer.poll(Duration.ofMillis(500)); // similar reasoning as above, no more records to consume by the same consumer group at backup cluster assertEquals("consumer record size is not zero", 0, records.count()); consumer.close(); + } + + private void produceMessages(EmbeddedConnectCluster cluster, String topicName, int partitions, String msgPrefix) { + for (int i = 0; i < NUM_RECORDS_PRODUCED; i++) { + // produce to all partitions but the last one Review comment: This comment needs updating ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorsIntegrationTest.java ########## @@ -190,24 +211,19 @@ public void close() { public void testReplication() throws InterruptedException { // create consumers before starting the connectors so we don't need to wait for discovery - Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( - "group.id", "consumer-group-1"), "test-topic-1", "backup.test-topic-1"); + Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1", "backup.test-topic-1"); consumer1.poll(Duration.ofMillis(500)); consumer1.commitSync(); consumer1.close(); - Consumer<byte[], byte[]> consumer2 = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( - "group.id", 
"consumer-group-1"), "test-topic-1", "primary.test-topic-1"); + Consumer<byte[], byte[]> consumer2 = backup.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1", "primary.test-topic-1"); Review comment: Do we still need these 2 blocks? In `setup()` we have already consumed all messages. ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorsIntegrationTest.java ########## @@ -128,10 +136,23 @@ public void setup() throws InterruptedException { backup.kafka().createTopic("primary.test-topic-1", 1); backup.kafka().createTopic("heartbeats", 1); - for (int i = 0; i < NUM_RECORDS_PRODUCED; i++) { - primary.kafka().produce("test-topic-1", i % NUM_PARTITIONS, "key", "message-1-" + i); - backup.kafka().produce("test-topic-1", i % NUM_PARTITIONS, "key", "message-2-" + i); - } + // produce to all partitions but the last one Review comment: Would it be better to use a separate topic in order to keep a partition without any records? Changing this topic affects existing checks in all tests. ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorsIntegrationTest.java ########## @@ -128,10 +136,23 @@ public void setup() throws InterruptedException { backup.kafka().createTopic("primary.test-topic-1", 1); backup.kafka().createTopic("heartbeats", 1); - for (int i = 0; i < NUM_RECORDS_PRODUCED; i++) { - primary.kafka().produce("test-topic-1", i % NUM_PARTITIONS, "key", "message-1-" + i); - backup.kafka().produce("test-topic-1", i % NUM_PARTITIONS, "key", "message-2-" + i); - } + // produce to all partitions but the last one + produceMessages(primary, "test-topic-1", NUM_PARTITIONS - 1, "message-1-"); + produceMessages(backup, "test-topic-1", NUM_PARTITIONS - 1, "message-2-"); + + consumerProps = new HashMap<String, Object>() {{ Review comment: As this does not change, I wonder if we could directly initialize `consumerProps` when it's declared. ########## File path: 
connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorsIntegrationTest.java ########## @@ -244,26 +251,50 @@ public void testReplication() throws InterruptedException { assertTrue("Offsets not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( new TopicPartition("primary.test-topic-1", 0))); + assertTrue("Offset of empty partition not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( + new TopicPartition("primary.test-topic-1", NUM_PARTITIONS - 1))); + + // Produce additional messages. + for (int i = 0; i < NUM_RECORDS_PRODUCED; i++) { + // produce to all partitions this time + primary.kafka().produce("test-topic-1", i % NUM_PARTITIONS, "key", "message-2-" + i); + backup.kafka().produce("test-topic-1", i % NUM_PARTITIONS, "key", "message-2-" + i); + } // Failover consumer group to backup cluster. - Consumer<byte[], byte[]> consumer1 = backup.kafka().createConsumer(Collections.singletonMap("group.id", "consumer-group-1")); - consumer1.assign(backupOffsets.keySet()); + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", "consumer-group-1"); + put("auto.offset.reset", "latest"); + }}; + Consumer<byte[], byte[]> consumer1 = backup.kafka().createConsumer(consumerProps); + List<TopicPartition> backupPartitions = IntStream.range(0, NUM_PARTITIONS) + .boxed() + .flatMap(p -> Stream.of(new TopicPartition("test-topic-1", p), new TopicPartition("primary.test-topic-1", p))) + .collect(Collectors.toList()); + consumer1.assign(backupPartitions); backupOffsets.forEach(consumer1::seek); - consumer1.poll(Duration.ofMillis(500)); - consumer1.commitSync(); assertTrue("Consumer failedover to zero offset.", consumer1.position(new TopicPartition("primary.test-topic-1", 0)) > 0); assertTrue("Consumer failedover beyond expected offset.", consumer1.position( - new TopicPartition("primary.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + new 
TopicPartition("primary.test-topic-1", 0)) <= Math.ceil((float) NUM_RECORDS_PRODUCED / (NUM_PARTITIONS - 1))); + assertEquals("Consumer failedover to non-zero offset on last partition.", 0, + consumer1.position(new TopicPartition("primary.test-topic-1", NUM_PARTITIONS - 1))); assertTrue("Checkpoints were not emitted upstream to primary cluster.", primary.kafka().consume(1, CHECKPOINT_DURATION_MS, "backup.checkpoints.internal").count() > 0); + Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> messages1 = consumeAllMessages(consumer1); + System.out.println(messages1); + for (TopicPartition tp : backupPartitions) { + assertNotNull("No data consumed from partition " + tp + ".", messages1.get(tp)); + int expectedMessageCount = tp.toString().equals("test-topic-1-0") ? 22 : 10; Review comment: I'm actually surprised we only see positions `22` and `10`. Why do we only get `test-topic-1-0` here and not the other 9 partitions? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org