[GitHub] [kafka] vitojeng commented on pull request #8907: MINOR: code cleanup for `VOut` inconsistent naming

2020-07-25 Thread GitBox


vitojeng commented on pull request #8907:
URL: https://github.com/apache/kafka/pull/8907#issuecomment-663928064


   Thanks @abbccdda !



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] jsancio commented on a change in pull request #9050: KAFKA-10193: Add preemption for controller events that have callbacks

2020-07-25 Thread GitBox


jsancio commented on a change in pull request #9050:
URL: https://github.com/apache/kafka/pull/9050#discussion_r460468239



##
File path: core/src/main/scala/kafka/controller/ControllerEventManager.scala
##
@@ -77,7 +77,7 @@ class ControllerEventManager(controllerId: Int,
   private val putLock = new ReentrantLock()
   private val queue = new LinkedBlockingQueue[QueuedEvent]
   // Visible for test
-  private[controller] val thread = new ControllerEventThread(ControllerEventThreadName)
+  private[controller] var thread = new ControllerEventThread(ControllerEventThreadName)

Review comment:
   This comment applies to `clearAndPut`:
   
   ```scala
 def clearAndPut(event: ControllerEvent): QueuedEvent = inLock(putLock) {
   queue.forEach(_.preempt(processor))
   queue.clear()
   put(event)
 }
   ```
   I think there is a bug here where at most one event can be processed twice: once by `_.preempt(processor)` and once by `doWork`. I think we can fix this concurrency bug by using `LinkedBlockingQueue::drainTo`. E.g.
   
   ```scala
 def clearAndPut(event: ControllerEvent): QueuedEvent = {
   val preemptedEvents = ...;

   inLock(putLock) {
     queue.drainTo(preemptedEvents)
     put(event)
   }

   preemptedEvents.forEach(_.preempt(processor))
 }
   ```
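   For concreteness, a completed version of that sketch might look like the following. The `ArrayList` buffer and the captured return value are my additions (the `...` above left them open), and the snippet assumes the surrounding `ControllerEventManager` members (`queue`, `put`, `processor`, `putLock`, `inLock`):
   
   ```scala
 import java.util.ArrayList

 def clearAndPut(event: ControllerEvent): QueuedEvent = {
   val preemptedEvents = new ArrayList[QueuedEvent]()

   val queued = inLock(putLock) {
     // atomically move all pending events out of the queue so that doWork
     // can no longer see them, then enqueue the new event
     queue.drainTo(preemptedEvents)
     put(event)
   }

   // preempt outside the lock; each drained event is now owned by this thread only
   preemptedEvents.forEach(_.preempt(processor))
   queued
 }
   ```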





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] jsancio commented on a change in pull request #9050: KAFKA-10193: Add preemption for controller events that have callbacks

2020-07-25 Thread GitBox


jsancio commented on a change in pull request #9050:
URL: https://github.com/apache/kafka/pull/9050#discussion_r460468241



##
File path: core/src/main/scala/kafka/controller/KafkaController.scala
##
@@ -1986,101 +1965,125 @@ private[controller] class ControllerStats extends KafkaMetricsGroup {
 
 sealed trait ControllerEvent {
   def state: ControllerState
+  def preempt(): Unit

Review comment:
   I would add a documentation comment to this method explaining that this 
method will not be executed by the controller thread but instead it will be 
executed by some other thread.
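   Something along these lines, for example (the wording is only a suggestion):
   
   ```scala
 sealed trait ControllerEvent {
   def state: ControllerState

   /**
    * Called when this event is preempted instead of processed, e.g. during
    * controller shutdown. Note that this is NOT executed by the controller
    * event thread; it runs on whichever thread calls clearAndPut()/close().
    */
   def preempt(): Unit
 }
   ```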

##
File path: core/src/main/scala/kafka/controller/ControllerEventManager.scala
##
@@ -77,7 +77,7 @@ class ControllerEventManager(controllerId: Int,
   private val putLock = new ReentrantLock()
   private val queue = new LinkedBlockingQueue[QueuedEvent]
   // Visible for test
-  private[controller] val thread = new ControllerEventThread(ControllerEventThreadName)
+  private[controller] var thread = new ControllerEventThread(ControllerEventThreadName)

Review comment:
   This comment applies to `clearAndPut`:
   
   ```scala
 def clearAndPut(event: ControllerEvent): QueuedEvent = inLock(putLock) {
   queue.forEach(_.preempt(processor))
   queue.clear()
   put(event)
 }
   ```
   I think there is a bug here where at most one event can be processed twice: once by `_.preempt(processor)` and once by `doWork`. I think we can fix this concurrency bug by using `LinkedBlockingQueue::drainTo`. E.g.
   
   ```scala
 def clearAndPut(event: ControllerEvent): QueuedEvent = {
   val events = ...;

   inLock(putLock) {
     queue.drainTo(events)
     put(event)
   }

   events.forEach(_.preempt(processor))
 }
   ```

##
File path: core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala
##
@@ -598,6 +603,86 @@ class ControllerIntegrationTest extends ZooKeeperTestHarness {
     }, "Broker fail to initialize after restart")
   }
 
+  @Test
+  def testPreemptionOnControllerShutdown(): Unit = {
+    servers = makeServers(1, enableControlledShutdown = false)
+    val controller = getController().kafkaController
+    val count = new AtomicInteger(2)
+    val latch = new CountDownLatch(1)
+    val spyThread = spy(controller.eventManager.thread)
+    controller.eventManager.setControllerEventThread(spyThread)
+    val processedEvent = new MockEvent(ControllerState.TopicChange) {
+      override def process(): Unit = latch.await()
+      override def preempt(): Unit = {}
+    }
+    val preemptedEvent = new MockEvent(ControllerState.TopicChange) {
+      override def process(): Unit = {}
+      override def preempt(): Unit = count.decrementAndGet()

Review comment:
   This method should be executed by the thread that is running this test. 
If you agree, no need to use an `AtomicInteger`.
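   A minimal sketch of that simplification, assuming `preempt()` really is invoked on the test's own thread:
   
   ```scala
 var count = 2  // a plain var suffices when preempt() runs on the test thread
 val preemptedEvent = new MockEvent(ControllerState.TopicChange) {
   override def process(): Unit = {}
   override def preempt(): Unit = count -= 1
 }
   ```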

##
File path: core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala
##
@@ -598,6 +603,86 @@ class ControllerIntegrationTest extends ZooKeeperTestHarness {
     }, "Broker fail to initialize after restart")
   }
 
+  @Test
+  def testPreemptionOnControllerShutdown(): Unit = {
+    servers = makeServers(1, enableControlledShutdown = false)
+    val controller = getController().kafkaController
+    val count = new AtomicInteger(2)
+    val latch = new CountDownLatch(1)
+    val spyThread = spy(controller.eventManager.thread)
+    controller.eventManager.setControllerEventThread(spyThread)
+    val processedEvent = new MockEvent(ControllerState.TopicChange) {
+      override def process(): Unit = latch.await()
+      override def preempt(): Unit = {}
+    }
+    val preemptedEvent = new MockEvent(ControllerState.TopicChange) {
+      override def process(): Unit = {}
+      override def preempt(): Unit = count.decrementAndGet()
+    }
+
+    controller.eventManager.put(processedEvent)
+    controller.eventManager.put(preemptedEvent)
+    controller.eventManager.put(preemptedEvent)
+
+    doAnswer((_: InvocationOnMock) => {
+      latch.countDown()
+    }).doCallRealMethod().when(spyThread).awaitShutdown()

Review comment:
   The important part is that we want to do `latch.countDown` after 
`initiateShutdown` has been called so that the controller thread doesn't pick 
up a new event because `isRunning` is `false`.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] mjsax commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


mjsax commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663926510


   Ah yes. It works for all committers -- it's not project specific. Missed that you are an HBase Committer/PMC :) 



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] jsancio commented on a change in pull request #9050: KAFKA-10193: Add preemption for controller events that have callbacks

2020-07-25 Thread GitBox


jsancio commented on a change in pull request #9050:
URL: https://github.com/apache/kafka/pull/9050#discussion_r460467261



##
File path: core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala
##
@@ -598,6 +603,86 @@ class ControllerIntegrationTest extends ZooKeeperTestHarness {
     }, "Broker fail to initialize after restart")
   }
 
+  @Test
+  def testPreemptionOnControllerShutdown(): Unit = {
+    servers = makeServers(1, enableControlledShutdown = false)
+    val controller = getController().kafkaController
+    val count = new AtomicInteger(2)
+    val latch = new CountDownLatch(1)
+    val spyThread = spy(controller.eventManager.thread)
+    controller.eventManager.setControllerEventThread(spyThread)
+    val processedEvent = new MockEvent(ControllerState.TopicChange) {
+      override def process(): Unit = latch.await()
+      override def preempt(): Unit = {}
+    }
+    val preemptedEvent = new MockEvent(ControllerState.TopicChange) {
+      override def process(): Unit = {}
+      override def preempt(): Unit = count.decrementAndGet()
+    }
+
+    controller.eventManager.put(processedEvent)
+    controller.eventManager.put(preemptedEvent)
+    controller.eventManager.put(preemptedEvent)
+
+    doAnswer((_: InvocationOnMock) => {
+      latch.countDown()
+    }).doCallRealMethod().when(spyThread).awaitShutdown()

Review comment:
   Got it. Sounds good.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Commented] (KAFKA-10134) High CPU issue during rebalance in Kafka consumer after upgrading to 2.5

2020-07-25 Thread Ismael Juma (Jira)


[ 
https://issues.apache.org/jira/browse/KAFKA-10134?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17165133#comment-17165133
 ] 

Ismael Juma commented on KAFKA-10134:
-

[~zhowei] So you're saying you see the issue when restarting the consumers, not 
when restarting the brokers?

> High CPU issue during rebalance in Kafka consumer after upgrading to 2.5
> 
>
> Key: KAFKA-10134
> URL: https://issues.apache.org/jira/browse/KAFKA-10134
> Project: Kafka
>  Issue Type: Bug
>  Components: clients
>Affects Versions: 2.5.0
>Reporter: Sean Guo
>Assignee: Guozhang Wang
>Priority: Blocker
> Fix For: 2.6.0, 2.5.1
>
> Attachments: consumer5.log.2020-07-22.log
>
>
> We want to utilize the new rebalance protocol to mitigate the stop-the-world 
> effect during the rebalance, as our tasks are long-running.
> But after the upgrade, when we kill an instance to trigger a rebalance while 
> there is some load (some tasks are long-running, >30s), the CPU goes sky-high. 
> It reads ~700% in our metrics, so several threads must be in a tight loop. We 
> have several consumer threads consuming from different partitions during the 
> rebalance. This is reproducible with both the new CooperativeStickyAssignor 
> and the old eager rebalance protocol. The difference is that with the old 
> eager protocol the high CPU usage drops after the rebalance is done, but with 
> the cooperative one the consumer threads seem stuck on something and cannot 
> finish the rebalance, so the high CPU usage won't drop until we stop our load. 
> A small load without long-running tasks also doesn't cause continuously high 
> CPU usage, as the rebalance can finish in that case.
>  
> "executor.kafka-consumer-executor-4" #124 daemon prio=5 os_prio=0 
> cpu=76853.07ms elapsed=841.16s tid=0x7fe11f044000 nid=0x1f4 runnable  
> [0x7fe119aab000]"executor.kafka-consumer-executor-4" #124 daemon prio=5 
> os_prio=0 cpu=76853.07ms elapsed=841.16s tid=0x7fe11f044000 nid=0x1f4 
> runnable  [0x7fe119aab000]   java.lang.Thread.State: RUNNABLE at 
> org.apache.kafka.clients.consumer.internals.ConsumerCoordinator.poll(ConsumerCoordinator.java:467)
>  at 
> org.apache.kafka.clients.consumer.KafkaConsumer.updateAssignmentMetadataIfNeeded(KafkaConsumer.java:1275)
>  at 
> org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1241) 
> at 
> org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1216) 
> at
>  
> By debugging into the code, we found that the clients appear to be in a loop 
> trying to find the coordinator.
> I also tried the old rebalance protocol on the new version; the issue still 
> exists, but the CPU goes back to normal when the rebalance is done.
> We also tried the same on 2.4.1, which doesn't seem to have this issue, so it 
> appears related to something that changed between 2.4.1 and 2.5.0.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [kafka] ijuma opened a new pull request #9080: MINOR: Recommend Java 11

2020-07-25 Thread GitBox


ijuma opened a new pull request #9080:
URL: https://github.com/apache/kafka/pull/9080


   Java 11 has been recommended for a while, but the ops section had not been 
updated.
   Also tweak the text a bit to read better.
   
   ### Committer Checklist (excluded from commit message)
   - [ ] Verify design and implementation 
   - [ ] Verify test coverage and CI build status
   - [ ] Verify documentation (including upgrade notes)
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on pull request #9079: KAFKA-10308 fix flaky core/round_trip_fault_test.py

2020-07-25 Thread GitBox


ijuma commented on pull request #9079:
URL: https://github.com/apache/kafka/pull/9079#issuecomment-663913254


   retest this please



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma commented on pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#issuecomment-663913281


   https://builds.apache.org/job/kafka-pr-jdk11-scala2.13/



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma removed a comment on pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma removed a comment on pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#issuecomment-663913281


   https://builds.apache.org/job/kafka-pr-jdk11-scala2.13/



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on a change in pull request #9063: MINOR: Fixed deprecated Gradle build Properties.

2020-07-25 Thread GitBox


ijuma commented on a change in pull request #9063:
URL: https://github.com/apache/kafka/pull/9063#discussion_r460453530



##
File path: build.gradle
##
@@ -304,7 +304,7 @@ subprojects {
   }
 
   test {
-    maxParallelForks = userMaxForks ?: Runtime.runtime.availableProcessors()
+    maxParallelForks = userMaxForks ?: Runtime.runtime.availableProcessors() as int

Review comment:
   What is the reason for the various `as` additions?





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on pull request #9063: MINOR: Fixed deprecated Gradle build Properties.

2020-07-25 Thread GitBox


ijuma commented on pull request #9063:
URL: https://github.com/apache/kafka/pull/9063#issuecomment-663913074


   ok to test



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on a change in pull request #9049: MINOR: fix scala warnings

2020-07-25 Thread GitBox


ijuma commented on a change in pull request #9049:
URL: https://github.com/apache/kafka/pull/9049#discussion_r460453389



##
File path: core/src/main/scala/kafka/security/authorizer/AclAuthorizer.scala
##
@@ -384,8 +380,8 @@ class AclAuthorizer extends Authorizer with Logging {
 
 val prefixed = new ArrayBuffer[AclEntry]
 aclCacheSnapshot
-  .from(new ResourcePattern(resourceType, resourceName, PatternType.PREFIXED))
-  .to(new ResourcePattern(resourceType, resourceName.take(1), PatternType.PREFIXED))
+  .rangeFrom(new ResourcePattern(resourceType, resourceName, PatternType.PREFIXED))
+  .rangeTo(new ResourcePattern(resourceType, resourceName.take(1), PatternType.PREFIXED))

Review comment:
   Scala 2.12 doesn't have these methods.
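   One version-portable alternative, as a sketch only: `rangeImpl` exists on `SortedMap` in both 2.12 and 2.13, though its second bound is exclusive (unlike the inclusive `to`/`rangeTo`), so the upper bound would still need separate handling:
   
   ```scala
 val slice = aclCacheSnapshot.rangeImpl(
   Some(new ResourcePattern(resourceType, resourceName, PatternType.PREFIXED)),
   None)  // no upper bound here; the inclusive upper bound must be enforced separately
   ```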





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on a change in pull request #9049: MINOR: fix scala warnings

2020-07-25 Thread GitBox


ijuma commented on a change in pull request #9049:
URL: https://github.com/apache/kafka/pull/9049#discussion_r460453337



##
File path: core/src/main/scala/kafka/admin/ConfigCommand.scala
##
@@ -344,11 +342,14 @@ object ConfigCommand extends Config {
     val sensitiveEntries = newEntries.filter(_._2.value == null)
     if (sensitiveEntries.nonEmpty)
       throw new InvalidConfigurationException(s"All sensitive broker config entries must be specified for --alter, missing entries: ${sensitiveEntries.keySet}")
-    val newConfig = new JConfig(newEntries.asJava.values)
+
+    val alterLogLevelEntries = (newEntries.values.map(new AlterConfigOp(_, AlterConfigOp.OpType.SET))
+      ++ configsToBeDeleted.map { k => new AlterConfigOp(new ConfigEntry(k, ""), AlterConfigOp.OpType.DELETE) }
+      ).asJavaCollection
 
     val configResource = new ConfigResource(ConfigResource.Type.BROKER, entityNameHead)
     val alterOptions = new AlterConfigsOptions().timeoutMs(3).validateOnly(false)
-    adminClient.alterConfigs(Map(configResource -> newConfig).asJava, alterOptions).all().get(60, TimeUnit.SECONDS)
+    adminClient.incrementalAlterConfigs(Map(configResource -> alterLogLevelEntries).asJava, alterOptions).all().get(60, TimeUnit.SECONDS)

Review comment:
   Some broker versions don't support incremental alter configs, so you 
would be breaking compatibility by making this change.
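   A sketch of one way to keep compatibility (my illustration, not what this PR does, reusing the names and imports from the diff above): try the incremental API and fall back to the legacy one when the broker rejects it.
   
   ```scala
 import java.util.concurrent.{ExecutionException, TimeUnit}
 import org.apache.kafka.common.errors.UnsupportedVersionException

 try {
   adminClient.incrementalAlterConfigs(
     Map(configResource -> alterLogLevelEntries).asJava, alterOptions)
     .all().get(60, TimeUnit.SECONDS)
 } catch {
   case e: ExecutionException if e.getCause.isInstanceOf[UnsupportedVersionException] =>
     // older brokers do not implement IncrementalAlterConfigs; use the legacy RPC
     adminClient.alterConfigs(Map(configResource -> newConfig).asJava, alterOptions)
       .all().get(60, TimeUnit.SECONDS)
 }
   ```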





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


chia7712 commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663912465


   Jenkins is running now :(
   
   Maybe it does not exclude committers from other Apache projects. At any rate, I will not trigger Jenkins in the future.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


chia7712 commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663912187


   > Only committers can trigger Jenkins builds.
   
   It seems I can trigger Jenkins by commenting "Test this please". Let me test it again.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] mjsax commented on a change in pull request #9047: KAFKA-9274: Remove `retries` for global task

2020-07-25 Thread GitBox


mjsax commented on a change in pull request #9047:
URL: https://github.com/apache/kafka/pull/9047#discussion_r460449901



##
File path: streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java
##
@@ -275,31 +259,70 @@ private void restoreState(final StateRestoreCallback stateRestoreCallback,
                               final RecordConverter recordConverter) {
         for (final TopicPartition topicPartition : topicPartitions) {
             globalConsumer.assign(Collections.singletonList(topicPartition));
+            long offset;
             final Long checkpoint = checkpointFileCache.get(topicPartition);
             if (checkpoint != null) {
                 globalConsumer.seek(topicPartition, checkpoint);
+                offset = checkpoint;
             } else {
                 globalConsumer.seekToBeginning(Collections.singletonList(topicPartition));
+                final AtomicLong position = new AtomicLong();
+                retryUntilSuccessOrThrowOnTaskTimeout(
+                    () -> position.set(globalConsumer.position(topicPartition)),
+                    String.format(
+                        "Failed to get position for partition %s. The broker may be transiently unavailable at the moment.",
+                        topicPartition
+                    )
+                );
+                offset = position.get();
             }
 
-            long offset = globalConsumer.position(topicPartition);
             final Long highWatermark = highWatermarks.get(topicPartition);
             final RecordBatchingStateRestoreCallback stateRestoreAdapter =
                 StateRestoreCallbackAdapter.adapt(stateRestoreCallback);
 
             stateRestoreListener.onRestoreStart(topicPartition, storeName, offset, highWatermark);
             long restoreCount = 0L;
 
+            long deadlineMs = NO_DEADLINE;
             while (offset < highWatermark) {
                 try {
                     final ConsumerRecords<byte[], byte[]> records = globalConsumer.poll(pollTime);
+                    if (records.isEmpty()) {
+                        if (taskTimeoutMs == 0L) {
+                            deadlineMs = maybeUpdateDeadlineOrThrow(

Review comment:
   I agree. The "issue" is really that `poll()` does _not_ throw a `TimeoutException`... Also, because we do manual assignment, `poll()` would never return "early", as it never needs to wait for joining a consumer group. -- However, compared to `max.task.idle.ms`, we are in a better situation here, because we poll() for only a single partition at a time.
   
   I also agree that applying `task.timeout.ms` should start _after_ we got a first timeout -- this was how the original code worked, which you criticized as:
   > Man, this is confusing.
   
   And I agree that the code was not straightforward to understand. But if we think it's the right thing to do, I am also happy to add it back :)
   
   I am also not an expert on all consumer internals, but from my understanding, fetch requests are sent async in general, and if a fetch request fails, the consumer does not actually retry it; a retry is triggered by the next `poll()` call. If there is no data available (ie, the fetch request has not returned yet) when `poll()` is called, the consumer blocks internally until the `poll(Duration)` timeout expires or until a fetch request returns (whichever comes first).
   
   Furthermore, before `poll()` returns, it always checks whether a fetch request is in-flight, and sends one if not.
   
   Thus, on the very first call to `poll()` we know that no fetch request can be in-flight, and we also know that `poll()` will send one and block until it returns or the `poll(Duration)` timeout expires. Thus, if `poll()` does not block for at least `request.timeout.ms` and we get an empty result back, we don't know which case holds; however, if we use the request timeout, it seems that we _know_ whether the fetch was successful or timed out. We also know that a fetch request will be in-flight after `poll()` returns. Thus, for any consecutive `poll()`, applying the request timeout also ensures that we know whether the request was successful or not.
   
   I guess the only difference between what I just described and my original code is that I used `pollTime + requestTimeout`.
   
   Bottom line: I am not 100% sure what you propose. Should we go with the original design, or with the new design? -- In the end, I think we don't need a follow-up PR; we can just try to get it right in this PR. I don't see any benefit in splitting it up into 2 PRs (because, as mentioned above, we fetch for a single partition, and thus it's a different case compared to the `max.task.idle.ms` scenario).
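   
   To make the deadline idea concrete, a rough sketch (the names here are hypothetical, not the actual `GlobalStateManagerImpl` code): treat the first empty `poll()` as the first timeout and only then start the `task.timeout.ms` clock.
   
   ```scala
 import java.time.Duration
 import org.apache.kafka.clients.consumer.{Consumer, ConsumerRecords}
 import org.apache.kafka.common.errors.TimeoutException

 def pollWithTaskTimeout[K, V](consumer: Consumer[K, V],
                               pollTime: Duration,
                               taskTimeoutMs: Long): ConsumerRecords[K, V] = {
   var deadlineMs = -1L  // -1 meaning "no deadline yet"
   while (true) {
     val records = consumer.poll(pollTime)
     if (!records.isEmpty) return records
     if (deadlineMs == -1L) {
       // first empty poll: start the task.timeout.ms clock now
       deadlineMs = System.currentTimeMillis() + taskTimeoutMs
     } else if (System.currentTimeMillis() >= deadlineMs) {
       throw new TimeoutException(s"no progress within task.timeout.ms ($taskTimeoutMs ms)")
     }
   }
   throw new IllegalStateException("unreachable")
 }
   ```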





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [kafka] mjsax commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


mjsax commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663906866


   Java 8 passed; the tests in question are known to be flaky. Might be good enough? I'll leave it to @vvcephei to make a call and merge.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] mjsax commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


mjsax commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663906774


   @chia7712 Only committers can trigger Jenkins builds.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] mjsax commented on pull request #9075: KAFKA-10306: GlobalThread should fail on InvalidOffsetException

2020-07-25 Thread GitBox


mjsax commented on pull request #9075:
URL: https://github.com/apache/kafka/pull/9075#issuecomment-663905333


   Retest this please.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Commented] (KAFKA-10255) Fix flaky testOneWayReplicationWithAutorOffsetSync1 test

2020-07-25 Thread Matthias J. Sax (Jira)


[ 
https://issues.apache.org/jira/browse/KAFKA-10255?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17165120#comment-17165120
 ] 

Matthias J. Sax commented on KAFKA-10255:
-

[https://builds.apache.org/job/kafka-pr-jdk14-scala2.13/1680/console]

> Fix flaky testOneWayReplicationWithAutorOffsetSync1 test
> 
>
> Key: KAFKA-10255
> URL: https://issues.apache.org/jira/browse/KAFKA-10255
> Project: Kafka
>  Issue Type: Test
>Reporter: Luke Chen
>Assignee: Luke Chen
>Priority: Major
>
> https://builds.apache.org/blue/rest/organizations/jenkins/pipelines/kafka-trunk-jdk14/runs/279/log/?start=0
> org.apache.kafka.connect.mirror.MirrorConnectorsIntegrationTest > 
> testOneWayReplicationWithAutorOffsetSync1 STARTED
> org.apache.kafka.connect.mirror.MirrorConnectorsIntegrationTest.testOneWayReplicationWithAutorOffsetSync1
>  failed, log available in 
> /home/jenkins/jenkins-slave/workspace/kafka-trunk-jdk14/connect/mirror/build/reports/testOutput/org.apache.kafka.connect.mirror.MirrorConnectorsIntegrationTest.testOneWayReplicationWithAutorOffsetSync1.test.stdout
> org.apache.kafka.connect.mirror.MirrorConnectorsIntegrationTest > 
> testOneWayReplicationWithAutorOffsetSync1 FAILED
>  java.lang.AssertionError: consumer record size is not zero expected:<0> but 
> was:<2>
>  at org.junit.Assert.fail(Assert.java:89)
>  at org.junit.Assert.failNotEquals(Assert.java:835)
>  at org.junit.Assert.assertEquals(Assert.java:647)
>  at 
> org.apache.kafka.connect.mirror.MirrorConnectorsIntegrationTest.testOneWayReplicationWithAutorOffsetSync1(MirrorConnectorsIntegrationTest.java:349)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [kafka] mjsax commented on pull request #9075: KAFKA-10306: GlobalThread should fail on InvalidOffsetException

2020-07-25 Thread GitBox


mjsax commented on pull request #9075:
URL: https://github.com/apache/kafka/pull/9075#issuecomment-663905239


   Retest this please.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 closed pull request #5708: Minor: remove WorkerCoordinatorMetrics and instantiate the metrics in the constructor of WorkerCoordinator

2020-07-25 Thread GitBox


chia7712 closed pull request #5708:
URL: https://github.com/apache/kafka/pull/5708


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 closed pull request #8137: KAFKA-8967 Flaky test kafka.api.SaslSslAdminIntegrationTest.testCreat…

2020-07-25 Thread GitBox


chia7712 closed pull request #8137:
URL: https://github.com/apache/kafka/pull/8137


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma removed a comment on pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma removed a comment on pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#issuecomment-663871604


   https://builds.apache.org/job/kafka-pr-jdk11-scala2.13/



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma commented on pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#issuecomment-663871642


   retest this please



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma commented on pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#issuecomment-663871604


   https://builds.apache.org/job/kafka-pr-jdk11-scala2.13/



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma commented on pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#issuecomment-663871416


   ok to test



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 opened a new pull request #9079: KAFKA-10308 fix flaky core/round_trip_fault_test.py

2020-07-25 Thread GitBox


chia7712 opened a new pull request #9079:
URL: https://github.com/apache/kafka/pull/9079


   Creating a topic may fail (due to a timeout) when running system tests. However, ```RoundTripWorker``` does not ignore ```TopicExistsException```, which makes ```round_trip_fault_test.py``` flaky.
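   
   A minimal sketch of the intended behavior (the real change is in the Java `RoundTripWorker`; the helper name here is illustrative):
   
   ```scala
 import java.util.Collections
 import java.util.concurrent.ExecutionException
 import org.apache.kafka.clients.admin.{Admin, NewTopic}
 import org.apache.kafka.common.errors.TopicExistsException

 def createTopicIgnoringExists(admin: Admin, topic: NewTopic): Unit = {
   try admin.createTopics(Collections.singleton(topic)).all().get()
   catch {
     case e: ExecutionException if e.getCause.isInstanceOf[TopicExistsException] =>
       // a previous, timed-out attempt already created the topic; safe to proceed
   }
 }
   ```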
   
   ### Committer Checklist (excluded from commit message)
   - [ ] Verify design and implementation 
   - [ ] Verify test coverage and CI build status
   - [ ] Verify documentation (including upgrade notes)
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


chia7712 commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663870629







This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] abbccdda merged pull request #8907: MINOR: code cleanup for `VOut` inconsistent naming

2020-07-25 Thread GitBox


abbccdda merged pull request #8907:
URL: https://github.com/apache/kafka/pull/8907


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] abbccdda commented on pull request #8907: MINOR: code cleanup for `VOut` inconsistent naming

2020-07-25 Thread GitBox


abbccdda commented on pull request #8907:
URL: https://github.com/apache/kafka/pull/8907#issuecomment-663870524


   Test failure not related.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] abbccdda merged pull request #9034: KAFKA-10246 : AbstractProcessorContext topic() throws NPE

2020-07-25 Thread GitBox


abbccdda merged pull request #9034:
URL: https://github.com/apache/kafka/pull/9034


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] abbccdda commented on pull request #9052: MINOR: TopologyTestDriver should not require dummy parameters

2020-07-25 Thread GitBox


abbccdda commented on pull request #9052:
URL: https://github.com/apache/kafka/pull/9052#issuecomment-663869941


   retest this please



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] ijuma commented on a change in pull request #9022: KAFKA-10158: Improve test behavior and resolve flakiness of test...

2020-07-25 Thread GitBox


ijuma commented on a change in pull request #9022:
URL: https://github.com/apache/kafka/pull/9022#discussion_r460416797



##
File path: 
core/src/test/scala/unit/kafka/admin/TopicCommandWithAdminClientTest.scala
##
@@ -673,10 +678,14 @@ class TopicCommandWithAdminClientTest extends KafkaServerTestHarness with Logging
       Collections.singletonList(new NewTopic(testTopicName, partitions, replicationFactor).configs(configMap))).all().get()
     waitForTopicCreated(testTopicName)
     TestUtils.generateAndProduceMessages(servers, testTopicName, numMessages = 10, acks = -1)
+    Thread.sleep(10)

Review comment:
   We can call `flush` on the producer to force it to send the messages.
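   E.g., a self-contained sketch of the idea (the bootstrap address and topic name are placeholders, not the test's actual helpers):
   
   ```scala
 import java.util.Properties
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
 import org.apache.kafka.common.serialization.ByteArraySerializer

 val props = new Properties()
 props.put("bootstrap.servers", "localhost:9092")  // placeholder address
 val producer = new KafkaProducer(props, new ByteArraySerializer, new ByteArraySerializer)
 (1 to 10).foreach { i =>
   producer.send(new ProducerRecord("test-topic", s"msg-$i".getBytes))  // placeholder topic
 }
 producer.flush()  // blocks until all buffered records are sent, so no sleep is needed
 producer.close()
   ```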





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] cadonna edited a comment on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


cadonna edited a comment on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663867610


   Java 11 failed with
   
   ```
   08:16:38 org.apache.kafka.streams.integration.EosBetaUpgradeIntegrationTest 
> shouldUpgradeFromEosAlphaToEosBeta[true] FAILED
   08:16:38 java.lang.AssertionError: Did not receive all 20 records from 
topic multiPartitionOutputTopic within 6 ms
   08:16:38 Expected: is a value equal to or greater than <20>
   08:16:38  but: <0> was less than <20>
   ```
   
   Java 14 failed with 
   
   ```
   
   06:37:03 kafka.network.SocketServerTest > testIdleConnection FAILED
   06:37:03 org.scalatest.exceptions.TestFailedException: Failed to close 
idle channel
   
   ```
   
   Java 8 failed with
   
   ```
   
org.apache.kafka.streams.integration.EosBetaUpgradeIntegrationTest.shouldUpgradeFromEosAlphaToEosBeta[true]
   
   java.lang.AssertionError: Did not receive all 168 records from topic 
multiPartitionOutputTopic within 6 ms
   Expected: is a value equal to or greater than <168>
but: <167> was less than <168>
   ```
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] cadonna commented on pull request #9066: KAFKA-10287: Skip unknown offsets when computing sum of changelog offsets

2020-07-25 Thread GitBox


cadonna commented on pull request #9066:
URL: https://github.com/apache/kafka/pull/9066#issuecomment-663867610


   Java 11 failed with
   
   ```
   08:16:38 org.apache.kafka.streams.integration.EosBetaUpgradeIntegrationTest 
> shouldUpgradeFromEosAlphaToEosBeta[true] FAILED
   08:16:38 java.lang.AssertionError: Did not receive all 20 records from 
topic multiPartitionOutputTopic within 6 ms
   08:16:38 Expected: is a value equal to or greater than <20>
   08:16:38  but: <0> was less than <20>
   ```
   
   Java 14 failed with 
   
   ```
   
   06:37:03 kafka.network.SocketServerTest > testIdleConnection FAILED
   06:37:03 org.scalatest.exceptions.TestFailedException: Failed to close 
idle channel
   
   ```
   
   Java 8 failed with
   
   ```
   java.lang.AssertionError: Did not receive all 168 records from topic 
multiPartitionOutputTopic within 6 ms
   Expected: is a value equal to or greater than <168>
but: <167> was less than <168>
   ```
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Comment Edited] (KAFKA-8733) Offline partitions occur when leader's disk is slow in reads while responding to follower fetch requests.

2020-07-25 Thread Ming Liu (Jira)


[ 
https://issues.apache.org/jira/browse/KAFKA-8733?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17098264#comment-17098264
 ] 

Ming Liu edited comment on KAFKA-8733 at 7/25/20, 3:03 PM:
---

We (at Twitter) also see this issue almost every month. Given that it causes 
availability loss, we have to react very quickly and enable unclean leader 
election. 

First, is it possible to add a metric that monitors for this exact scenario 
before it happens?

One observation, given we are using min-ISR=2, is that the last follower 
falling out of ISR should have the same HW as the leader before it goes 
offline. So when you enable unclean leader election, there is a 50% chance you 
incur data loss (if the election doesn't select that replica).

 

 


was (Author: mingaliu):
We (at Twitter) also saw this issue almost every month and it is annoying. 
Given it is availability loss, we have to react very fast and set unclean 
leader election. 

Given when this happens, we should also starts the disk/host swap operation. It 
seems the low level system metrics or fetch latency metrics are good metrics to 
monitor such scenario. We should add a metrics monitor such exact scenario?

Also another observation, given we are using min-ISR=2. So the last replica 
kicked out of ISR should have the same HW as the eventual offline leader. So 
when you set the unclean leader election, there is 50% chance you incur a data 
loss(if the election doesn't select that replica).

 

 

> Offline partitions occur when leader's disk is slow in reads while responding 
> to follower fetch requests.
> -
>
> Key: KAFKA-8733
> URL: https://issues.apache.org/jira/browse/KAFKA-8733
> Project: Kafka
>  Issue Type: Bug
>  Components: core
>Affects Versions: 1.1.2, 2.4.0
>Reporter: Satish Duggana
>Assignee: Satish Duggana
>Priority: Critical
> Attachments: weighted-io-time-2.png, wio-time.png
>
>
> We found offline partitions issue multiple times on some of the hosts in our 
> clusters. After going through the broker logs and hosts’s disk stats, it 
> looks like this issue occurs whenever the read/write operations take more 
> time on that disk. In a particular case where read time is more than the 
> replica.lag.time.max.ms, follower replicas will be out of sync as their 
> earlier fetch requests are stuck while reading the local log and their fetch 
> status is not yet updated as mentioned in the below code of `ReplicaManager`. 
> If there is an issue in reading the data from the log for a duration more 
> than replica.lag.time.max.ms then all the replicas will be out of sync and 
> partition becomes offline if min.isr.replicas > 1 and unclean.leader.election 
> is false.
>  
> {code:java}
> def readFromLog(): Seq[(TopicPartition, LogReadResult)] = {
>   val result = readFromLocalLog( // this call took more than 
> `replica.lag.time.max.ms`
>   replicaId = replicaId,
>   fetchOnlyFromLeader = fetchOnlyFromLeader,
>   readOnlyCommitted = fetchOnlyCommitted,
>   fetchMaxBytes = fetchMaxBytes,
>   hardMaxBytesLimit = hardMaxBytesLimit,
>   readPartitionInfo = fetchInfos,
>   quota = quota,
>   isolationLevel = isolationLevel)
>   if (isFromFollower) updateFollowerLogReadResults(replicaId, result). // 
> fetch time gets updated here, but mayBeShrinkIsr should have been already 
> called and the replica is removed from isr
>  else result
>  }
> val logReadResults = readFromLog()
> {code}
> Attached the graphs of disk weighted io time stats when this issue occurred.
> I will raise [KIP-501|https://s.apache.org/jhbpn] describing options on how 
> to handle this scenario.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [kafka] vvcephei commented on a change in pull request #9047: KAFKA-9274: Remove `retries` for global task

2020-07-25 Thread GitBox


vvcephei commented on a change in pull request #9047:
URL: https://github.com/apache/kafka/pull/9047#discussion_r460404740



##
File path: streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java
##
@@ -275,31 +259,70 @@ private void restoreState(final StateRestoreCallback stateRestoreCallback,
                               final RecordConverter recordConverter) {
         for (final TopicPartition topicPartition : topicPartitions) {
             globalConsumer.assign(Collections.singletonList(topicPartition));
+            long offset;
             final Long checkpoint = checkpointFileCache.get(topicPartition);
             if (checkpoint != null) {
                 globalConsumer.seek(topicPartition, checkpoint);
+                offset = checkpoint;
             } else {
                 globalConsumer.seekToBeginning(Collections.singletonList(topicPartition));
+                final AtomicLong position = new AtomicLong();
+                retryUntilSuccessOrThrowOnTaskTimeout(
+                    () -> position.set(globalConsumer.position(topicPartition)),
+                    String.format(
+                        "Failed to get position for partition %s. The broker may be transiently unavailable at the moment.",
+                        topicPartition
+                    )
+                );
+                offset = position.get();
             }
 
-            long offset = globalConsumer.position(topicPartition);
             final Long highWatermark = highWatermarks.get(topicPartition);
             final RecordBatchingStateRestoreCallback stateRestoreAdapter =
                 StateRestoreCallbackAdapter.adapt(stateRestoreCallback);
 
             stateRestoreListener.onRestoreStart(topicPartition, storeName, offset, highWatermark);
             long restoreCount = 0L;
 
+            long deadlineMs = NO_DEADLINE;
             while (offset < highWatermark) {
                 try {
                     final ConsumerRecords<byte[], byte[]> records = globalConsumer.poll(pollTime);
+                    if (records.isEmpty()) {
+                        if (taskTimeoutMs == 0L) {
+                            deadlineMs = maybeUpdateDeadlineOrThrow(

Review comment:
   Thanks, @mjsax , that's fair.
   
   I promise I'm not equivocating here; I'm just trying to figure out what my 
intuition is trying to tell me.
   
   It seems like maybe the fundamental problem here is that we can't 
distinguish among a successful poll that returns no data, a failure to poll, 
and a pending async fetch request. The one thing we know is that the end offset 
is beyond our current position, so there _should_ be data to poll, so we can 
assume that an empty return means either that the fetch failed internally or it 
hasn't completed yet.
   
   Stepping back, this seems to be related to the problem of task idling, in 
which it's pointless to "idle" for a time so short that we have no chance to 
actually get a response back from the broker.
   
   I feel like this is substantially my fault from #4855 / KIP-266. The purpose 
of making this API completely async was to avoid harming liveness in situations 
where we might have a relatively strict deadline. But that's not the case here.
   
   I guess the "poor man's" solution we're going for here is to block poll for 
at least long enough to allow for a complete fetch round-trip from the broker. 
If we know that there was a round-trip, and we didn't get any data, then we can 
conclude that there was an error (since we know there is data to get). Since we 
can't know that there was a round trip, we weaken the condition to: if we know 
it's been long enough that there should have been a round-trip and we don't get 
data, we conclude there was probably an error.
   
   In your KIP, we specified we would start the task timer _after_ the first 
error, so it seems like we really want to just block the poll for the 
round-trip time, and then apply your "update deadline, etc." function. I'm with 
you now that to get the round-trip time, we have to extract some config(s) from 
the Consumer. This is a pretty awkward hack, but now that I've thought it 
through, it seems the best we can do. Maybe we can mull it over and file an 
improvement jira for the Consumer to improve use cases like this.
   
   Anyway, it seems like the "poll time" config is irrelevant, we just need to 
know what config to grab that corresponds to completing a fetch request with 
high probability. It seems like we shouldn't need to update metadata, so we 
would send a fetch request on the first poll call, and we just need to block 
for whatever time bounds the fetch response time.
   
   I'm honestly not sure what timeout would be best here. It looks like the 
ConsumerNetworkClient will just wait for a response until it gets a 
"disconnect" (L598). Is that a socket timeout? I'm not sure.






[GitHub] [kafka] vvcephei commented on a change in pull request #9047: KAFKA-9274: Remove `retries` for global task

2020-07-25 Thread GitBox


vvcephei commented on a change in pull request #9047:
URL: https://github.com/apache/kafka/pull/9047#discussion_r460404740



##
File path: streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImpl.java
##
@@ -275,31 +259,70 @@ private void restoreState(final StateRestoreCallback stateRestoreCallback,
                               final RecordConverter recordConverter) {
         for (final TopicPartition topicPartition : topicPartitions) {
             globalConsumer.assign(Collections.singletonList(topicPartition));
+            long offset;
             final Long checkpoint = checkpointFileCache.get(topicPartition);
             if (checkpoint != null) {
                 globalConsumer.seek(topicPartition, checkpoint);
+                offset = checkpoint;
             } else {
                 globalConsumer.seekToBeginning(Collections.singletonList(topicPartition));
+                final AtomicLong position = new AtomicLong();
+                retryUntilSuccessOrThrowOnTaskTimeout(
+                    () -> position.set(globalConsumer.position(topicPartition)),
+                    String.format(
+                        "Failed to get position for partition %s. The broker may be transiently unavailable at the moment.",
+                        topicPartition
+                    )
+                );
+                offset = position.get();
             }
 
-            long offset = globalConsumer.position(topicPartition);
             final Long highWatermark = highWatermarks.get(topicPartition);
             final RecordBatchingStateRestoreCallback stateRestoreAdapter =
                 StateRestoreCallbackAdapter.adapt(stateRestoreCallback);
 
             stateRestoreListener.onRestoreStart(topicPartition, storeName, offset, highWatermark);
             long restoreCount = 0L;
 
+            long deadlineMs = NO_DEADLINE;
             while (offset < highWatermark) {
                 try {
                     final ConsumerRecords<byte[], byte[]> records = globalConsumer.poll(pollTime);
+                    if (records.isEmpty()) {
+                        if (taskTimeoutMs == 0L) {
+                            deadlineMs = maybeUpdateDeadlineOrThrow(

Review comment:
   Thanks, @mjsax , that's fair.
   
   I promise I'm not equivocating here; I'm just trying to understand why my intuition says this is wrong.
   
   It seems like maybe the fundamental problem here is that we can't 
distinguish among a successful poll that returns no data, a failure to poll, 
and a pending async fetch request. The one thing we know is that the end offset 
is beyond our current position, so there _should_ be data to poll, so we can 
assume that an empty return means either that the fetch failed internally or it 
hasn't completed yet.
   
   Stepping back, this seems to be related to the problem of task idling, in 
which it's pointless to "idle" for a time so short that we have no chance to 
actually get a response back from the broker.
   
   I feel like this is substantially my fault from #4855 / KIP-266. The purpose 
of making this API completely async was to avoid harming liveness in situations 
where we might have a relatively strict deadline. But that's not the case here.
   
   I guess the "poor man's" solution we're going for here is to block poll for 
at least long enough to allow for a complete fetch round-trip from the broker. 
If we know that there was a round-trip, and we didn't get any data, then we can 
conclude that there was an error (since we know there is data to get). Since we 
can't know that there was a round trip, we weaken the condition to: if we know 
it's been long enough that there should have been a round-trip and we don't get 
data, we conclude there was probably an error.
   
   In your KIP, we specified we would start the task timer _after_ the first 
error, so it seems like we really want to just block the poll for the 
round-trip time, and then apply your "update deadline, etc." function. I'm with 
you now that to get the round-trip time, we have to extract some config(s) from 
the Consumer. This is a pretty awkward hack, but now that I've thought it 
through, it seems the best we can do. Maybe we can mull it over and file an 
improvement jira for the Consumer to improve use cases like this.
   
   Anyway, it seems like the "poll time" config is irrelevant, we just need to 
know what config to grab that corresponds to completing a fetch request with 
high probability. It seems like we shouldn't need to update metadata, so we 
would send a fetch request on the first poll call, and we just need to block 
for whatever time bounds the fetch response time.
   
   I'm honestly not sure what timeout would be best here. It looks like the 
ConsumerNetworkClient will just wait for a response until it gets a 
"disconnect" (L598). Is that a socket timeout? I'm not sure.






[GitHub] [kafka] rgroothuijsen commented on pull request #9078: KAFKA-10132: Return correct value types for MBean attributes

2020-07-25 Thread GitBox


rgroothuijsen commented on pull request #9078:
URL: https://github.com/apache/kafka/pull/9078#issuecomment-663854901


   I'm not entirely sure about returning raw nulls as a fallback, however, or 
how permissive it should be about null values in general. Thoughts?



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] rgroothuijsen opened a new pull request #9078: KAFKA-10132: Return correct value types for MBean attributes

2020-07-25 Thread GitBox


rgroothuijsen opened a new pull request #9078:
URL: https://github.com/apache/kafka/pull/9078


   Currently, JMX outputs all metrics as having type `double`, even if they are 
strings or other types of numbers. This PR gets the type from the metric's 
value if possible, with `null` as a fallback.
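   
   As an illustration of the approach (a hypothetical helper, not the PR's actual code):
   
   ```scala
 // derive the MBean attribute type from the metric's current value, falling
 // back to null when no value is available
 def attributeType(value: Any): String = value match {
   case _: java.lang.Double  => classOf[java.lang.Double].getName
   case _: java.lang.Long    => classOf[java.lang.Long].getName
   case _: java.lang.Integer => classOf[java.lang.Integer].getName
   case _: String            => classOf[String].getName
   case null                 => null  // the fallback questioned in the comment above
   case other                => other.getClass.getName
 }
   ```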
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Comment Edited] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Bill Bejeck (Jira)


[ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17164911#comment-17164911
 ] 

Bill Bejeck edited comment on KAFKA-10307 at 7/25/20, 1:07 PM:
---

Hi [~bchen225242] and [~vvcephei] I'll take a look at this with the Streams 
Topology graph building perspective


was (Author: bbejeck):
Hi [~bchen225242] and [~vvcephei] I'll take a look at this as well from the 
graph building perspective

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>  Components: streams
>Affects Versions: 2.4.0, 2.5.0, 2.6.0
>Reporter: Boyang Chen
>Priority: Major
>

[jira] [Commented] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Bill Bejeck (Jira)


[ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17164911#comment-17164911
 ] 

Bill Bejeck commented on KAFKA-10307:
-

Hi [~bchen225242] and [~vvcephei], I'll take a look at this as well from the 
graph-building perspective.

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>  Components: streams
>Affects Versions: 2.4.0, 2.5.0, 2.6.0
>Reporter: Boyang Chen
>Priority: Major
>

[jira] [Assigned] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Bill Bejeck (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Bill Bejeck reassigned KAFKA-10307:
---

Assignee: (was: Bill Bejeck)

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>  Components: streams
>Affects Versions: 2.4.0, 2.5.0, 2.6.0
>Reporter: Boyang Chen
>Priority: Major
>

[jira] [Assigned] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Bill Bejeck (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Bill Bejeck reassigned KAFKA-10307:
---

Assignee: Bill Bejeck

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>  Components: streams
>Affects Versions: 2.4.0, 2.5.0, 2.6.0
>Reporter: Boyang Chen
>Assignee: Bill Bejeck
>Priority: Major
>

[jira] [Updated] (KAFKA-10308) fix flaky core/round_trip_fault_test.py

2020-07-25 Thread Chia-Ping Tsai (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10308?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Chia-Ping Tsai updated KAFKA-10308:
---
Component/s: system tests

> fix flaky core/round_trip_fault_test.py
> ---
>
> Key: KAFKA-10308
> URL: https://issues.apache.org/jira/browse/KAFKA-10308
> Project: Kafka
>  Issue Type: Bug
>  Components: system tests
>Reporter: Chia-Ping Tsai
>Assignee: Chia-Ping Tsai
>Priority: Major
>
> {quote}
> Module: kafkatest.tests.core.round_trip_fault_test
> Class: RoundTripFaultTest
> Method: test_produce_consume_with_client_partition
> {quote}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (KAFKA-10308) fix flaky core/round_trip_fault_test.py

2020-07-25 Thread Chia-Ping Tsai (Jira)
Chia-Ping Tsai created KAFKA-10308:
--

 Summary: fix flaky core/round_trip_fault_test.py
 Key: KAFKA-10308
 URL: https://issues.apache.org/jira/browse/KAFKA-10308
 Project: Kafka
  Issue Type: Bug
Reporter: Chia-Ping Tsai
Assignee: Chia-Ping Tsai


{quote}

Module: kafkatest.tests.core.round_trip_fault_test
Class: RoundTripFaultTest
Method: test_produce_consume_with_client_partition

{quote}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [kafka] huxihx merged pull request #9071: KAFKA-10305: Print usage when parsing fails for ConsumerPerformance

2020-07-25 Thread GitBox


huxihx merged pull request #9071:
URL: https://github.com/apache/kafka/pull/9071


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Resolved] (KAFKA-10305) Print usage when parsing fails for ConsumerPerformance

2020-07-25 Thread huxihx (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10305?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

huxihx resolved KAFKA-10305.

Fix Version/s: 2.7.0
   Resolution: Fixed

> Print usage when parsing fails for ConsumerPerformance
> --
>
> Key: KAFKA-10305
> URL: https://issues.apache.org/jira/browse/KAFKA-10305
> Project: Kafka
>  Issue Type: Improvement
>  Components: tools
>Affects Versions: 2.6.0
>Reporter: huxihx
>Assignee: huxihx
>Priority: Minor
> Fix For: 2.7.0
>
>
> When `kafka-consumer-perf-test.sh` is executed without the required options, or 
> with no options at all, only the error message is displayed. It would be better 
> to show the usage as well, as kafka-console-producer.sh does.
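
A minimal sketch of the intended behavior, using joptsimple as Kafka's 
command-line tools do (hypothetical class and option names, not the merged 
code): on a parse failure, print the error and then the full usage before 
exiting.

{code:java}
import java.io.IOException;

import joptsimple.OptionException;
import joptsimple.OptionParser;

// Hypothetical sketch of the fix: on bad arguments, show the error
// message *and* the usage instead of the error alone.
public final class PerfTestUsage {
    public static void main(String[] args) throws IOException {
        OptionParser parser = new OptionParser();
        parser.accepts("topic", "The topic to consume from.")
              .withRequiredArg()
              .required();
        try {
            parser.parse(args);
        } catch (OptionException e) {
            System.err.println(e.getMessage());
            parser.printHelpOn(System.err); // print the usage as well
            System.exit(1);
        }
    }
}
{code}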



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [kafka] huxihx commented on pull request #9071: KAFKA-10305: Print usage when parsing fails for ConsumerPerformance

2020-07-25 Thread GitBox


huxihx commented on pull request #9071:
URL: https://github.com/apache/kafka/pull/9071#issuecomment-663842491


   @omkreddy  Thanks for the review, merging to trunk.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] Mathieu1124 opened a new pull request #9076: MINOR - fix typo

2020-07-25 Thread GitBox


Mathieu1124 opened a new pull request #9076:
URL: https://github.com/apache/kafka/pull/9076


   *Fix typo in ProducerConfig*
   
   ### Committer Checklist (excluded from commit message)
   - [ ] Verify design and implementation 
   - [ ] Verify test coverage and CI build status
   - [ ] Verify documentation (including upgrade notes)
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] rootex- commented on pull request #6700: KAFKA-7817 ConsumerGroupCommand Regex Feature

2020-07-25 Thread GitBox


rootex- commented on pull request #6700:
URL: https://github.com/apache/kafka/pull/6700#issuecomment-663834666


   @abbccdda Yes, sure 



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 commented on pull request #9034: KAFKA-10246 : AbstractProcessorContext topic() throws NPE

2020-07-25 Thread GitBox


chia7712 commented on pull request #9034:
URL: https://github.com/apache/kafka/pull/9034#issuecomment-663833458


   @AshishRoyJava Nice find and fix!
   
   > AbstractProcessorContext topic() throws NullPointerException when 
modifying a state store within the DSL from a punctuator
   
   Could you share more details? It seems odd to me to see a 
```ProcessorRecordContext``` carrying a ```null``` topic.
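   
   For reference, a minimal sketch of the scenario as I understand it 
(hypothetical processor; assumes the store write happens inside a punctuator, 
where no current record, and hence no topic, is attached to the context):
   
   ```java
   import java.time.Duration;
   import org.apache.kafka.streams.processor.Processor;
   import org.apache.kafka.streams.processor.ProcessorContext;
   import org.apache.kafka.streams.processor.PunctuationType;
   import org.apache.kafka.streams.state.KeyValueStore;
   
   // Hypothetical processor: the store update runs from a wall-clock
   // punctuator, so it is not associated with any input record.
   public class PunctuatingProcessor implements Processor<String, String> {
       private KeyValueStore<String, Long> store;
   
       @SuppressWarnings("unchecked")
       @Override
       public void init(final ProcessorContext context) {
           store = (KeyValueStore<String, Long>) context.getStateStore("counts");
           context.schedule(Duration.ofSeconds(10),
                            PunctuationType.WALL_CLOCK_TIME,
                            timestamp -> store.put("tick", timestamp));
       }
   
       @Override
       public void process(final String key, final String value) {
           store.put(key, (long) value.length());
       }
   
       @Override
       public void close() { }
   }
   ```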



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] huxihx commented on pull request #9071: KAFKA-10305: Print usage when parsing fails for ConsumerPerformance

2020-07-25 Thread GitBox


huxihx commented on pull request #9071:
URL: https://github.com/apache/kafka/pull/9071#issuecomment-663829002


   retest this please



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] AshishRoyJava commented on pull request #9034: KAFKA-10246 : AbstractProcessorContext topic() throws NPE

2020-07-25 Thread GitBox


AshishRoyJava commented on pull request #9034:
URL: https://github.com/apache/kafka/pull/9034#issuecomment-663825200


   All checks are failing due to an unrelated test failure. Could anyone look 
into the logs, please?
   @abbccdda 



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 edited a comment on pull request #8657: KAFKA-8334 Make sure the thread which tries to complete delayed reque…

2020-07-25 Thread GitBox


chia7712 edited a comment on pull request #8657:
URL: https://github.com/apache/kafka/pull/8657#issuecomment-663818439


   @junrao 
   
   I have rebased this PR to include the fix for ```group_mode_transactions_test```. 
Could you run the system tests again? Except for ```streams_eos_test```, 
```streams_standby_replica_test```, and the transaction tests, the other tests pass 
on my local machine.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 edited a comment on pull request #8657: KAFKA-8334 Make sure the thread which tries to complete delayed reque…

2020-07-25 Thread GitBox


chia7712 edited a comment on pull request #8657:
URL: https://github.com/apache/kafka/pull/8657#issuecomment-663818439


   @junrao 
   
   I have rebased this PR to include the fix for ```group_mode_transactions_test```. 
Could you run the system tests again? Except for ```streams_eos_test```, 
```streams.streams_eos_test```, and the transaction tests, the other tests pass on 
my local machine.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] chia7712 commented on pull request #8657: KAFKA-8334 Make sure the thread which tries to complete delayed reque…

2020-07-25 Thread GitBox


chia7712 commented on pull request #8657:
URL: https://github.com/apache/kafka/pull/8657#issuecomment-663818439


   @junrao 
   
   I have rebased this PR to include the fix for ```group_mode_transactions_test```. 
Could you run the system tests again? Except for ```streams_eos_test``` and the 
transaction tests, the other tests pass on my local machine.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] abbccdda commented on pull request #8907: MINOR: code cleanup for `VOut` inconsistent naming

2020-07-25 Thread GitBox


abbccdda commented on pull request #8907:
URL: https://github.com/apache/kafka/pull/8907#issuecomment-663817294


   retest this please



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Updated] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Boyang Chen (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Boyang Chen updated KAFKA-10307:

Priority: Major  (was: Minor)

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>Reporter: Boyang Chen
>Priority: Major
>

[jira] [Updated] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Boyang Chen (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Boyang Chen updated KAFKA-10307:

Component/s: streams

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>  Components: streams
>Affects Versions: 2.4.0, 2.5.0, 2.6.0
>Reporter: Boyang Chen
>Priority: Major
>

[jira] [Updated] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Boyang Chen (Jira)


 [ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Boyang Chen updated KAFKA-10307:

Affects Version/s: 2.6.0
   2.4.0
   2.5.0

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>Affects Versions: 2.4.0, 2.5.0, 2.6.0
>Reporter: Boyang Chen
>Priority: Major
>

[jira] [Commented] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Boyang Chen (Jira)


[ 
https://issues.apache.org/jira/browse/KAFKA-10307?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17164760#comment-17164760
 ] 

Boyang Chen commented on KAFKA-10307:
-

[~vvcephei] Could you take a look? [~feyman] found this out in his PR to 
refactor the stream assignor logic: 

[https://github.com/apache/kafka/pull/8832#issuecomment-646943356]
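
For anyone digging in, a minimal illustrative sketch (not Streams code) of how 
a cycle in a sub-topology could be detected, assuming an adjacency map built 
from the `-->` edges in the topology dump quoted in the creation message below:

{code:java}
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Depth-first cycle check over a node -> successors map.
public final class TopologyCycleCheck {

    public static boolean hasCycle(Map<String, List<String>> edges) {
        Set<String> visited = new HashSet<>(); // entered at least once
        Set<String> onPath = new HashSet<>();  // on the current DFS path
        for (String node : edges.keySet())
            if (dfs(node, edges, visited, onPath))
                return true;
        return false;
    }

    private static boolean dfs(String node, Map<String, List<String>> edges,
                               Set<String> visited, Set<String> onPath) {
        if (onPath.contains(node))
            return true;              // back edge -> cycle
        if (!visited.add(node))
            return false;             // already fully explored
        onPath.add(node);
        for (String next : edges.getOrDefault(node, List.of()))
            if (dfs(next, edges, visited, onPath))
                return true;
        onPath.remove(node);
        return false;
    }

    public static void main(String[] args) {
        Map<String, List<String>> edges = new HashMap<>();
        edges.put("A", List.of("B"));
        edges.put("B", List.of("C"));
        edges.put("C", List.of("A")); // closes the loop
        System.out.println(hasCycle(edges)); // true: A -> B -> C -> A
    }
}
{code}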

 

> Topology cycles in 
> KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
> -
>
> Key: KAFKA-10307
> URL: https://issues.apache.org/jira/browse/KAFKA-10307
> Project: Kafka
>  Issue Type: Bug
>Reporter: Boyang Chen
>Priority: Minor
>

[jira] [Created] (KAFKA-10307) Topology cycles in KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable

2020-07-25 Thread Boyang Chen (Jira)
Boyang Chen created KAFKA-10307:
---

 Summary: Topology cycles in 
KTableKTableForeignKeyInnerJoinMultiIntegrationTest#shouldInnerJoinMultiPartitionQueryable
 Key: KAFKA-10307
 URL: https://issues.apache.org/jira/browse/KAFKA-10307
 Project: Kafka
  Issue Type: Bug
Reporter: Boyang Chen


We have spotted a cyclic topology in the foreign-key join test 
*shouldInnerJoinMultiPartitionQueryable*; we are not sure yet whether this is a 
bug in the algorithm or only in the test. We used 
[https://zz85.github.io/kafka-streams-viz/] to visualize it:


{code:java}
Sub-topology: 0
Source: KTABLE-SOURCE-19 (topics: 
[KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-17-topic])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-20
Source: KTABLE-SOURCE-32 (topics: 
[KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-30-topic])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-33
Source: KSTREAM-SOURCE-01 (topics: [table1])
  --> KTABLE-SOURCE-02
Processor: 
KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-20 (stores: 
[table1-STATE-STORE-00])
  --> KTABLE-FK-JOIN-OUTPUT-21
  <-- KTABLE-SOURCE-19
Processor: 
KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-33 (stores: 
[INNER-store1])
  --> KTABLE-FK-JOIN-OUTPUT-34
  <-- KTABLE-SOURCE-32
Processor: KTABLE-FK-JOIN-OUTPUT-21 (stores: [INNER-store1])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-23
  <-- KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-20
Processor: KTABLE-FK-JOIN-OUTPUT-34 (stores: [INNER-store2])
  --> KTABLE-TOSTREAM-35
  <-- KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-RESOLVER-PROCESSOR-33
Processor: KTABLE-SOURCE-02 (stores: 
[table1-STATE-STORE-00])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-10
  <-- KSTREAM-SOURCE-01
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-10 (stores: [])
  --> KTABLE-SINK-11
  <-- KTABLE-SOURCE-02
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-23 (stores: [])
  --> KTABLE-SINK-24
  <-- KTABLE-FK-JOIN-OUTPUT-21
Processor: KTABLE-TOSTREAM-35 (stores: [])
  --> KSTREAM-SINK-36
  <-- KTABLE-FK-JOIN-OUTPUT-34
Sink: KSTREAM-SINK-36 (topic: output-)
  <-- KTABLE-TOSTREAM-35
Sink: KTABLE-SINK-11 (topic: 
KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-09-topic)
  <-- KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-10
Sink: KTABLE-SINK-24 (topic: 
KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-22-topic)
  <-- KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-23  Sub-topology: 1
Source: KSTREAM-SOURCE-04 (topics: [table2])
  --> KTABLE-SOURCE-05
Source: KTABLE-SOURCE-12 (topics: 
[KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-09-topic])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-14
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-14 (stores: 
[KTABLE-FK-JOIN-SUBSCRIPTION-STATE-STORE-13])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-15
  <-- KTABLE-SOURCE-12
Processor: KTABLE-SOURCE-05 (stores: 
[table2-STATE-STORE-03])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-16
  <-- KSTREAM-SOURCE-04
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-15 (stores: 
[table2-STATE-STORE-03])
  --> KTABLE-SINK-18
  <-- KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-14
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-16 (stores: 
[KTABLE-FK-JOIN-SUBSCRIPTION-STATE-STORE-13])
  --> KTABLE-SINK-18
  <-- KTABLE-SOURCE-05
Sink: KTABLE-SINK-18 (topic: 
KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-17-topic)
  <-- KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-15, 
KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-16  Sub-topology: 2
Source: KSTREAM-SOURCE-07 (topics: [table3])
  --> KTABLE-SOURCE-08
Source: KTABLE-SOURCE-25 (topics: 
[KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-22-topic])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-27
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-27 (stores: 
[KTABLE-FK-JOIN-SUBSCRIPTION-STATE-STORE-26])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-28
  <-- KTABLE-SOURCE-25
Processor: KTABLE-SOURCE-08 (stores: 
[table3-STATE-STORE-06])
  --> KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-29
  <-- KSTREAM-SOURCE-07
Processor: KTABLE-FK-JOIN-SUBSCRIPTION-PROCESSOR-28 (stores: