nicktelford commented on code in PR #16922: URL: https://github.com/apache/kafka/pull/16922#discussion_r1742159162
########## streams/src/main/java/org/apache/kafka/streams/processor/internals/StateDirectory.java: ########## @@ -182,6 +206,118 @@ private boolean lockStateDirectory() { return stateDirLock != null; } + public void initializeTasksForLocalState(final TopologyMetadata topologyMetadata, final StreamsMetricsImpl streamsMetrics) { + final List<TaskDirectory> nonEmptyTaskDirectories = listNonEmptyTaskDirectories(); + if (hasPersistentStores && !nonEmptyTaskDirectories.isEmpty()) { + final LogContext logContext = new LogContext("main-thread "); + final ThreadCache dummyCache = new ThreadCache(logContext, 0, streamsMetrics); + final boolean eosEnabled = StreamsConfigUtils.eosEnabled(config); + final boolean stateUpdaterEnabled = StreamsConfig.InternalConfig.stateUpdaterEnabled(config.originals()); + + // discover all non-empty task directories in StateDirectory + for (final TaskDirectory taskDirectory : nonEmptyTaskDirectories) { + final String dirName = taskDirectory.file().getName(); + final TaskId id = parseTaskDirectoryName(dirName, taskDirectory.namedTopology()); + final ProcessorTopology topology = topologyMetadata.buildSubtopology(id); + final Set<TopicPartition> inputPartitions = topology.sourceTopics().stream().map(topic -> new TopicPartition(topic, id.partition())).collect(Collectors.toSet()); + + if (topology.hasStateWithChangelogs()) { + final ProcessorStateManager stateManager = new ProcessorStateManager( + id, + Task.TaskType.STANDBY, + eosEnabled, + logContext, + this, + null, + topology.storeToChangelogTopic(), + inputPartitions, + stateUpdaterEnabled + ); + + final InternalProcessorContext<Object, Object> context = new ProcessorContextImpl( + id, + config, + stateManager, + streamsMetrics, + dummyCache + ); + + final Task task = new StandbyTask( + id, + inputPartitions, + topology, + topologyMetadata.taskConfig(id), + streamsMetrics, + stateManager, + this, + dummyCache, + context + ); + + // initialize and suspend new Tasks + try { + 
task.initializeIfNeeded(); + task.suspend(); + + tasksForLocalState.put(id, task); + } catch (final TaskCorruptedException e) { + // Task is corrupt - wipe it out (under EOS) and don't initialize a Standby for it + task.suspend(); + task.closeDirty(); + } + } + } + } + } + + public boolean hasInitialTasks() { + return !tasksForLocalState.isEmpty(); + } + + public Task assignInitialTask(final TaskId taskId) { + final Task task = tasksForLocalState.remove(taskId); + if (task != null) { + lockedTasksToOwner.replace(taskId, Thread.currentThread()); + } + return task; + } + + public void closeInitialTasksIfLastAssginedThread() { + if (hasInitialTasks() && threadsWithAssignment.incrementAndGet() >= numStreamThreads.get()) { Review Comment: ~How about, as an alternative option: having a `Set<Thread>` that we add `Thread.currentThread()` to in `closePendingTasksIfLastAssignedThread()`, then we compare this set with every `StreamThread` instance (via `Thread.enumerate`) to determine if there are any StreamThreads that have not yet received their assignment.~ Or perhaps a much simpler solution would be to essentially "listen" for the end of the assignment process in `KafkaStreams#onChange` and just call `StateDirectory#closePendingTasks` in `KafkaStreams#maybeSetRunning()`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org