[ https://issues.apache.org/jira/browse/KAFKA-7367?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16687113#comment-16687113 ]
ASF GitHub Bot commented on KAFKA-7367: --------------------------------------- vvcephei closed pull request #5647: KAFKA-7367: Ensure stateless topologies don't require disk access URL: https://github.com/apache/kafka/pull/5647 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StatelessIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StatelessIntegrationTest.java new file mode 100644 index 00000000000..f5981a984ae --- /dev/null +++ b/streams/src/test/java/org/apache/kafka/streams/integration/StatelessIntegrationTest.java @@ -0,0 +1,107 @@ +package org.apache.kafka.streams.integration; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.utils.SystemTime; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsBuilder; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.Topology; +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; +import org.apache.kafka.streams.kstream.Consumed; +import org.apache.kafka.streams.kstream.KStream; +import org.apache.kafka.streams.kstream.Produced; +import org.apache.kafka.test.TestUtils; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; + +import static java.util.Arrays.asList; +import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.produceKeyValuesSynchronously; +import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitUntilMinRecordsReceived; + +public class StatelessIntegrationTest { + @Test + public void statelessTopologiesShouldNotCreateDirectories() throws IOException, InterruptedException, ExecutionException { + final EmbeddedKafkaCluster broker = new EmbeddedKafkaCluster(1); + broker.start(); + broker.deleteAllTopicsAndWait(30_000L); + + final String applicationId = UUID.randomUUID().toString(); + + final String inputTopic = "input" + applicationId; + final String outputTopic = "output" + applicationId; + + broker.createTopic(inputTopic, 2, 1); + broker.createTopic(outputTopic, 2, 1); + + final String path = TestUtils.tempDirectory(applicationId).getPath(); + + final Properties streamsConfiguration = new Properties(); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, applicationId); + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, broker.bootstrapServers()); + streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, path); + streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000); + streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + streamsConfiguration.put(IntegrationTestUtils.INTERNAL_LEAVE_GROUP_ON_CLOSE, true); + + final StreamsBuilder builder = new StreamsBuilder(); + final KStream<String, String> input = builder.stream(inputTopic, Consumed.with(Serdes.String(), Serdes.String())); + final KStream<String, String> filter = input.filter((k, s) -> s.length() % 2 == 0); + final KStream<String, String> map = filter.map((k, v) -> new KeyValue<>(k, k + v)); + map.to(outputTopic, Produced.with(Serdes.String(), Serdes.String())); + + final Topology topology = builder.build(); + final KafkaStreams kafkaStreams = new KafkaStreams(topology, streamsConfiguration); + try { + kafkaStreams.start(); + + final Properties producerConfig = new Properties(); + producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, Serdes.String().serializer().getClass()); + producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, Serdes.String().serializer().getClass()); + producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, broker.bootstrapServers()); + produceKeyValuesSynchronously( + inputTopic, + asList(new KeyValue<>("a", "b"), new KeyValue<>("c", "de")), producerConfig, + new SystemTime() + ); + + final Properties consumerConfig = new Properties(); + consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, Serdes.String().deserializer().getClass()); + consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, Serdes.String().deserializer().getClass()); + consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, broker.bootstrapServers()); + consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "mygroup"); + + waitUntilMinRecordsReceived( + consumerConfig, + outputTopic, + 1 + ); + + if (new File(path).exists()) { + final List<Path> collect = + Files.find(Paths.get(path), 999, (p, bfa) -> true).collect(Collectors.toList()); + Assert.fail(path + " should not have existed, but it did: " + collect); + } + + } finally { + kafkaStreams.close(); + kafkaStreams.cleanUp(); + } + } +} ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Streams should not create state store directories unless they are needed > ------------------------------------------------------------------------ > > Key: KAFKA-7367 > URL: https://issues.apache.org/jira/browse/KAFKA-7367 > Project: Kafka > Issue Type: Improvement > Components: streams > Reporter: John Roesler > Assignee: Kamal Chandraprakash > Priority: Major > Labels: newbie > > Streams currently unconditionally creates state store directories, even if > the topology is stateless. > This can be a problem running Streams in an environment without access to > disk. -- This message was sent by Atlassian JIRA (v7.6.3#76005)