pabloem commented on code in PR #22403: URL: https://github.com/apache/beam/pull/22403#discussion_r944813013
########## sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaSchemaTransformReadConfiguration.java: ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.kafka; + +import com.google.auto.value.AutoValue; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.schemas.AutoValueSchema; +import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.kafka.common.TopicPartition; + +/** + * Configuration for reading from a Kafka topic. + * + * <p><b>Internal only:</b> This class is actively being worked on, and it will likely change. We + * provide no backwards compatibility guarantees, and it should not be implemented outside the Beam + * repository. + */ +@Experimental +@DefaultSchema(AutoValueSchema.class) +@AutoValue +public abstract class KafkaSchemaTransformReadConfiguration { + + /** Instantiates a {@link KafkaSchemaTransformReadConfiguration.Builder} instance. 
*/ + public static Builder builder() { + return new AutoValue_KafkaSchemaTransformReadConfiguration.Builder(); + } + + /** Sets the bootstrap servers for the Kafka consumer. */ + @Nullable + public abstract String getBootstrapServers(); + + /** Flags whether finalized offsets are committed to Kafka. */ + @Nullable + public abstract Boolean getCommitOffsetsInFinalize(); + + /** Configuration updates for the backend main consumer. */ + @Nullable + public abstract Map<String, Object> getConsumerConfigUpdates(); + + /** + * Sets the timestamps policy based on KafkaTimestampType.CREATE_TIME timestamp of the records. + */ + @Nullable + public abstract Long getCreateTimeMillisecondsMaximumDelay(); + + /** + * Configure the KafkaIO to use WatchKafkaTopicPartitionDoFn to detect and emit any new available + * {@link TopicPartition} for ReadFromKafkaDoFn to consume during pipeline execution time. + */ + @Nullable + public abstract Long getDynamicReadMillisecondsDuration(); + + /** + * Reads a bounded amount of data from the unbounded Kafka topic resource. The bound is specified + * as a number of records to read. + */ + @Nullable + public abstract Long getMaxNumRecords(); + + /** + * Reads a bounded amount of data from the unbounded Kafka topic resource. The bound is specified + * as an amount of time to read for. Each split of the source will read for this much time. + */ + @Nullable + public abstract Long getMaxReadMillisecondsDuration(); + + /** Additional configuration for the backend offset consumer. */ + @Nullable + public abstract Map<String, Object> getOffsetConsumerConfiguration(); + + /** Specifies whether to include metadata when reading from Kafka topic. */ + @Nullable + public abstract Boolean getReadWithMetadata(); + + /** Sets "isolation_level" to "read_committed" in Kafka consumer configuration. */ + @Nullable + public abstract Boolean getReadCommitted(); + + /** Use timestamp to set up start offset. 
*/ + @Nullable + public abstract Long getStartReadTimeMillisecondsEpoch(); + + /** Use timestamp to set up stop offset. */ + @Nullable + public abstract Long getStopReadTimeMillisecondsEpoch(); + + /** + * A timestamp policy to assign event time for messages in a Kafka partition and watermark for it. + */ + @Nullable + public abstract TimestampPolicyConfiguration getTimestampPolicy(); + + /** Sets the topic from which to read. */ + @Nullable + public abstract String getTopic(); + + /** Kafka partitions from which to read. */ + @Nullable + public abstract List<TopicPartitionConfiguration> getTopicPartitions(); + + /** Builder for the {@link KafkaSchemaTransformReadConfiguration}. */ + @AutoValue.Builder + public abstract static class Builder { + + /** Sets the bootstrap servers for the Kafka consumer. */ + public abstract Builder setBootstrapServers(String value); + + /** Flags whether finalized offsets are committed to Kafka. */ + public abstract Builder setCommitOffsetsInFinalize(Boolean value); + + /** Configuration updates for the backend main consumer. */ + public abstract Builder setConsumerConfigUpdates(Map<String, Object> value); + + /** + * Sets the timestamps policy based on KafkaTimestampType.CREATE_TIME timestamp of the records. + */ + public abstract Builder setCreateTimeMillisecondsMaximumDelay(Long value); + + /** + * Configure the KafkaIO to use WatchKafkaTopicPartitionDoFn to detect and emit any new + * available {@link TopicPartition} for ReadFromKafkaDoFn to consume during pipeline execution + * time. + */ + public abstract Builder setDynamicReadMillisecondsDuration(Long value); + + /** + * Reads a bounded amount of data from the unbounded Kafka topic resource. The bound is + * specified as a number of records to read. + */ + public abstract Builder setMaxNumRecords(Long value); + + /** + * Reads a bounded amount of data from the unbounded Kafka topic resource. The bound is + * specified as an amount of time to read for. 
Each split of the source will read for this much + * time. + */ + public abstract Builder setMaxReadMillisecondsDuration(Long value); + Review Comment: These are testing options, so we can remove them for now. ```suggestion ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
