[
https://issues.apache.org/jira/browse/TAJO-1480?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14480842#comment-14480842
]
ASF GitHub Bot commented on TAJO-1480:
--------------------------------------
Github user blrunner commented on a diff in the pull request:
https://github.com/apache/tajo/pull/482#discussion_r27783300
--- Diff:
tajo-storage/tajo-storage-kafka/src/main/java/org/apache/tajo/storage/kafka/SimpleConsumerManager.java
---
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.kafka;
+
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
+
+import kafka.api.FetchRequest;
+import kafka.api.FetchRequestBuilder;
+import kafka.api.PartitionOffsetRequestInfo;
+import kafka.cluster.Broker;
+import kafka.common.ErrorMapping;
+import kafka.common.TopicAndPartition;
+import kafka.javaapi.FetchResponse;
+import kafka.javaapi.OffsetResponse;
+import kafka.javaapi.PartitionMetadata;
+import kafka.javaapi.TopicMetadata;
+import kafka.javaapi.TopicMetadataRequest;
+import kafka.javaapi.consumer.SimpleConsumer;
+import kafka.message.MessageAndOffset;
+
+import org.apache.commons.collections.IteratorUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tajo.util.NetUtils;
+
+// SimpleConsumerManager is a kafka client for KafkaScanner.
+// One instance exists per partition; each partition instantiates this class.
+public class SimpleConsumerManager {
+ private static final Log LOG =
LogFactory.getLog(SimpleConsumerManager.class);
+ // TODO: configurable setting.
+ static final int CONSUMER_TIMEOUT = 30000;
+ static final int CONSUMER_BUFFER_SIZE = 64 * 1024;
+ static final int CONSUMER_FETCH_SIZE = 300 * 1024;
+ static final int FETCH_TRY_NUM = 3;
+
+ private SimpleConsumer consumer = null;
+ private List<InetSocketAddress> brokers = new
ArrayList<InetSocketAddress>();
+ private String topic;
+ private int partition;
+ private String clientId;
+ // leader of this partition.
+ private Broker leader;
+
+ public SimpleConsumerManager(String seedBrokers, String topic, int
partition) throws IOException {
+ this.topic = topic;
+ this.partition = partition;
+ // Identifier of simpleConsumer.
+ this.clientId = SimpleConsumerManager.getIdentifier();
+ this.brokers = SimpleConsumerManager.getBrokerList(seedBrokers);
+ this.leader = findLeader(topic, partition);
+ // consumer creation fail.
+ if (null == leader) {
+ throw new IOException("consumer creation fail");
+ } else {
+ consumer = new SimpleConsumer(leader.host(), leader.port(),
CONSUMER_TIMEOUT, CONSUMER_BUFFER_SIZE, clientId);
+ }
+ }
+
+ /**
+ * Create SimpleConsumer instance. seedBrokers is connection info of
kafka
+ * brokers. ex) localhost:9092,localhost:9091 topic is topic name.
partition
+ * is partition id.
+ *
+ * @param seedBrokers
+ * @param topic
+ * @param partition
+ * @return
+ * @throws IOException
+ */
+ static public SimpleConsumerManager getSimpleConsumerManager(String
seedBrokers, String topic, int partition)
+ throws IOException {
+ return new SimpleConsumerManager(seedBrokers, topic, partition);
+ }
+
+ /**
+ * Return partition ID list of specific topic. Check for seedBrokers.
+ * seedBrokers is kafka brokers.
+ *
+ * @param seedBrokers
+ * @param topic
+ * @return
+ * @throws IOException
+ */
+ static public Set<Integer> getPartitions(String seedBrokers, String
topic) throws IOException {
+ Set<Integer> partitions = new HashSet<Integer>();
+ for (InetSocketAddress seed :
SimpleConsumerManager.getBrokerList(seedBrokers)) {
+ SimpleConsumer consumer = null;
+ try {
+ consumer = new SimpleConsumer(seed.getHostName(), seed.getPort(),
CONSUMER_TIMEOUT, CONSUMER_BUFFER_SIZE,
+ SimpleConsumerManager.getIdentifier() + "partitionLookup");
+ List<String> topics = new ArrayList<String>();
+ topics.add(topic);
+ TopicMetadataRequest req = new TopicMetadataRequest(topics);
+ kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
+ // call to topicsMetadata() asks the Broker you are connected to
for all
+ // the details about the topic we are interested in
+ List<TopicMetadata> metaData = resp.topicsMetadata();
+ // loop on partitionsMetadata iterates through all the partitions
until
+ // we find the one we want.
+ for (TopicMetadata item : metaData) {
+ for (PartitionMetadata part : item.partitionsMetadata()) {
+ partitions.add(part.partitionId());
+ }
+ }
+ } catch (Exception e) {
+ LOG.error(e.getMessage(), e);
+ throw new IOException(e);
+ } finally {
+ if (consumer != null)
+ consumer.close();
+ }
+ }
+ return partitions;
+ }
+
+ static private List<InetSocketAddress> getBrokerList(String brokers) {
+ List<InetSocketAddress> brokerList = new
ArrayList<InetSocketAddress>();
+ for (String broker : brokers.split(",")) {
+ brokerList.add(NetUtils.createUnresolved(broker));
+ }
+ return brokerList;
+ }
+
+ /**
+ * Create identifier for SimpleConsumer. The SimpleConsumer connects at
kafka
+ * using this identifier.
+ *
+ * @return
+ */
+ static private String getIdentifier() {
+ Random r = new Random();
+ return r.nextLong() + "_" + System.currentTimeMillis();
+ }
+
+ synchronized public void close() {
--- End diff --
Could you explain why you use synchronized keyword?
> Kafka Consumer for kafka storage.
> --------------------------------
>
> Key: TAJO-1480
> URL: https://issues.apache.org/jira/browse/TAJO-1480
> Project: Tajo
> Issue Type: Sub-task
> Components: storage
> Reporter: YeonSu Han
> Assignee: YeonSu Han
> Labels: kafka_storage
>
> Scanner of kafka storage needs a Kafka Consumer implementation to fetch
> data from kafka.
> The Kafka Consumer have methods like this,
> - getPartitions(): Get partition id list from specific topic.
> - fetch(): Fetch data from kafka.
> - getReadOffset(): Get offset from specific partition.
> - findLeader(): Find leader broker of specific partition.
> - etc..
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)