[ 
https://issues.apache.org/jira/browse/TAJO-1480?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14459565#comment-14459565
 ] 

ASF GitHub Bot commented on TAJO-1480:
--------------------------------------

Github user blrunner commented on a diff in the pull request:

    https://github.com/apache/tajo/pull/482#discussion_r27782949
  
    --- Diff: 
tajo-storage/tajo-storage-kafka/src/main/java/org/apache/tajo/storage/kafka/SimpleConsumerManager.java
 ---
    @@ -0,0 +1,314 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.tajo.storage.kafka;
    +
    +import java.io.IOException;
    +import java.net.InetSocketAddress;
    +import java.util.ArrayList;
    +import java.util.HashMap;
    +import java.util.HashSet;
    +import java.util.Iterator;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Random;
    +import java.util.Set;
    +
    +import kafka.api.FetchRequest;
    +import kafka.api.FetchRequestBuilder;
    +import kafka.api.PartitionOffsetRequestInfo;
    +import kafka.cluster.Broker;
    +import kafka.common.ErrorMapping;
    +import kafka.common.TopicAndPartition;
    +import kafka.javaapi.FetchResponse;
    +import kafka.javaapi.OffsetResponse;
    +import kafka.javaapi.PartitionMetadata;
    +import kafka.javaapi.TopicMetadata;
    +import kafka.javaapi.TopicMetadataRequest;
    +import kafka.javaapi.consumer.SimpleConsumer;
    +import kafka.message.MessageAndOffset;
    +
    +import org.apache.commons.collections.IteratorUtils;
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.tajo.util.NetUtils;
    +
    +// SimpleConsumerManager is a Kafka client for KafkaScanner.
    +// There is one instance per partition; each partition instantiates this class.
    +public class SimpleConsumerManager {
    +  private static final Log LOG = 
LogFactory.getLog(SimpleConsumerManager.class);
    +  // TODO: configurable setting.
    +  static final int CONSUMER_TIMEOUT = 30000;
    +  static final int CONSUMER_BUFFER_SIZE = 64 * 1024;
    +  static final int CONSUMER_FETCH_SIZE = 300 * 1024;
    +  static final int FETCH_TRY_NUM = 3;
    +
    +  private SimpleConsumer consumer = null;
    +  private List<InetSocketAddress> brokers = new 
ArrayList<InetSocketAddress>();
    +  private String topic;
    +  private int partition;
    +  private String clientId;
    +  // leader of this partition.
    +  private Broker leader;
    +
    +  public SimpleConsumerManager(String seedBrokers, String topic, int 
partition) throws IOException {
    +    this.topic = topic;
    +    this.partition = partition;
    +    // Identifier of simpleConsumer.
    +    this.clientId = SimpleConsumerManager.getIdentifier();
    +    this.brokers = SimpleConsumerManager.getBrokerList(seedBrokers);
    +    this.leader = findLeader(topic, partition);
    +    // consumer creation fail.
    +    if (null == leader) {
    +      throw new IOException("consumer creation fail");
    +    } else {
    +      consumer = new SimpleConsumer(leader.host(), leader.port(), 
CONSUMER_TIMEOUT, CONSUMER_BUFFER_SIZE, clientId);
    +    }
    +  }
    +
    +  /**
    +   * Create SimpleConsumer instance. seedBrokers is connection info of 
kafka
    +   * brokers. ex) localhost:9092,localhost:9091 topic is topic name. 
partition
    +   * is partition id.
    +   * 
    +   * @param seedBrokers
    +   * @param topic
    +   * @param partition
    +   * @return
    +   * @throws IOException
    +   */
    +  static public SimpleConsumerManager getSimpleConsumerManager(String 
seedBrokers, String topic, int partition)
    +      throws IOException {
    +    return new SimpleConsumerManager(seedBrokers, topic, partition);
    +  }
    +
    +  /**
    +   * Return partition ID list of specific topic. Check for seedBrokers.
    +   * seedBrokers is kafka brokers.
    +   * 
    +   * @param seedBrokers
    +   * @param topic
    +   * @return
    +   * @throws IOException
    +   */
    +  static public Set<Integer> getPartitions(String seedBrokers, String 
topic) throws IOException {
    +    Set<Integer> partitions = new HashSet<Integer>();
    +    for (InetSocketAddress seed : 
SimpleConsumerManager.getBrokerList(seedBrokers)) {
    +      SimpleConsumer consumer = null;
    +      try {
    +        consumer = new SimpleConsumer(seed.getHostName(), seed.getPort(), 
CONSUMER_TIMEOUT, CONSUMER_BUFFER_SIZE,
    +            SimpleConsumerManager.getIdentifier() + "partitionLookup");
    +        List<String> topics = new ArrayList<String>();
    +        topics.add(topic);
    +        TopicMetadataRequest req = new TopicMetadataRequest(topics);
    +        kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
    +        // call to topicsMetadata() asks the Broker you are connected to 
for all
    +        // the details about the topic we are interested in
    +        List<TopicMetadata> metaData = resp.topicsMetadata();
    +        // loop on partitionsMetadata iterates through all the partitions 
until
    +        // we find the one we want.
    +        for (TopicMetadata item : metaData) {
    +          for (PartitionMetadata part : item.partitionsMetadata()) {
    +            partitions.add(part.partitionId());
    +          }
    +        }
    +      } catch (Exception e) {
    +        LOG.error(e.getMessage(), e);
    +        throw new IOException(e);
    +      } finally {
    +        if (consumer != null)
    +          consumer.close();
    +      }
    +    }
    +    return partitions;
    +  }
    +
    +  static private List<InetSocketAddress> getBrokerList(String brokers) {
    +    List<InetSocketAddress> brokerList = new 
ArrayList<InetSocketAddress>();
    +    for (String broker : brokers.split(",")) {
    +      brokerList.add(NetUtils.createUnresolved(broker));
    +    }
    +    return brokerList;
    +  }
    +
    +  /**
    +   * Create identifier for SimpleConsumer. The SimpleConsumer connects at 
kafka
    +   * using this identifier.
    +   * 
    +   * @return
    +   */
    +  static private String getIdentifier() {
    --- End diff --
    
    Is it possible for this identifier to collide with other identifiers?


> Kafka Consumer for Kafka storage.
> --------------------------------
>
>                 Key: TAJO-1480
>                 URL: https://issues.apache.org/jira/browse/TAJO-1480
>             Project: Tajo
>          Issue Type: Sub-task
>          Components: storage
>            Reporter: YeonSu Han
>            Assignee: YeonSu Han
>              Labels: kafka_storage
>
> The scanner of the Kafka storage needs to implement a Kafka Consumer to fetch 
> data from Kafka.
> The Kafka Consumer has methods like this,
> - getPartitions(): Get partition id list from specific topic.
> - fetch(): Fetch data from kafka.
> - getReadOffset(): Get offset from specific partition.
> - findLeader(): Find leader broker of specific partition.
> - etc..



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to