[GitHub] drill pull request #1027: DRILL-4779 : Kafka storage plugin

2017-11-21 Thread kameshb
Github user kameshb commented on a diff in the pull request:

https://github.com/apache/drill/pull/1027#discussion_r152310859
  
--- Diff: 
contrib/storage-kafka/src/test/java/org/apache/drill/exec/store/kafka/TestKafkaSuit.java
 ---
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.kafka;
+
+import java.util.Properties;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.I0Itec.zkclient.ZkClient;
+import org.I0Itec.zkclient.ZkConnection;
+import org.apache.drill.exec.store.kafka.cluster.EmbeddedKafkaCluster;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+import org.junit.runners.Suite.SuiteClasses;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Joiner;
+
+import kafka.admin.AdminUtils;
+import kafka.admin.RackAwareMode;
+import kafka.utils.ZKStringSerializer$;
+import kafka.utils.ZkUtils;
+
+@RunWith(Suite.class)
+@SuiteClasses({ KafkaQueriesTest.class, MessageIteratorTest.class })
+public class TestKafkaSuit {
--- End diff --

Thanks for suggesting this frameworks.  I think this may take considerable 
amount of time and since release date is very close, we will address this 
refactoring by next release.


---


[GitHub] drill pull request #1027: DRILL-4779 : Kafka storage plugin

2017-11-21 Thread kameshb
Github user kameshb commented on a diff in the pull request:

https://github.com/apache/drill/pull/1027#discussion_r152303056
  
--- Diff: 
contrib/storage-kafka/src/test/java/org/apache/drill/exec/store/kafka/decoders/MessageReaderFactoryTest.java
 ---
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.kafka.decoders;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.proto.UserBitShared.DrillPBError.ErrorType;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class MessageReaderFactoryTest {
+
+  @Test
+  public void testShouldThrowExceptionAsMessageReaderIsNull() {
--- End diff --

taken care


---


[GitHub] drill pull request #1027: DRILL-4779 : Kafka storage plugin

2017-11-21 Thread kameshb
Github user kameshb commented on a diff in the pull request:

https://github.com/apache/drill/pull/1027#discussion_r152285907
  
--- Diff: 
contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/KafkaRecordReader.java
 ---
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.kafka;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.server.options.OptionManager;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.kafka.KafkaSubScan.KafkaSubScanSpec;
+import org.apache.drill.exec.store.kafka.decoders.MessageReader;
+import org.apache.drill.exec.store.kafka.decoders.MessageReaderFactory;
+import org.apache.drill.exec.util.Utilities;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class KafkaRecordReader extends AbstractRecordReader {
+  private static final Logger logger = 
LoggerFactory.getLogger(KafkaRecordReader.class);
+  public static final long DEFAULT_MESSAGES_PER_BATCH = 4000;
+
+  private VectorContainerWriter writer;
+  private MessageReader messageReader;
+
+  private final boolean unionEnabled;
+  private final KafkaStoragePlugin plugin;
+  private final KafkaSubScanSpec subScanSpec;
+  private final long kafkaPollTimeOut;
+
+  private long currentOffset;
+  private MessageIterator msgItr;
+
+  private final boolean enableAllTextMode;
+  private final boolean readNumbersAsDouble;
+  private final String kafkaMsgReader;
+
+  public KafkaRecordReader(KafkaSubScan.KafkaSubScanSpec subScanSpec, 
List projectedColumns,
+  FragmentContext context, KafkaStoragePlugin plugin) {
+setColumns(projectedColumns);
+this.enableAllTextMode = 
context.getOptions().getOption(ExecConstants.KAFKA_ALL_TEXT_MODE).bool_val;
+this.readNumbersAsDouble = context.getOptions()
+
.getOption(ExecConstants.KAFKA_READER_READ_NUMBERS_AS_DOUBLE).bool_val;
+OptionManager options = context.getOptions();
+this.unionEnabled = options.getOption(ExecConstants.ENABLE_UNION_TYPE);
+this.kafkaMsgReader = 
options.getOption(ExecConstants.KAFKA_RECORD_READER).string_val;
+this.kafkaPollTimeOut = 
options.getOption(ExecConstants.KAFKA_POLL_TIMEOUT).num_val;
+this.plugin = plugin;
+this.subScanSpec = subScanSpec;
+  }
+
+  @Override
+  protected Collection transformColumns(Collection 
projectedColumns) {
+Set transformed = Sets.newLinkedHashSet();
+if (!isStarQuery()) {
+  for (SchemaPath column : projectedColumns) {
+transformed.add(column);
+  }
+} else {
+  transformed.add(Utilities.STAR_COLUMN);
+}
+return transformed;
+  }
+
+  @Override
+  public void setup(OperatorContext context, OutputMutator output) throws 
ExecutionSetupException {
+this.writer = new VectorContainerWriter(output, unionEnabled);
+messageReader = MessageReaderFactory.getMessageReader(kafkaMsgReader);
+messageReader.init(context.getManagedBuffer(), 
Lists.newArrayList(getColumns()), this.writer,
+this.enableAllTextMode, this

[GitHub] drill issue #1027: DRILL-4779 : Kafka storage plugin

2017-11-12 Thread kameshb
Github user kameshb commented on the issue:

https://github.com/apache/drill/pull/1027
  
@paul-rogers @arina-ielchiieva @vrozov 
Thanks for reviewing.  Anil & I have addressed review comments. Could you 
please go through the changes and also rest of the Kafka storage codebase.


---


[GitHub] drill pull request #1027: DRILL-4779 : Kafka storage plugin

2017-11-12 Thread kameshb
Github user kameshb commented on a diff in the pull request:

https://github.com/apache/drill/pull/1027#discussion_r150435308
  
--- Diff: 
contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/KafkaRecordReader.java
 ---
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.kafka;
+
+import static 
org.apache.drill.exec.store.kafka.DrillKafkaConfig.DRILL_KAFKA_POLL_TIMEOUT;
+
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.kafka.KafkaSubScan.KafkaSubScanSpec;
+import org.apache.drill.exec.store.kafka.decoders.MessageReader;
+import org.apache.drill.exec.store.kafka.decoders.MessageReaderFactory;
+import org.apache.drill.exec.util.Utilities;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.TopicPartition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+public class KafkaRecordReader extends AbstractRecordReader {
+  private static final Logger logger = 
LoggerFactory.getLogger(KafkaRecordReader.class);
+  public static final long DEFAULT_MESSAGES_PER_BATCH = 4000;
+
+  private VectorContainerWriter writer;
+  private MessageReader messageReader;
+
+  private boolean unionEnabled;
+  private KafkaConsumer<byte[], byte[]> kafkaConsumer;
+  private KafkaStoragePlugin plugin;
+  private KafkaSubScanSpec subScanSpec;
+  private long kafkaPollTimeOut;
+  private long endOffset;
+
+  private long currentOffset;
+  private long totalFetchTime = 0;
+
+  private List partitions;
+  private final boolean enableAllTextMode;
+  private final boolean readNumbersAsDouble;
+
+  private Iterator<ConsumerRecord<byte[], byte[]>> messageIter;
+
+  public KafkaRecordReader(KafkaSubScan.KafkaSubScanSpec subScanSpec, 
List projectedColumns,
+  FragmentContext context, KafkaStoragePlugin plugin) {
+setColumns(projectedColumns);
+this.enableAllTextMode = 
context.getOptions().getOption(ExecConstants.KAFKA_ALL_TEXT_MODE).bool_val;
+this.readNumbersAsDouble = context.getOptions()
+
.getOption(ExecConstants.KAFKA_READER_READ_NUMBERS_AS_DOUBLE).bool_val;
+this.unionEnabled = 
context.getOptions().getOption(ExecConstants.ENABLE_UNION_TYPE);
+this.plugin = plugin;
+this.subScanSpec = subScanSpec;
+this.endOffset = subScanSpec.getEndOffset();
+this.kafkaPollTimeOut = 
Long.valueOf(plugin.getConfig().getDrillKafkaProps().getProperty(DRILL_KAFKA_POLL_TIMEOUT));
+  }
+
+  @Override
+  protected Collection transformColumns(Collection 
projectedColumns) {
+Set transformed = Sets.newLinkedHashSet();
+if (!isStarQuery()) {
+  for (SchemaPath column : projectedColumns) {
+transformed.add(column);
+  }
+} else {
+  transformed.add(Utilities.STAR_COLUMN);
+}
+return transformed;
+  }
+
+  @Override
+  public void setup(OperatorContext context, OutputMutator output) throws 
ExecutionSetupException {
+this.writer = new Vect