DRILL-3423: Adding HTTPd Log Parsing functionality including full pushdown, 
type remapping and wildcard support.
 Pushed through the requested columns for push down to the parser. Added more 
tests to cover a few more use cases. Ensured that user query fields are now 
completely consistent with returned values.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/46c0f2a4
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/46c0f2a4
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/46c0f2a4

Branch: refs/heads/master
Commit: 46c0f2a4135450417dfebf52f11538f8926fd467
Parents: 818f945
Author: Jim Scott <[email protected]>
Authored: Tue Nov 3 15:43:54 2015 -0600
Committer: Parth Chandra <[email protected]>
Committed: Tue Nov 1 10:42:22 2016 -0700

----------------------------------------------------------------------
 exec/java-exec/pom.xml                          |   3 +-
 .../exec/store/dfs/easy/EasyFormatPlugin.java   |   1 +
 .../exec/store/httpd/HttpdLogFormatPlugin.java  | 246 +++++++++++++++
 .../drill/exec/store/httpd/HttpdLogRecord.java  | 299 +++++++++++++++++++
 .../drill/exec/store/httpd/HttpdParser.java     | 171 +++++++++++
 .../resources/bootstrap-storage-plugins.json    |   5 +-
 .../java/org/apache/drill/BaseTestQuery.java    |  76 ++++-
 .../drill/exec/store/httpd/HttpdParserTest.java |  48 +++
 .../store/httpd/TestHttpdLogFormatPlugin.java   |  97 ++++++
 .../drill/exec/store/text/TestTextColumn.java   | 103 ++-----
 .../resources/bootstrap-storage-plugins.json    |   4 +-
 .../resources/store/httpd/dfs-bootstrap.httpd   |   2 +
 .../store/httpd/dfs-test-bootstrap-test.httpd   |   5 +
 pom.xml                                         |   1 +
 14 files changed, 976 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/pom.xml
----------------------------------------------------------------------
diff --git a/exec/java-exec/pom.xml b/exec/java-exec/pom.xml
index e9b07be..6bf77c0 100644
--- a/exec/java-exec/pom.xml
+++ b/exec/java-exec/pom.xml
@@ -456,9 +456,8 @@
     <dependency>
       <groupId>nl.basjes.parse.httpdlog</groupId>
       <artifactId>httpdlog-parser</artifactId>
-      <version>2.1.1</version>
+      <version>2.4</version>
     </dependency>
-        
   </dependencies>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
index c7f9cf8..c09e009 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java
@@ -117,6 +117,7 @@ public abstract class EasyFormatPlugin<T extends 
FormatPluginConfig> implements
 
   /** Method indicates whether or not this format could also be in a 
compression container (for example: csv.gz versus csv).
    * If this format uses its own internal compression scheme, such as Parquet 
does, then this should return false.
+   * @return <code>true</code> if it is compressible
    */
   public boolean isCompressible() {
     return compressible;

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java
new file mode 100644
index 0000000..7a83d45
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java
@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the NOTICE
+ * file distributed with this work for additional information regarding 
copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the "License"); you may not 
use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.drill.exec.store.httpd;
+
+import java.io.IOException;
+import java.util.List;
+
+import nl.basjes.parse.core.exceptions.DissectionFailure;
+import nl.basjes.parse.core.exceptions.InvalidDissectorException;
+import nl.basjes.parse.core.exceptions.MissingDissectorsException;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.common.logical.StoragePluginConfig;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.RecordWriter;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
+import org.apache.drill.exec.store.dfs.easy.EasyWriter;
+import org.apache.drill.exec.store.dfs.easy.FileWork;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.LineRecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import java.util.Map;
+import org.apache.drill.exec.store.RecordReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HttpdLogFormatPlugin extends 
EasyFormatPlugin<HttpdLogFormatPlugin.HttpdLogFormatConfig> {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(HttpdLogFormatPlugin.class);
+  private static final String PLUGIN_EXTENSION = "httpd";
+  private static final int VECTOR_MEMORY_ALLOCATION = 4095;
+
+  public HttpdLogFormatPlugin(final String name, final DrillbitContext 
context, final Configuration fsConf,
+      final StoragePluginConfig storageConfig, final HttpdLogFormatConfig 
formatConfig) {
+
+    super(name, context, fsConf, storageConfig, formatConfig, true, false, 
true, true,
+        Lists.newArrayList(PLUGIN_EXTENSION), PLUGIN_EXTENSION);
+  }
+
+  /**
+   * This class is a POJO to hold the configuration for the HttpdLogFormat 
Parser. This is automatically
+   * serialized/deserialized from JSON format.
+   */
+  @JsonTypeName(PLUGIN_EXTENSION)
+  public static class HttpdLogFormatConfig implements FormatPluginConfig {
+
+    private String logFormat;
+    private String timestampFormat;
+
+    /**
+     * @return the logFormat
+     */
+    public String getLogFormat() {
+      return logFormat;
+    }
+
+    /**
+     * @return the timestampFormat
+     */
+    public String getTimestampFormat() {
+      return timestampFormat;
+    }
+  }
+
+  /**
+   * This class performs the work for the plugin. This is where all logic goes 
to read records. In this case httpd logs
+   * are lines terminated with a new line character.
+   */
+  private class HttpdLogRecordReader extends AbstractRecordReader {
+
+    private final DrillFileSystem fs;
+    private final FileWork work;
+    private final FragmentContext fragmentContext;
+    private ComplexWriter writer;
+    private HttpdParser parser;
+    private LineRecordReader lineReader;
+    private LongWritable lineNumber;
+
+    public HttpdLogRecordReader(final FragmentContext context, final 
DrillFileSystem fs, final FileWork work, final List<SchemaPath> columns) {
+      this.fs = fs;
+      this.work = work;
+      this.fragmentContext = context;
+      setColumns(columns);
+    }
+
+    /**
+     * The query fields passed in are formatted in a way that Drill requires. 
Those must be cleaned up to work with the
+     * parser.
+     *
+     * @return Map<DrillFieldNames, ParserFieldNames>
+     */
+    private Map<String, String> makeParserFields() {
+      final Map<String, String> fieldMapping = Maps.newHashMap();
+      for (final SchemaPath sp : getColumns()) {
+        final String drillField = sp.getRootSegment().getPath();
+        final String parserField = 
HttpdParser.parserFormattedFieldName(drillField);
+        fieldMapping.put(drillField, parserField);
+      }
+      return fieldMapping;
+    }
+
+    @Override
+    public void setup(final OperatorContext context, final OutputMutator 
output) throws ExecutionSetupException {
+      try {
+        /**
+         * Extract the list of field names for the parser to use if it is NOT 
a star query. If it is a star query just
+         * pass through an empty map, because the parser is going to have to 
build all possibilities.
+         */
+        final Map<String, String> fieldMapping = !isStarQuery() ? 
makeParserFields() : null;
+        writer = new VectorContainerWriter(output);
+        parser = new HttpdParser(writer.rootAsMap(), 
context.getManagedBuffer(),
+            HttpdLogFormatPlugin.this.getConfig().getLogFormat(),
+            HttpdLogFormatPlugin.this.getConfig().getTimestampFormat(),
+            fieldMapping);
+
+        final Path path = fs.makeQualified(new Path(work.getPath()));
+        FileSplit split = new FileSplit(path, work.getStart(), 
work.getLength(), new String[]{""});
+        TextInputFormat inputFormat = new TextInputFormat();
+        JobConf job = new JobConf(fs.getConf());
+        job.setInt("io.file.buffer.size", 
fragmentContext.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
+        job.setInputFormat(inputFormat.getClass());
+        lineReader = (LineRecordReader) inputFormat.getRecordReader(split, 
job, Reporter.NULL);
+        lineNumber = lineReader.createKey();
+      }
+      catch (NoSuchMethodException | MissingDissectorsException | 
InvalidDissectorException e) {
+        throw handleAndGenerate("Failure creating HttpdParser", e);
+      }
+      catch (IOException e) {
+        throw handleAndGenerate("Failure creating HttpdRecordReader", e);
+      }
+    }
+
+    private RuntimeException handleAndGenerate(final String s, final Exception 
e) {
+      throw UserException.dataReadError(e)
+          .message(s + "\n%s", e.getMessage())
+          .addContext("Path", work.getPath())
+          .addContext("Split Start", work.getStart())
+          .addContext("Split Length", work.getLength())
+          .addContext("Local Line Number", lineNumber.get())
+          .build(LOG);
+    }
+
+    /**
+     * This record reader is given a batch of records (lines) to read. Next 
acts upon a batch of records.
+     *
+     * @return Number of records in this batch.
+     */
+    @Override
+    public int next() {
+      try {
+        final Text line = lineReader.createValue();
+
+        writer.allocate();
+        writer.reset();
+
+        int recordCount = 0;
+        while (recordCount < VECTOR_MEMORY_ALLOCATION && 
lineReader.next(lineNumber, line)) {
+          writer.setPosition(recordCount);
+          parser.parse(line.toString());
+          recordCount++;
+        }
+        writer.setValueCount(recordCount);
+
+        return recordCount;
+      }
+      catch (DissectionFailure | InvalidDissectorException | 
MissingDissectorsException | IOException e) {
+        throw handleAndGenerate("Failure while parsing log record.", e);
+      }
+    }
+
+    @Override
+    public void close() throws Exception {
+      try {
+        if (lineReader != null) {
+          lineReader.close();
+        }
+      }
+      catch (IOException e) {
+        LOG.warn("Failure while closing Httpd reader.", e);
+      }
+    }
+
+  }
+
+  /**
+   * This plugin supports pushing down into the parser. Only fields 
specifically asked for within the configuration will
+   * be parsed. If no fields are asked for then all possible fields will be 
returned.
+   *
+   * @return true
+   */
+  @Override
+  public boolean supportsPushDown() {
+    return true;
+  }
+
+  @Override
+  public RecordReader getRecordReader(final FragmentContext context, final 
DrillFileSystem dfs, final FileWork fileWork, final List<SchemaPath> columns, 
final String userName) throws ExecutionSetupException {
+    return new HttpdLogRecordReader(context, dfs, fileWork, columns);
+  }
+
+  @Override
+  public RecordWriter getRecordWriter(final FragmentContext context, final 
EasyWriter writer) throws IOException {
+    throw new UnsupportedOperationException("Drill doesn't currently support 
writing HTTPd logs");
+  }
+
+  @Override
+  public int getReaderOperatorType() {
+    return -1;
+  }
+
+  @Override
+  public int getWriterOperatorType() {
+    return -1;
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java
new file mode 100644
index 0000000..03f70c1
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2015 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.httpd;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Maps;
+import io.netty.buffer.DrillBuf;
+import java.util.EnumSet;
+import java.util.Map;
+import nl.basjes.parse.core.Casts;
+import nl.basjes.parse.core.Parser;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.drill.exec.vector.complex.writer.BigIntWriter;
+import org.apache.drill.exec.vector.complex.writer.Float8Writer;
+import org.apache.drill.exec.vector.complex.writer.VarCharWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HttpdLogRecord {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(HttpdLogRecord.class);
+  private final Map<String, VarCharWriter> strings = Maps.newHashMap();
+  private final Map<String, BigIntWriter> longs = Maps.newHashMap();
+  private final Map<String, Float8Writer> doubles = Maps.newHashMap();
+  private final Map<String, MapWriter> wildcards = Maps.newHashMap();
+  private final Map<String, String> cleanExtensions = Maps.newHashMap();
+  private final Map<String, MapWriter> startedWildcards = Maps.newHashMap();
+  private final Map<String, MapWriter> wildcardWriters = Maps.newHashMap();
+  private DrillBuf managedBuffer;
+
+  public HttpdLogRecord(final DrillBuf managedBuffer) {
+    this.managedBuffer = managedBuffer;
+  }
+
+  /**
+   * Call this method after a record has been parsed. This finishes the lifecycle of any maps that were written and
+   * removes all the entries for the next record to be able to work.
+   */
+  public void finishRecord() {
+    for (MapWriter writer : wildcardWriters.values()) {
+      writer.end();
+    }
+    wildcardWriters.clear();
+    startedWildcards.clear();
+  }
+
+  private DrillBuf buf(final int size) {
+    if (managedBuffer.capacity() < size) {
+      managedBuffer = managedBuffer.reallocIfNeeded(size);
+    }
+    return managedBuffer;
+  }
+
+  private void writeString(final VarCharWriter writer, final String value) {
+    final byte[] stringBytes = value.getBytes(Charsets.UTF_8);
+    final DrillBuf stringBuffer = buf(stringBytes.length);
+    stringBuffer.clear();
+    stringBuffer.writeBytes(stringBytes);
+    writer.writeVarChar(0, stringBytes.length, stringBuffer);
+  }
+
+  /**
+   * This method is referenced and called via reflection. This is added as a 
parsing target for the parser. It will get
+   * called when the value of a log field is a String data type.
+   *
+   * @param field name of field
+   * @param value value of field
+   */
+  @SuppressWarnings("unused")
+  public void set(final String field, final String value) {
+    if (value != null) {
+      final VarCharWriter w = strings.get(field);
+      if (w != null) {
+        LOG.trace("Parsed field: {}, as string: {}", field, value);
+        writeString(w, value);
+      }
+      else {
+        LOG.warn("No 'string' writer found for field: {}", field);
+      }
+    }
+  }
+
+  /**
+   * This method is referenced and called via reflection. This is added as a 
parsing target for the parser. It will get
+   * called when the value of a log field is a Long data type.
+   *
+   * @param field name of field
+   * @param value value of field
+   */
+  @SuppressWarnings("unused")
+  public void set(final String field, final Long value) {
+    if (value != null) {
+      final BigIntWriter w = longs.get(field);
+      if (w != null) {
+        LOG.trace("Parsed field: {}, as long: {}", field, value);
+        w.writeBigInt(value);
+      }
+      else {
+        LOG.warn("No 'long' writer found for field: {}", field);
+      }
+    }
+  }
+
+  /**
+   * This method is referenced and called via reflection. This is added as a 
parsing target for the parser. It will get
+   * called when the value of a log field is a Double data type.
+   *
+   * @param field name of field
+   * @param value value of field
+   */
+  @SuppressWarnings("unused")
+  public void set(final String field, final Double value) {
+    if (value != null) {
+      final Float8Writer w = doubles.get(field);
+      if (w != null) {
+        LOG.trace("Parsed field: {}, as double: {}", field, value);
+        w.writeFloat8(value);
+      }
+      else {
+        LOG.warn("No 'double' writer found for field: {}", field);
+      }
+    }
+  }
+
+  /**
+   * This method is referenced and called via reflection. When the parser 
processes a field like:
+   * HTTP.URI:request.firstline.uri.query.* where star is an arbitrary field 
that the parser found this method will be
+   * invoked. <br>
+   *
+   * @param field name of field
+   * @param value value of field
+   */
+  @SuppressWarnings("unused")
+  public void setWildcard(final String field, final String value) {
+    if (value != null) {
+      final MapWriter mapWriter = getWildcardWriter(field);
+      LOG.trace("Parsed wildcard field: {}, as string: {}", field, value);
+      final VarCharWriter w = mapWriter.varChar(cleanExtensions.get(field));
+      writeString(w, value);
+    }
+  }
+
+  /**
+   * This method is referenced and called via reflection. When the parser 
processes a field like:
+   * HTTP.URI:request.firstline.uri.query.* where star is an arbitrary field 
that the parser found this method will be
+   * invoked. <br>
+   *
+   * @param field name of field
+   * @param value value of field
+   */
+  @SuppressWarnings("unused")
+  public void setWildcard(final String field, final Long value) {
+    if (value != null) {
+      final MapWriter mapWriter = getWildcardWriter(field);
+      LOG.trace("Parsed wildcard field: {}, as long: {}", field, value);
+      final BigIntWriter w = mapWriter.bigInt(cleanExtensions.get(field));
+      w.writeBigInt(value);
+    }
+  }
+
+  /**
+   * This method is referenced and called via reflection. When the parser 
processes a field like:
+   * HTTP.URI:request.firstline.uri.query.* where star is an arbitrary field 
that the parser found this method will be
+   * invoked. <br>
+   *
+   * @param field name of field
+   * @param value value of field
+   */
+  @SuppressWarnings("unused")
+  public void setWildcard(final String field, final Double value) {
+    if (value != null) {
+      final MapWriter mapWriter = getWildcardWriter(field);
+      LOG.trace("Parsed wildcard field: {}, as double: {}", field, value);
+      final Float8Writer w = mapWriter.float8(cleanExtensions.get(field));
+      w.writeFloat8(value);
+    }
+  }
+
+  /**
+   * For a configuration like HTTP.URI:request.firstline.uri.query.*, a writer 
was created with name
+   * HTTP.URI:request.firstline.uri.query, we traverse the list of wildcard 
writers to see which one is the root of the
+   * name of the field passed in like HTTP.URI:request.firstline.uri.query.old. This is the writer entry that is needed.
+   *
+   * @param field like HTTP.URI:request.firstline.uri.query.old where 'old' is 
one of many different parameter names.
+   * @return the writer to be used for this field.
+   */
+  private MapWriter getWildcardWriter(final String field) {
+    MapWriter writer = startedWildcards.get(field);
+    if (writer == null) {
+      for (Map.Entry<String, MapWriter> entry : wildcards.entrySet()) {
+        final String root = entry.getKey();
+        if (field.startsWith(root)) {
+          writer = entry.getValue();
+
+          /**
+           * In order to save some time, store the cleaned version of the 
field extension. It is possible it will have
+           * unsafe characters in it.
+           */
+          if (!cleanExtensions.containsKey(field)) {
+            final String extension = field.substring(root.length() + 1, 
field.length());
+            final String cleanExtension = 
HttpdParser.drillFormattedFieldName(extension);
+            cleanExtensions.put(field, cleanExtension);
+            LOG.debug("Added extension: field='{}' with cleanExtension='{}'", 
field, cleanExtension);
+          }
+
+          /**
+           * We already know we have the writer, but if we have put this 
writer in the started list, do NOT call start
+           * again.
+           */
+          if (!wildcardWriters.containsKey(root)) {
+            /**
+             * Start and store this root map writer for later retrieval.
+             */
+            LOG.debug("Starting new wildcard field writer: {}", field);
+            writer.start();
+            startedWildcards.put(field, writer);
+            wildcardWriters.put(root, writer);
+          }
+
+          /**
+           * Break out of the for loop when we find a root writer that matches 
the field.
+           */
+          break;
+        }
+      }
+    }
+
+    return writer;
+  }
+
+  public Map<String, VarCharWriter> getStrings() {
+    return strings;
+  }
+
+  public Map<String, BigIntWriter> getLongs() {
+    return longs;
+  }
+
+  public Map<String, Float8Writer> getDoubles() {
+    return doubles;
+  }
+
+  /**
+   * This record will be used with a single parser. For each field that is to 
be parsed a setter will be called. It
+   * registers a setter method for each field being parsed. It also builds the data writers to hold the data being
+   * parsed.
+   *
+   * @param parser
+   * @param mapWriter
+   * @param type
+   * @param parserFieldName
+   * @param drillFieldName
+   * @throws NoSuchMethodException
+   */
+  public void addField(final Parser<HttpdLogRecord> parser, final MapWriter 
mapWriter, final EnumSet<Casts> type, final String parserFieldName, final 
String drillFieldName) throws NoSuchMethodException {
+    final boolean hasWildcard = 
parserFieldName.endsWith(HttpdParser.PARSER_WILDCARD);
+
+    /**
+     * This is a dynamic way to map the setter for each specified field type. 
<br/>
+     * e.g. a TIME.STAMP may map to a LONG while a referrer may map to a STRING
+     */
+    if (hasWildcard) {
+      final String cleanName = parserFieldName.substring(0, 
parserFieldName.length() - HttpdParser.PARSER_WILDCARD.length());
+      LOG.debug("Adding WILDCARD parse target: {} as {}, with field name: {}", 
parserFieldName, cleanName, drillFieldName);
+      parser.addParseTarget(this.getClass().getMethod("setWildcard", 
String.class, String.class), parserFieldName);
+      parser.addParseTarget(this.getClass().getMethod("setWildcard", 
String.class, Double.class), parserFieldName);
+      parser.addParseTarget(this.getClass().getMethod("setWildcard", 
String.class, Long.class), parserFieldName);
+      wildcards.put(cleanName, mapWriter.map(drillFieldName));
+    }
+    else if (type.contains(Casts.DOUBLE)) {
+      LOG.debug("Adding DOUBLE parse target: {}, with field name: {}", 
parserFieldName, drillFieldName);
+      parser.addParseTarget(this.getClass().getMethod("set", String.class, 
Double.class), parserFieldName);
+      doubles.put(parserFieldName, mapWriter.float8(drillFieldName));
+    }
+    else if (type.contains(Casts.LONG)) {
+      LOG.debug("Adding LONG parse target: {}, with field name: {}", 
parserFieldName, drillFieldName);
+      parser.addParseTarget(this.getClass().getMethod("set", String.class, 
Long.class), parserFieldName);
+      longs.put(parserFieldName, mapWriter.bigInt(drillFieldName));
+    }
+    else {
+      LOG.debug("Adding STRING parse target: {}, with field name: {}", 
parserFieldName, drillFieldName);
+      parser.addParseTarget(this.getClass().getMethod("set", String.class, 
String.class), parserFieldName);
+      strings.put(parserFieldName, mapWriter.varChar(drillFieldName));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdParser.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdParser.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdParser.java
new file mode 100644
index 0000000..114a7f4
--- /dev/null
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdParser.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2015 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.httpd;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+import io.netty.buffer.DrillBuf;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import nl.basjes.parse.core.Casts;
+import nl.basjes.parse.core.Parser;
+import nl.basjes.parse.core.exceptions.DissectionFailure;
+import nl.basjes.parse.core.exceptions.InvalidDissectorException;
+import nl.basjes.parse.core.exceptions.MissingDissectorsException;
+import nl.basjes.parse.httpdlog.HttpdLoglineParser;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HttpdParser {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HttpdParser.class);
+  public static final String PARSER_WILDCARD = ".*";
+  public static final String SAFE_WILDCARD = "_$";
+  public static final String SAFE_SEPARATOR = "_";
+  public static final String REMAPPING_FLAG = "#";
+  private final Parser<HttpdLogRecord> parser;
+  private final HttpdLogRecord record;
+
+  public HttpdParser(final MapWriter mapWriter, final DrillBuf managedBuffer, 
final String logFormat,
+      final String timestampFormat, final Map<String, String> fieldMapping)
+      throws NoSuchMethodException, MissingDissectorsException, 
InvalidDissectorException {
+
+    Preconditions.checkArgument(logFormat != null && 
!logFormat.trim().isEmpty(), "logFormat cannot be null or empty");
+
+    this.record = new HttpdLogRecord(managedBuffer);
+    this.parser = new HttpdLoglineParser<>(HttpdLogRecord.class, logFormat, 
timestampFormat);
+
+    setupParser(mapWriter, logFormat, fieldMapping);
+
+    if (timestampFormat != null && !timestampFormat.trim().isEmpty()) {
+      LOG.info("Custom timestamp format has been specified. This is an 
informational note only as custom timestamps is rather unusual.");
+    }
+    if (logFormat.contains("\n")) {
+      LOG.info("Specified logformat is a multiline log format: {}", logFormat);
+    }
+  }
+
+  /**
+   * We do not expose the underlying parser or the record which is used to 
manage the writers.
+   *
+   * @param line log line to tear apart.
+   *
+   * @throws DissectionFailure
+   * @throws InvalidDissectorException
+   * @throws MissingDissectorsException
+   */
+  public void parse(final String line) throws DissectionFailure, 
InvalidDissectorException, MissingDissectorsException {
+    parser.parse(record, line);
+    record.finishRecord();
+  }
+
+  /**
+   * In order to define a type remapping the format of the field configuration 
will look like: <br/>
+   * HTTP.URI:request.firstline.uri.query.[parameter name] <br/>
+   *
+   * @param parser Add type remapping to this parser instance.
+   * @param fieldName request.firstline.uri.query.[parameter_name]
+   * @param fieldType HTTP.URI, etc..
+   */
+  private void addTypeRemapping(final Parser<HttpdLogRecord> parser, final 
String fieldName, final String fieldType) {
+    LOG.debug("Adding type remapping - fieldName: {}, fieldType: {}", 
fieldName, fieldType);
+    parser.addTypeRemapping(fieldName, fieldType);
+  }
+
+  /**
+   * The parser deals with dots unlike Drill wanting underscores 
request_referer. For the sake of simplicity we are
+   * going to replace the dots. The resultant output field will look like: request.referer.<br>
+   * Additionally, wild cards will get replaced with .*
+   *
+   * @param drillFieldName name to be cleansed.
+   * @return
+   */
+  public static String parserFormattedFieldName(final String drillFieldName) {
+    return drillFieldName.replace(SAFE_WILDCARD, 
PARSER_WILDCARD).replaceAll(SAFE_SEPARATOR, ".").replaceAll("\\.\\.", "_");
+  }
+
+  /**
+   * Drill cannot deal with fields with dots in them like request.referer. For 
the sake of simplicity we are going
+   * ensure the field name is cleansed. The resultant output field will look 
like: request_referer.<br>
+   * Additionally, wild cards will get replaced with _$
+   *
+   * @param parserFieldName name to be cleansed.
+   * @return
+   */
+  public static String drillFormattedFieldName(final String parserFieldName) {
+    return parserFieldName.replaceAll("_", "__").replace(PARSER_WILDCARD, 
SAFE_WILDCARD).replaceAll("\\.", SAFE_SEPARATOR);
+  }
+
+  private void setupParser(final MapWriter mapWriter, final String logFormat, 
final Map<String, String> fieldMapping)
+      throws NoSuchMethodException, MissingDissectorsException, 
InvalidDissectorException {
+
+    /**
+     * If the user has selected fields, then we will use them to configure the 
parser because this would be the most
+     * efficient way to parse the log.
+     */
+    final Map<String, String> requestedPaths;
+    final List<String> allParserPaths = parser.getPossiblePaths();
+    if (fieldMapping != null && !fieldMapping.isEmpty()) {
+      LOG.debug("Using fields defined by user.");
+      requestedPaths = fieldMapping;
+    }
+    else {
+      /**
+       * Use all possible paths that the parser has determined from the 
specified log format.
+       */
+      LOG.debug("No fields defined by user, defaulting to all possible 
fields.");
+      requestedPaths = Maps.newHashMap();
+      for (final String parserPath : allParserPaths) {
+        requestedPaths.put(drillFormattedFieldName(parserPath), parserPath);
+      }
+    }
+
+    /**
+     * By adding the parse target to the dummy instance we activate it for 
use. Which we can then use to find out which
+     * paths cast to which native data types. After we are done figuring this 
information out, we throw this away
+     * because this will be the slowest parsing path possible for the 
specified format.
+     */
+    Parser<Object> dummy = new HttpdLoglineParser<>(Object.class, logFormat);
+    dummy.addParseTarget(String.class.getMethod("indexOf", String.class), 
allParserPaths);
+
+    for (final Map.Entry<String, String> entry : requestedPaths.entrySet()) {
+      final EnumSet<Casts> casts;
+
+      /**
+       * Check the field specified by the user to see if it is supposed to be 
remapped.
+       */
+      if (entry.getValue().startsWith(REMAPPING_FLAG)) {
+        /**
+         * Because this field is being remapped we need to replace the field 
name that the parser uses.
+         */
+        entry.setValue(entry.getValue().substring(REMAPPING_FLAG.length()));
+
+        final String[] pieces = entry.getValue().split(":");
+        addTypeRemapping(parser, pieces[1], pieces[0]);
+
+        casts = Casts.STRING_ONLY;
+      }
+      else {
+        casts = dummy.getCasts(entry.getValue());
+      }
+
+      LOG.debug("Setting up drill field: {}, parser field: {}, which casts as: 
{}", entry.getKey(), entry.getValue(), casts);
+      record.addField(parser, mapWriter, casts, entry.getValue(), 
entry.getKey());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json 
b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
index dab4ada..b5485d2 100644
--- a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
+++ b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json
@@ -29,9 +29,10 @@
           extensions: [ "tsv" ],
           delimiter: "\t"
         },
-        "httpd" :{
+        "httpd" : {
           type: "httpd",
-          format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" 
\"%{User-Agent}i\" \"%{Cookie}i\""
+          logFormat: "%h %t \"%r\" %>s %b \"%{Referer}i\""
+          /* timestampFormat: "dd/MMM/yyyy:HH:mm:ss ZZ" */
         },
         "parquet" : {
           type: "parquet"

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java 
b/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
index e528d0e..93916e9 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/BaseTestQuery.java
@@ -18,6 +18,7 @@
 package org.apache.drill;
 
 import static org.hamcrest.core.StringContains.containsString;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.fail;
 
@@ -65,6 +66,10 @@ import org.junit.runner.Description;
 import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
 import com.google.common.io.Resources;
+import java.util.ArrayList;
+import java.util.Arrays;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.vector.ValueVector;
 
 public class BaseTestQuery extends ExecTest {
   private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(BaseTestQuery.class);
@@ -302,7 +307,7 @@ public class BaseTestQuery extends ExecTest {
       Preconditions.checkArgument(query instanceof String, "Expected a string 
as input query");
       query = QueryTestUtil.normalizeQuery((String)query);
       return client.runQuery(type, (String)query);
-    }
+  }
   }
 
   public static List<QueryDataBatch> 
testPreparedStatement(PreparedStatementHandle handle) throws Exception {
@@ -384,9 +389,9 @@ public class BaseTestQuery extends ExecTest {
       } catch (AssertionError e) {
         e.addSuppressed(actualException);
         throw e;
-      }
     }
   }
+  }
 
   /**
    * Utility method which tests given query produces a {@link UserException}
@@ -501,4 +506,69 @@ public class BaseTestQuery extends ExecTest {
 
     return formattedResults.toString();
   }
-}
+
+
+  public class TestResultSet {
+
+    private final List<List<String>> rows;
+
+    public TestResultSet() {
+      rows = new ArrayList<>();
+    }
+
+    public TestResultSet(List<QueryDataBatch> batches) throws 
SchemaChangeException {
+      rows = new ArrayList<>();
+      convert(batches);
+    }
+
+    public void addRow(String... cells) {
+      List<String> newRow = Arrays.asList(cells);
+      rows.add(newRow);
+    }
+
+    public int size() {
+      return rows.size();
+    }
+
+    @Override public boolean equals(Object o) {
+      boolean result = false;
+
+      if (this == o) {
+        result = true;
+      } else if (o instanceof TestResultSet) {
+        TestResultSet that = (TestResultSet) o;
+        assertEquals(this.size(), that.size());
+        for (int i = 0; i < this.rows.size(); i++) {
+          assertEquals(this.rows.get(i).size(), that.rows.get(i).size());
+          for (int j = 0; j < this.rows.get(i).size(); ++j) {
+            assertEquals(this.rows.get(i).get(j), that.rows.get(i).get(j));
+          }
+        }
+        result = true;
+      }
+
+      return result;
+    }
+
+    private void convert(List<QueryDataBatch> batches) throws 
SchemaChangeException {
+      RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
+      for (QueryDataBatch batch : batches) {
+        int rc = batch.getHeader().getRowCount();
+        if (batch.getData() != null) {
+          loader.load(batch.getHeader().getDef(), batch.getData());
+          for (int i = 0; i < rc; ++i) {
+            List<String> newRow = new ArrayList<>();
+            rows.add(newRow);
+            for (VectorWrapper<?> vw : loader) {
+              ValueVector.Accessor accessor = 
vw.getValueVector().getAccessor();
+              Object o = accessor.getObject(i);
+              newRow.add(o == null ? null : o.toString());
+            }
+          }
+        }
+        loader.clear();
+        batch.release();
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java
new file mode 100644
index 0000000..961d9a6
--- /dev/null
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the NOTICE
+ * file distributed with this work for additional information regarding 
copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the "License"); you may not 
use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.drill.exec.store.httpd;
+
+import io.netty.buffer.DrillBuf;
+import java.util.Map;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HttpdParserTest {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(HttpdParserTest.class);
+
+  private void runTest(String logFormat, String logLine) throws Exception {
+    MapWriter mapWriter = null;
+    DrillBuf managedBuffer = null;
+    Map<String, String> configuredFields = null;
+    HttpdParser parser = new HttpdParser(mapWriter, managedBuffer, logFormat, 
null, configuredFields);
+    parser.parse(logLine);
+  }
+
+//  @Test
+  public void testFirstPattern() throws Exception {
+    LOG.info("testFirstPattern");
+//    final String format = "common";
+//    final String format = "%h %l %u %t \"%r\" %>s %b";
+    final String format = "%h %t \"%r\" %>s %b \"%{Referer}i\"";
+    final String line = "127.0.0.1 [31/Dec/2012:23:49:41 +0100] "
+        + "\"GET /foo HTTP/1.1\" 200 "
+        + "1213 \"http://localhost/index.php?mies=wim\"";;
+    runTest(format, line);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java
new file mode 100644
index 0000000..6c2eaf8
--- /dev/null
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the NOTICE
+ * file distributed with this work for additional information regarding 
copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the "License"); you may not 
use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.drill.exec.store.httpd;
+
+import java.util.List;
+import org.apache.drill.BaseTestQuery;
+import org.apache.drill.exec.rpc.user.QueryDataBatch;
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestHttpdLogFormatPlugin extends BaseTestQuery {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(TestHttpdLogFormatPlugin.class);
+
+  /**
+   * This test covers the test bootstrap-storage-plugins.json section of httpd.
+   *
+   * Indirectly this validates the HttpdLogFormatPlugin.HttpdLogFormatConfig 
deserializing properly.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testDfsTestBootstrap_star() throws Exception {
+    test("select * from 
dfs_test.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd`");
+  }
+
+  /**
+   * This test covers the test bootstrap-storage-plugins.json section of httpd.
+   *
+   * Indirectly this validates the HttpdLogFormatPlugin.HttpdLogFormatConfig 
deserializing properly.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testDfsTestBootstrap_notstar() throws Exception {
+    test("select `TIME_STAMP:request_receive_time`, 
`HTTP_METHOD:request_firstline_method`, `STRING:request_status_last`, 
`BYTES:response_body_bytesclf` \n"
+        + "from 
dfs_test.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd`");
+  }
+
+  /**
+   * This test covers the main bootstrap-storage-plugins.json section of httpd.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testDfsBootstrap_star() throws Exception {
+    test("select * from 
dfs.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-bootstrap.httpd`");
+  }
+
+  /**
+   * This test covers the main bootstrap-storage-plugins.json section of httpd.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testDfsBootstrap_wildcard() throws Exception {
+    test("select `STRING:request_referer_query_$` from 
dfs.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-bootstrap.httpd`");
+  }
+
+  /**
+   * This test covers the main bootstrap-storage-plugins.json section of httpd.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testDfsBootstrap_underscore() throws Exception {
+    test("select `TIME_DAY:request_receive_time_day__utc` from 
dfs.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-bootstrap.httpd`");
+  }
+
+  @Test
+  public void testGroupBy_1() throws Exception {
+    final List<QueryDataBatch> actualResults = testSqlWithResults(
+        "select `HTTP_METHOD:request_firstline_method` as http_method, 
`STRING:request_status_last` as status_code, 
sum(`BYTES:response_body_bytesclf`) as total_bytes \n"
+        + "from 
dfs_test.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd`\n"
+        + "group by `HTTP_METHOD:request_firstline_method`, 
`STRING:request_status_last`"
+    );
+
+    final TestResultSet expectedResultSet = new TestResultSet();
+    expectedResultSet.addRow("GET", "200", "46551");
+    expectedResultSet.addRow("POST", "302", "18186");
+
+    TestResultSet actualResultSet = new TestResultSet(actualResults);
+    assertTrue(expectedResultSet.equals(actualResultSet));
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java
index 882033a..1ff6818 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java
@@ -1,107 +1,58 @@
 /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license agreements. See the NOTICE
+ * file distributed with this work for additional information regarding 
copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the "License"); you may not 
use this file except in compliance with the
+ * License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
  */
 package org.apache.drill.exec.store.text;
 
-import static org.junit.Assert.assertEquals;
-
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import org.apache.drill.BaseTestQuery;
-import org.apache.drill.exec.exception.SchemaChangeException;
-import org.apache.drill.exec.record.RecordBatchLoader;
-import org.apache.drill.exec.record.VectorWrapper;
 import org.apache.drill.exec.rpc.user.QueryDataBatch;
-import org.apache.drill.exec.vector.ValueVector;
 import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestTextColumn extends BaseTestQuery {
 
-public class TestTextColumn extends BaseTestQuery{
   static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(TestTextColumn.class);
 
   @Test
-  public void testCsvColumnSelection() throws Exception{
+  public void testCsvColumnSelection() throws Exception {
     test("select columns[0] as region_id, columns[1] as country from 
dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/regions.csv`");
   }
 
   @Test
   public void testDefaultDelimiterColumnSelection() throws Exception {
-    List<QueryDataBatch> batches = testSqlWithResults("SELECT columns[0] as 
entire_row " +
-      "from 
dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.txt`");
+    List<QueryDataBatch> actualResults = testSqlWithResults("SELECT columns[0] 
as entire_row "
+        + "from 
dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.txt`");
 
-    List<List<String>> expectedOutput = Arrays.asList(
-      Arrays.asList("a, b,\",\"c\",\"d,, \\n e"),
-      Arrays.asList("d, e,\",\"f\",\"g,, \\n h"),
-      Arrays.asList("g, h,\",\"i\",\"j,, \\n k"));
+    final TestResultSet expectedResultSet = new TestResultSet();
+    expectedResultSet.addRow("a, b,\",\"c\",\"d,, \\n e");
+    expectedResultSet.addRow("d, e,\",\"f\",\"g,, \\n h");
+    expectedResultSet.addRow("g, h,\",\"i\",\"j,, \\n k");
 
-    List<List<String>> actualOutput = getOutput(batches);
-    System.out.println(actualOutput);
-    validateOutput(expectedOutput, actualOutput);
+    TestResultSet actualResultSet = new TestResultSet(actualResults);
+    assertTrue(expectedResultSet.equals(actualResultSet));
   }
 
   @Test
   public void testCsvColumnSelectionCommasInsideQuotes() throws Exception {
-    List<QueryDataBatch> batches = testSqlWithResults("SELECT columns[0] as 
col1, columns[1] as col2, columns[2] as col3," +
-      "columns[3] as col4 from 
dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.csv`");
+    List<QueryDataBatch> actualResults = testSqlWithResults("SELECT columns[0] 
as col1, columns[1] as col2, columns[2] as col3,"
+        + "columns[3] as col4 from 
dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.csv`");
 
-    List<List<String>> expectedOutput = Arrays.asList(
-      Arrays.asList("a, b,", "c", "d,, \\n e","f\\\"g"),
-      Arrays.asList("d, e,", "f", "g,, \\n h","i\\\"j"),
-      Arrays.asList("g, h,", "i", "j,, \\n k","l\\\"m"));
-
-    List<List<String>> actualOutput = getOutput(batches);
-    validateOutput(expectedOutput, actualOutput);
-  }
+    final TestResultSet expectedResultSet = new TestResultSet();
+    expectedResultSet.addRow("a, b,", "c", "d,, \\n e", "f\\\"g");
+    expectedResultSet.addRow("d, e,", "f", "g,, \\n h", "i\\\"j");
+    expectedResultSet.addRow("g, h,", "i", "j,, \\n k", "l\\\"m");
 
-  private List<List<String>> getOutput(List<QueryDataBatch> batches) throws 
SchemaChangeException {
-    List<List<String>> output = new ArrayList<>();
-    RecordBatchLoader loader = new RecordBatchLoader(getAllocator());
-    int last = 0;
-    for(QueryDataBatch batch : batches) {
-      int rows = batch.getHeader().getRowCount();
-      if(batch.getData() != null) {
-        loader.load(batch.getHeader().getDef(), batch.getData());
-        // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
-        // SchemaChangeException, so check/clean throws clause above.
-        for (int i = 0; i < rows; ++i) {
-          output.add(new ArrayList<String>());
-          for (VectorWrapper<?> vw: loader) {
-            ValueVector.Accessor accessor = vw.getValueVector().getAccessor();
-            Object o = accessor.getObject(i);
-            output.get(last).add(o == null ? null: o.toString());
-          }
-          ++last;
-        }
-      }
-      loader.clear();
-      batch.release();
-    }
-    return output;
+    TestResultSet actualResultSet = new TestResultSet(actualResults);
+    assertTrue(expectedResultSet.equals(actualResultSet));
   }
-
-  private void validateOutput(List<List<String>> expected, List<List<String>> 
actual) {
-    assertEquals(expected.size(), actual.size());
-    for (int i = 0 ; i < expected.size(); ++i) {
-      assertEquals(expected.get(i).size(), actual.get(i).size());
-      for (int j = 0; j < expected.get(i).size(); ++j) {
-        assertEquals(expected.get(i).get(j), actual.get(i).get(j));
-      }
-    }
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json 
b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
index 630db6b..7b977e2 100644
--- a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
+++ b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
@@ -40,9 +40,9 @@
         "json" : {
           type: "json"
         },
-        "httpd" :{
+        "httpd" : {
           type: "httpd",
-          format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" 
\"%{User-Agent}i\" \"%{Cookie}i\""
+          logFormat: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" 
\"%{User-agent}i\""
         },
         "txt" : {
           type : "text",

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd 
b/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd
new file mode 100644
index 0000000..fd12566
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd
@@ -0,0 +1,2 @@
+127.0.0.1 [31/Dec/2012:23:49:41 +0100] "GET /foo HTTP/1.1" 200 1213 
"http://localhost/index.php?mies=wim&blue=red";
+127.0.0.1 [31/Dec/2012:23:49:41 +0100] "GET /foo HTTP/1.1" 200 1213 
"http://localhost/index.php?mies=wim&test=true";

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd 
b/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd
new file mode 100644
index 0000000..d48fa12
--- /dev/null
+++ 
b/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd
@@ -0,0 +1,5 @@
+195.154.46.135 - - [25/Oct/2015:04:11:25 +0100] "GET 
/linux/doing-pxe-without-dhcp-control HTTP/1.1" 200 24323 
"http://howto.basjes.nl/"; "Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 
Firefox/35.0"
+23.95.237.180 - - [25/Oct/2015:04:11:26 +0100] "GET /join_form HTTP/1.0" 200 
11114 "http://howto.basjes.nl/"; "Mozilla/5.0 (Windows NT 5.1; rv:35.0) 
Gecko/20100101 Firefox/35.0"
+23.95.237.180 - - [25/Oct/2015:04:11:27 +0100] "POST /join_form HTTP/1.1" 302 
9093 "http://howto.basjes.nl/join_form"; "Mozilla/5.0 (Windows NT 5.1; rv:35.0) 
Gecko/20100101 Firefox/35.0"
+158.222.5.157 - - [25/Oct/2015:04:24:31 +0100] "GET /join_form HTTP/1.0" 200 
11114 "http://howto.basjes.nl/"; "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) 
Gecko/20100101 Firefox/34.0 AlexaToolbar/alxf-2.21"
+158.222.5.157 - - [25/Oct/2015:04:24:32 +0100] "POST /join_form HTTP/1.1" 302 
9093 "http://howto.basjes.nl/join_form"; "Mozilla/5.0 (Windows NT 6.3; WOW64; 
rv:34.0) Gecko/20100101 Firefox/34.0 AlexaToolbar/alxf-2.21"

http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index dcb1b3d..e094c4b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -223,6 +223,7 @@
             <exclude>**/*.linux</exclude>
             <exclude>**/client/build/**</exclude>
             <exclude>**/*.tbl</exclude>
+            <exclude>**/*.httpd</exclude>
             <!-- TODO DRILL-4336: try to avoid the need to add this -->
             <exclude>dependency-reduced-pom.xml</exclude>
           </excludes>

Reply via email to