DRILL-3423: Adding HTTPd Log Parsing functionality including full pushdown, type remapping and wildcard support. Pushed through the requested columns for push down to the parser. Added more tests to cover a few more use cases. Ensured that user query fields are now completely consistent with returned values.
Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/46c0f2a4 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/46c0f2a4 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/46c0f2a4 Branch: refs/heads/master Commit: 46c0f2a4135450417dfebf52f11538f8926fd467 Parents: 818f945 Author: Jim Scott <[email protected]> Authored: Tue Nov 3 15:43:54 2015 -0600 Committer: Parth Chandra <[email protected]> Committed: Tue Nov 1 10:42:22 2016 -0700 ---------------------------------------------------------------------- exec/java-exec/pom.xml | 3 +- .../exec/store/dfs/easy/EasyFormatPlugin.java | 1 + .../exec/store/httpd/HttpdLogFormatPlugin.java | 246 +++++++++++++++ .../drill/exec/store/httpd/HttpdLogRecord.java | 299 +++++++++++++++++++ .../drill/exec/store/httpd/HttpdParser.java | 171 +++++++++++ .../resources/bootstrap-storage-plugins.json | 5 +- .../java/org/apache/drill/BaseTestQuery.java | 76 ++++- .../drill/exec/store/httpd/HttpdParserTest.java | 48 +++ .../store/httpd/TestHttpdLogFormatPlugin.java | 97 ++++++ .../drill/exec/store/text/TestTextColumn.java | 103 ++----- .../resources/bootstrap-storage-plugins.json | 4 +- .../resources/store/httpd/dfs-bootstrap.httpd | 2 + .../store/httpd/dfs-test-bootstrap-test.httpd | 5 + pom.xml | 1 + 14 files changed, 976 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/pom.xml ---------------------------------------------------------------------- diff --git a/exec/java-exec/pom.xml b/exec/java-exec/pom.xml index e9b07be..6bf77c0 100644 --- a/exec/java-exec/pom.xml +++ b/exec/java-exec/pom.xml @@ -456,9 +456,8 @@ <dependency> <groupId>nl.basjes.parse.httpdlog</groupId> <artifactId>httpdlog-parser</artifactId> - <version>2.1.1</version> + <version>2.4</version> </dependency> - </dependencies> <profiles> 
http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java index c7f9cf8..c09e009 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java @@ -117,6 +117,7 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements /** Method indicates whether or not this format could also be in a compression container (for example: csv.gz versus csv). * If this format uses its own internal compression scheme, such as Parquet does, then this should return false. + * @return <code>true</code> if it is compressible */ public boolean isCompressible() { return compressible; http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java new file mode 100644 index 0000000..7a83d45 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogFormatPlugin.java @@ -0,0 +1,246 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.drill.exec.store.httpd; + +import java.io.IOException; +import java.util.List; + +import nl.basjes.parse.core.exceptions.DissectionFailure; +import nl.basjes.parse.core.exceptions.InvalidDissectorException; +import nl.basjes.parse.core.exceptions.MissingDissectorsException; + +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.logical.FormatPluginConfig; +import org.apache.drill.common.logical.StoragePluginConfig; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.ops.OperatorContext; +import org.apache.drill.exec.physical.impl.OutputMutator; +import org.apache.drill.exec.server.DrillbitContext; +import org.apache.drill.exec.store.AbstractRecordReader; +import org.apache.drill.exec.store.RecordWriter; +import org.apache.drill.exec.store.dfs.DrillFileSystem; +import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin; +import org.apache.drill.exec.store.dfs.easy.EasyWriter; +import org.apache.drill.exec.store.dfs.easy.FileWork; +import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter; +import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.LineRecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TextInputFormat; + +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.drill.exec.store.RecordReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HttpdLogFormatPlugin extends EasyFormatPlugin<HttpdLogFormatPlugin.HttpdLogFormatConfig> { + + private static final Logger LOG = LoggerFactory.getLogger(HttpdLogFormatPlugin.class); + private static final String PLUGIN_EXTENSION = "httpd"; + private static final int VECTOR_MEMORY_ALLOCATION = 4095; + + public HttpdLogFormatPlugin(final String name, final DrillbitContext context, final Configuration fsConf, + final StoragePluginConfig storageConfig, final HttpdLogFormatConfig formatConfig) { + + super(name, context, fsConf, storageConfig, formatConfig, true, false, true, true, + Lists.newArrayList(PLUGIN_EXTENSION), PLUGIN_EXTENSION); + } + + /** + * This class is a POJO to hold the configuration for the HttpdLogFormat Parser. This is automatically + * serialized/deserialized from JSON format. + */ + @JsonTypeName(PLUGIN_EXTENSION) + public static class HttpdLogFormatConfig implements FormatPluginConfig { + + private String logFormat; + private String timestampFormat; + + /** + * @return the logFormat + */ + public String getLogFormat() { + return logFormat; + } + + /** + * @return the timestampFormat + */ + public String getTimestampFormat() { + return timestampFormat; + } + } + + /** + * This class performs the work for the plugin. This is where all logic goes to read records. In this case httpd logs + * are lines terminated with a new line character. 
+ */ + private class HttpdLogRecordReader extends AbstractRecordReader { + + private final DrillFileSystem fs; + private final FileWork work; + private final FragmentContext fragmentContext; + private ComplexWriter writer; + private HttpdParser parser; + private LineRecordReader lineReader; + private LongWritable lineNumber; + + public HttpdLogRecordReader(final FragmentContext context, final DrillFileSystem fs, final FileWork work, final List<SchemaPath> columns) { + this.fs = fs; + this.work = work; + this.fragmentContext = context; + setColumns(columns); + } + + /** + * The query fields passed in are formatted in a way that Drill requires. Those must be cleaned up to work with the + * parser. + * + * @return Map<DrillFieldNames, ParserFieldNames> + */ + private Map<String, String> makeParserFields() { + final Map<String, String> fieldMapping = Maps.newHashMap(); + for (final SchemaPath sp : getColumns()) { + final String drillField = sp.getRootSegment().getPath(); + final String parserField = HttpdParser.parserFormattedFieldName(drillField); + fieldMapping.put(drillField, parserField); + } + return fieldMapping; + } + + @Override + public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException { + try { + /** + * Extract the list of field names for the parser to use if it is NOT a star query. If it is a star query just + * pass through an empty map, because the parser is going to have to build all possibilities. + */ + final Map<String, String> fieldMapping = !isStarQuery() ? 
makeParserFields() : null; + writer = new VectorContainerWriter(output); + parser = new HttpdParser(writer.rootAsMap(), context.getManagedBuffer(), + HttpdLogFormatPlugin.this.getConfig().getLogFormat(), + HttpdLogFormatPlugin.this.getConfig().getTimestampFormat(), + fieldMapping); + + final Path path = fs.makeQualified(new Path(work.getPath())); + FileSplit split = new FileSplit(path, work.getStart(), work.getLength(), new String[]{""}); + TextInputFormat inputFormat = new TextInputFormat(); + JobConf job = new JobConf(fs.getConf()); + job.setInt("io.file.buffer.size", fragmentContext.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE)); + job.setInputFormat(inputFormat.getClass()); + lineReader = (LineRecordReader) inputFormat.getRecordReader(split, job, Reporter.NULL); + lineNumber = lineReader.createKey(); + } + catch (NoSuchMethodException | MissingDissectorsException | InvalidDissectorException e) { + throw handleAndGenerate("Failure creating HttpdParser", e); + } + catch (IOException e) { + throw handleAndGenerate("Failure creating HttpdRecordReader", e); + } + } + + private RuntimeException handleAndGenerate(final String s, final Exception e) { + throw UserException.dataReadError(e) + .message(s + "\n%s", e.getMessage()) + .addContext("Path", work.getPath()) + .addContext("Split Start", work.getStart()) + .addContext("Split Length", work.getLength()) + .addContext("Local Line Number", lineNumber.get()) + .build(LOG); + } + + /** + * This record reader is given a batch of records (lines) to read. Next acts upon a batch of records. + * + * @return Number of records in this batch. 
+ */ + @Override + public int next() { + try { + final Text line = lineReader.createValue(); + + writer.allocate(); + writer.reset(); + + int recordCount = 0; + while (recordCount < VECTOR_MEMORY_ALLOCATION && lineReader.next(lineNumber, line)) { + writer.setPosition(recordCount); + parser.parse(line.toString()); + recordCount++; + } + writer.setValueCount(recordCount); + + return recordCount; + } + catch (DissectionFailure | InvalidDissectorException | MissingDissectorsException | IOException e) { + throw handleAndGenerate("Failure while parsing log record.", e); + } + } + + @Override + public void close() throws Exception { + try { + if (lineReader != null) { + lineReader.close(); + } + } + catch (IOException e) { + LOG.warn("Failure while closing Httpd reader.", e); + } + } + + } + + /** + * This plugin supports pushing down into the parser. Only fields specifically asked for within the configuration will + * be parsed. If no fields are asked for then all possible fields will be returned. 
+ * + * @return true + */ + @Override + public boolean supportsPushDown() { + return true; + } + + @Override + public RecordReader getRecordReader(final FragmentContext context, final DrillFileSystem dfs, final FileWork fileWork, final List<SchemaPath> columns, final String userName) throws ExecutionSetupException { + return new HttpdLogRecordReader(context, dfs, fileWork, columns); + } + + @Override + public RecordWriter getRecordWriter(final FragmentContext context, final EasyWriter writer) throws IOException { + throw new UnsupportedOperationException("Drill doesn't currently support writing HTTPd logs"); + } + + @Override + public int getReaderOperatorType() { + return -1; + } + + @Override + public int getWriterOperatorType() { + return -1; + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java new file mode 100644 index 0000000..03f70c1 --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdLogRecord.java @@ -0,0 +1,299 @@ +/* + * Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.drill.exec.store.httpd;

import com.google.common.base.Charsets;
import com.google.common.collect.Maps;
import io.netty.buffer.DrillBuf;
import java.util.EnumSet;
import java.util.Map;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Parser;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
import org.apache.drill.exec.vector.complex.writer.BigIntWriter;
import org.apache.drill.exec.vector.complex.writer.Float8Writer;
import org.apache.drill.exec.vector.complex.writer.VarCharWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Target object for the httpd log parser: the parser invokes the {@code set}/{@code setWildcard}
 * methods below via reflection for every parsed field, and each method forwards the value to the
 * Drill value-vector writer registered for that field in {@link #addField}.
 */
public class HttpdLogRecord {

  private static final Logger LOG = LoggerFactory.getLogger(HttpdLogRecord.class);
  // Per-type lookup from parser field name to the writer that receives its values.
  private final Map<String, VarCharWriter> strings = Maps.newHashMap();
  private final Map<String, BigIntWriter> longs = Maps.newHashMap();
  private final Map<String, Float8Writer> doubles = Maps.newHashMap();
  // Root map writers for wildcard fields, keyed by the wildcard prefix (without ".*").
  private final Map<String, MapWriter> wildcards = Maps.newHashMap();
  // Cache of full wildcard field name -> Drill-safe extension name.
  private final Map<String, String> cleanExtensions = Maps.newHashMap();
  // Per-record caches; cleared in finishRecord().
  private final Map<String, MapWriter> startedWildcards = Maps.newHashMap();
  private final Map<String, MapWriter> wildcardWriters = Maps.newHashMap();
  private DrillBuf managedBuffer;

  public HttpdLogRecord(final DrillBuf managedBuffer) {
    this.managedBuffer = managedBuffer;
  }

  /**
   * Call this method after a record has been parsed. This finishes the lifecycle of any maps that
   * were written and clears the per-record state so the next record starts fresh.
   */
  public void finishRecord() {
    for (MapWriter writer : wildcardWriters.values()) {
      writer.end();
    }
    wildcardWriters.clear();
    startedWildcards.clear();
  }

  /** Returns the shared scratch buffer, growing it if {@code size} exceeds its capacity. */
  private DrillBuf buf(final int size) {
    if (managedBuffer.capacity() < size) {
      managedBuffer = managedBuffer.reallocIfNeeded(size);
    }
    return managedBuffer;
  }

  private void writeString(final VarCharWriter writer, final String value) {
    final byte[] stringBytes = value.getBytes(Charsets.UTF_8);
    final DrillBuf stringBuffer = buf(stringBytes.length);
    stringBuffer.clear();
    stringBuffer.writeBytes(stringBytes);
    writer.writeVarChar(0, stringBytes.length, stringBuffer);
  }

  /**
   * This method is referenced and called via reflection. This is added as a parsing target for
   * the parser. It will get called when the value of a log field is a String data type.
   *
   * @param field name of field
   * @param value value of field
   */
  @SuppressWarnings("unused")
  public void set(final String field, final String value) {
    if (value != null) {
      final VarCharWriter w = strings.get(field);
      if (w != null) {
        LOG.trace("Parsed field: {}, as string: {}", field, value);
        writeString(w, value);
      }
      else {
        LOG.warn("No 'string' writer found for field: {}", field);
      }
    }
  }

  /**
   * This method is referenced and called via reflection. This is added as a parsing target for
   * the parser. It will get called when the value of a log field is a Long data type.
   *
   * @param field name of field
   * @param value value of field
   */
  @SuppressWarnings("unused")
  public void set(final String field, final Long value) {
    if (value != null) {
      final BigIntWriter w = longs.get(field);
      if (w != null) {
        LOG.trace("Parsed field: {}, as long: {}", field, value);
        w.writeBigInt(value);
      }
      else {
        LOG.warn("No 'long' writer found for field: {}", field);
      }
    }
  }

  /**
   * This method is referenced and called via reflection. This is added as a parsing target for
   * the parser. It will get called when the value of a log field is a Double data type.
   *
   * @param field name of field
   * @param value value of field
   */
  @SuppressWarnings("unused")
  public void set(final String field, final Double value) {
    if (value != null) {
      final Float8Writer w = doubles.get(field);
      if (w != null) {
        LOG.trace("Parsed field: {}, as double: {}", field, value);
        w.writeFloat8(value);
      }
      else {
        LOG.warn("No 'double' writer found for field: {}", field);
      }
    }
  }

  /**
   * This method is referenced and called via reflection. When the parser processes a field like:
   * HTTP.URI:request.firstline.uri.query.* where star is an arbitrary field that the parser found
   * this method will be invoked.
   *
   * @param field name of field
   * @param value value of field
   */
  @SuppressWarnings("unused")
  public void setWildcard(final String field, final String value) {
    if (value != null) {
      final MapWriter mapWriter = getWildcardWriter(field);
      LOG.trace("Parsed wildcard field: {}, as string: {}", field, value);
      final VarCharWriter w = mapWriter.varChar(cleanExtensions.get(field));
      writeString(w, value);
    }
  }

  /**
   * This method is referenced and called via reflection. When the parser processes a field like:
   * HTTP.URI:request.firstline.uri.query.* where star is an arbitrary field that the parser found
   * this method will be invoked.
   *
   * @param field name of field
   * @param value value of field
   */
  @SuppressWarnings("unused")
  public void setWildcard(final String field, final Long value) {
    if (value != null) {
      final MapWriter mapWriter = getWildcardWriter(field);
      LOG.trace("Parsed wildcard field: {}, as long: {}", field, value);
      final BigIntWriter w = mapWriter.bigInt(cleanExtensions.get(field));
      w.writeBigInt(value);
    }
  }

  /**
   * This method is referenced and called via reflection. When the parser processes a field like:
   * HTTP.URI:request.firstline.uri.query.* where star is an arbitrary field that the parser found
   * this method will be invoked.
   *
   * @param field name of field
   * @param value value of field
   */
  @SuppressWarnings("unused")
  public void setWildcard(final String field, final Double value) {
    if (value != null) {
      final MapWriter mapWriter = getWildcardWriter(field);
      LOG.trace("Parsed wildcard field: {}, as double: {}", field, value);
      final Float8Writer w = mapWriter.float8(cleanExtensions.get(field));
      w.writeFloat8(value);
    }
  }

  /**
   * For a configuration like HTTP.URI:request.firstline.uri.query.*, a writer was created with
   * name HTTP.URI:request.firstline.uri.query; we traverse the list of wildcard writers to see
   * which one is the root of the name of the field passed in like
   * HTTP.URI:request.firstline.uri.query.old. This is the writer entry that is needed.
   *
   * @param field like HTTP.URI:request.firstline.uri.query.old where 'old' is one of many
   *        different parameter names.
   * @return the writer to be used for this field.
   */
  private MapWriter getWildcardWriter(final String field) {
    MapWriter writer = startedWildcards.get(field);
    if (writer == null) {
      for (Map.Entry<String, MapWriter> entry : wildcards.entrySet()) {
        final String root = entry.getKey();
        if (field.startsWith(root)) {
          writer = entry.getValue();

          /*
           * In order to save some time, store the cleaned version of the field extension. It is
           * possible it will have unsafe characters in it.
           */
          if (!cleanExtensions.containsKey(field)) {
            final String extension = field.substring(root.length() + 1, field.length());
            final String cleanExtension = HttpdParser.drillFormattedFieldName(extension);
            cleanExtensions.put(field, cleanExtension);
            LOG.debug("Added extension: field='{}' with cleanExtension='{}'", field, cleanExtension);
          }

          /*
           * We already know we have the writer, but if we have put this writer in the started
           * list, do NOT call start again.
           */
          if (!wildcardWriters.containsKey(root)) {
            LOG.debug("Starting new wildcard field writer: {}", field);
            writer.start();
            wildcardWriters.put(root, writer);
          }

          /*
           * Cache the field -> writer resolution unconditionally. Previously this was only done
           * when the root writer was first started, so every second and subsequent distinct field
           * under an already-started root re-ran this linear prefix scan on every value. Both
           * caches are cleared per record in finishRecord(), so this is safe.
           */
          startedWildcards.put(field, writer);

          // Stop at the first root writer that matches the field.
          break;
        }
      }
    }

    return writer;
  }

  public Map<String, VarCharWriter> getStrings() {
    return strings;
  }

  public Map<String, BigIntWriter> getLongs() {
    return longs;
  }

  public Map<String, Float8Writer> getDoubles() {
    return doubles;
  }

  /**
   * This record will be used with a single parser. For each field that is to be parsed a setter
   * will be called. It registers a setter method for each field being parsed. It also builds the
   * data writers to hold the data being parsed.
   *
   * @param parser parser to register the reflective parse targets with
   * @param mapWriter writer for the record's root map
   * @param type the native casts the parser supports for this field
   * @param parserFieldName field name as the parser knows it (may end in a wildcard)
   * @param drillFieldName Drill-safe column name
   * @throws NoSuchMethodException if a reflective setter lookup fails
   */
  public void addField(final Parser<HttpdLogRecord> parser, final MapWriter mapWriter, final EnumSet<Casts> type, final String parserFieldName, final String drillFieldName) throws NoSuchMethodException {
    final boolean hasWildcard = parserFieldName.endsWith(HttpdParser.PARSER_WILDCARD);

    /*
     * This is a dynamic way to map the setter for each specified field type.
     * e.g. a TIME.STAMP may map to a LONG while a referrer may map to a STRING.
     */
    if (hasWildcard) {
      final String cleanName = parserFieldName.substring(0, parserFieldName.length() - HttpdParser.PARSER_WILDCARD.length());
      LOG.debug("Adding WILDCARD parse target: {} as {}, with field name: {}", parserFieldName, cleanName, drillFieldName);
      parser.addParseTarget(this.getClass().getMethod("setWildcard", String.class, String.class), parserFieldName);
      parser.addParseTarget(this.getClass().getMethod("setWildcard", String.class, Double.class), parserFieldName);
      parser.addParseTarget(this.getClass().getMethod("setWildcard", String.class, Long.class), parserFieldName);
      wildcards.put(cleanName, mapWriter.map(drillFieldName));
    }
    else if (type.contains(Casts.DOUBLE)) {
      LOG.debug("Adding DOUBLE parse target: {}, with field name: {}", parserFieldName, drillFieldName);
      parser.addParseTarget(this.getClass().getMethod("set", String.class, Double.class), parserFieldName);
      doubles.put(parserFieldName, mapWriter.float8(drillFieldName));
    }
    else if (type.contains(Casts.LONG)) {
      LOG.debug("Adding LONG parse target: {}, with field name: {}", parserFieldName, drillFieldName);
      parser.addParseTarget(this.getClass().getMethod("set", String.class, Long.class), parserFieldName);
      longs.put(parserFieldName, mapWriter.bigInt(drillFieldName));
    }
    else {
      LOG.debug("Adding STRING parse target: {}, with field name: {}", parserFieldName, drillFieldName);
      parser.addParseTarget(this.getClass().getMethod("set", String.class, String.class), parserFieldName);
      strings.put(parserFieldName, mapWriter.varChar(drillFieldName));
    }
  }
}
/*
 * Copyright 2015 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.httpd;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import io.netty.buffer.DrillBuf;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import nl.basjes.parse.core.Casts;
import nl.basjes.parse.core.Parser;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.core.exceptions.InvalidDissectorException;
import nl.basjes.parse.core.exceptions.MissingDissectorsException;
import nl.basjes.parse.httpdlog.HttpdLoglineParser;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Thin wrapper around the nl.basjes {@link HttpdLoglineParser} that wires parsed field values
 * into Drill value-vector writers via {@link HttpdLogRecord}, and translates between Drill-safe
 * column names (underscores) and parser field names (dots, colons, wildcards).
 */
public class HttpdParser {

  private static final Logger LOG = LoggerFactory.getLogger(HttpdParser.class);
  public static final String PARSER_WILDCARD = ".*";
  public static final String SAFE_WILDCARD = "_$";
  public static final String SAFE_SEPARATOR = "_";
  // Prefix on a configured field value that marks it as a "TYPE:field" remapping.
  public static final String REMAPPING_FLAG = "#";
  private final Parser<HttpdLogRecord> parser;
  private final HttpdLogRecord record;

  public HttpdParser(final MapWriter mapWriter, final DrillBuf managedBuffer, final String logFormat,
      final String timestampFormat, final Map<String, String> fieldMapping)
      throws NoSuchMethodException, MissingDissectorsException, InvalidDissectorException {

    Preconditions.checkArgument(logFormat != null && !logFormat.trim().isEmpty(), "logFormat cannot be null or empty");

    this.record = new HttpdLogRecord(managedBuffer);
    this.parser = new HttpdLoglineParser<>(HttpdLogRecord.class, logFormat, timestampFormat);

    setupParser(mapWriter, logFormat, fieldMapping);

    if (timestampFormat != null && !timestampFormat.trim().isEmpty()) {
      LOG.info("Custom timestamp format has been specified. This is an informational note only as custom timestamps is rather unusual.");
    }
    if (logFormat.contains("\n")) {
      LOG.info("Specified logformat is a multiline log format: {}", logFormat);
    }
  }

  /**
   * We do not expose the underlying parser or the record which is used to manage the writers.
   *
   * @param line log line to tear apart.
   *
   * @throws DissectionFailure if the line does not match the configured log format
   * @throws InvalidDissectorException if the parser configuration is invalid
   * @throws MissingDissectorsException if a requested field has no dissector
   */
  public void parse(final String line) throws DissectionFailure, InvalidDissectorException, MissingDissectorsException {
    parser.parse(record, line);
    record.finishRecord();
  }

  /**
   * In order to define a type remapping the format of the field configuration will look like:
   * HTTP.URI:request.firstline.uri.query.[parameter name]
   *
   * @param parser add type remapping to this parser instance.
   * @param fieldName request.firstline.uri.query.[parameter_name]
   * @param fieldType HTTP.URI, etc.
   */
  private void addTypeRemapping(final Parser<HttpdLogRecord> parser, final String fieldName, final String fieldType) {
    LOG.debug("Adding type remapping - fieldName: {}, fieldType: {}", fieldName, fieldType);
    parser.addTypeRemapping(fieldName, fieldType);
  }

  /**
   * The parser deals with dots unlike Drill wanting underscores request_referer. For the sake of
   * simplicity we are going replace the dots. The resultant output field will look like:
   * request.referer.<br>
   * Additionally, wild cards will get replaced with .*
   *
   * @param drillFieldName name to be cleansed.
   * @return the parser-formatted field name
   */
  public static String parserFormattedFieldName(final String drillFieldName) {
    return drillFieldName.replace(SAFE_WILDCARD, PARSER_WILDCARD).replaceAll(SAFE_SEPARATOR, ".").replaceAll("\\.\\.", "_");
  }

  /**
   * Drill cannot deal with fields with dots in them like request.referer. For the sake of
   * simplicity we are going to ensure the field name is cleansed. The resultant output field will
   * look like: request_referer.<br>
   * Additionally, wild cards will get replaced with _$
   *
   * @param parserFieldName name to be cleansed.
   * @return the Drill-safe field name
   */
  public static String drillFormattedFieldName(final String parserFieldName) {
    return parserFieldName.replaceAll("_", "__").replace(PARSER_WILDCARD, SAFE_WILDCARD).replaceAll("\\.", SAFE_SEPARATOR);
  }

  private void setupParser(final MapWriter mapWriter, final String logFormat, final Map<String, String> fieldMapping)
      throws NoSuchMethodException, MissingDissectorsException, InvalidDissectorException {

    /*
     * If the user has selected fields, then we will use them to configure the parser because this
     * would be the most efficient way to parse the log.
     */
    final Map<String, String> requestedPaths;
    final List<String> allParserPaths = parser.getPossiblePaths();
    if (fieldMapping != null && !fieldMapping.isEmpty()) {
      LOG.debug("Using fields defined by user.");
      requestedPaths = fieldMapping;
    }
    else {
      // Use all possible paths that the parser has determined from the specified log format.
      LOG.debug("No fields defined by user, defaulting to all possible fields.");
      requestedPaths = Maps.newHashMap();
      for (final String parserPath : allParserPaths) {
        requestedPaths.put(drillFormattedFieldName(parserPath), parserPath);
      }
    }

    /*
     * By adding the parse target to the dummy instance we activate it for use. Which we can then
     * use to find out which paths cast to which native data types. After we are done figuring
     * this information out, we throw this away because this will be the slowest parsing path
     * possible for the specified format.
     */
    Parser<Object> dummy = new HttpdLoglineParser<>(Object.class, logFormat);
    dummy.addParseTarget(String.class.getMethod("indexOf", String.class), allParserPaths);

    for (final Map.Entry<String, String> entry : requestedPaths.entrySet()) {
      final EnumSet<Casts> casts;

      // Check the field specified by the user to see if it is supposed to be remapped.
      if (entry.getValue().startsWith(REMAPPING_FLAG)) {
        // Because this field is being remapped we need to replace the field name that the parser
        // uses. Mutating via Entry.setValue() is safe here; the map structure is unchanged.
        entry.setValue(entry.getValue().substring(REMAPPING_FLAG.length()));

        final String[] pieces = entry.getValue().split(":");
        // A malformed remapping such as "#foo" (no TYPE:field separator) would otherwise surface
        // as a bare ArrayIndexOutOfBoundsException; fail with a diagnosable message instead.
        if (pieces.length < 2) {
          throw new IllegalArgumentException(
              "Invalid type remapping '" + entry.getValue() + "' for field '" + entry.getKey()
                  + "'; expected the form TYPE:field, e.g. HTTP.URI:request.firstline.uri.query.x");
        }
        addTypeRemapping(parser, pieces[1], pieces[0]);

        casts = Casts.STRING_ONLY;
      }
      else {
        casts = dummy.getCasts(entry.getValue());
      }

      LOG.debug("Setting up drill field: {}, parser field: {}, which casts as: {}", entry.getKey(), entry.getValue(), casts);
      record.addField(parser, mapWriter, casts, entry.getValue(), entry.getKey());
    }
  }
}
static org.junit.Assert.assertThat; import static org.junit.Assert.fail; @@ -65,6 +66,10 @@ import org.junit.runner.Description; import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.io.Resources; +import java.util.ArrayList; +import java.util.Arrays; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; public class BaseTestQuery extends ExecTest { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BaseTestQuery.class); @@ -302,7 +307,7 @@ public class BaseTestQuery extends ExecTest { Preconditions.checkArgument(query instanceof String, "Expected a string as input query"); query = QueryTestUtil.normalizeQuery((String)query); return client.runQuery(type, (String)query); - } + } } public static List<QueryDataBatch> testPreparedStatement(PreparedStatementHandle handle) throws Exception { @@ -384,9 +389,9 @@ public class BaseTestQuery extends ExecTest { } catch (AssertionError e) { e.addSuppressed(actualException); throw e; - } } } + } /** * Utility method which tests given query produces a {@link UserException} @@ -501,4 +506,69 @@ public class BaseTestQuery extends ExecTest { return formattedResults.toString(); } -} + + + public class TestResultSet { + + private final List<List<String>> rows; + + public TestResultSet() { + rows = new ArrayList<>(); + } + + public TestResultSet(List<QueryDataBatch> batches) throws SchemaChangeException { + rows = new ArrayList<>(); + convert(batches); + } + + public void addRow(String... 
cells) { + List<String> newRow = Arrays.asList(cells); + rows.add(newRow); + } + + public int size() { + return rows.size(); + } + + @Override public boolean equals(Object o) { + boolean result = false; + + if (this == o) { + result = true; + } else if (o instanceof TestResultSet) { + TestResultSet that = (TestResultSet) o; + assertEquals(this.size(), that.size()); + for (int i = 0; i < this.rows.size(); i++) { + assertEquals(this.rows.get(i).size(), that.rows.get(i).size()); + for (int j = 0; j < this.rows.get(i).size(); ++j) { + assertEquals(this.rows.get(i).get(j), that.rows.get(i).get(j)); + } + } + result = true; + } + + return result; + } + + private void convert(List<QueryDataBatch> batches) throws SchemaChangeException { + RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); + for (QueryDataBatch batch : batches) { + int rc = batch.getHeader().getRowCount(); + if (batch.getData() != null) { + loader.load(batch.getHeader().getDef(), batch.getData()); + for (int i = 0; i < rc; ++i) { + List<String> newRow = new ArrayList<>(); + rows.add(newRow); + for (VectorWrapper<?> vw : loader) { + ValueVector.Accessor accessor = vw.getValueVector().getAccessor(); + Object o = accessor.getObject(i); + newRow.add(o == null ? null : o.toString()); + } + } + } + loader.clear(); + batch.release(); + } + } + } + } http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java new file mode 100644 index 0000000..961d9a6 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/HttpdParserTest.java @@ -0,0 +1,48 @@ +/* + * Copyright 2015 The Apache Software Foundation. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.httpd; + +import io.netty.buffer.DrillBuf; +import java.util.Map; +import org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HttpdParserTest { + + private static final Logger LOG = LoggerFactory.getLogger(HttpdParserTest.class); + + private void runTest(String logFormat, String logLine) throws Exception { + MapWriter mapWriter = null; + DrillBuf managedBuffer = null; + Map<String, String> configuredFields = null; + HttpdParser parser = new HttpdParser(mapWriter, managedBuffer, logFormat, null, configuredFields); + parser.parse(logLine); + } + +// @Test + public void testFirstPattern() throws Exception { + LOG.info("testFirstPattern"); +// final String format = "common"; +// final String format = "%h %l %u %t \"%r\" %>s %b"; + final String format = "%h %t \"%r\" %>s %b \"%{Referer}i\""; + final String line = "127.0.0.1 [31/Dec/2012:23:49:41 +0100] " + + "\"GET /foo HTTP/1.1\" 200 " + + "1213 \"http://localhost/index.php?mies=wim\""; + runTest(format, line); + } + +} http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java ---------------------------------------------------------------------- diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java new file mode 100644 index 0000000..6c2eaf8 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdLogFormatPlugin.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package org.apache.drill.exec.store.httpd; + +import java.util.List; +import org.apache.drill.BaseTestQuery; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import static org.junit.Assert.*; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestHttpdLogFormatPlugin extends BaseTestQuery { + + private static final Logger LOG = LoggerFactory.getLogger(TestHttpdLogFormatPlugin.class); + + /** + * This test covers the test bootstrap-storage-plugins.json section of httpd. + * + * Indirectly this validates the HttpdLogFormatPlugin.HttpdLogFormatConfig deserializing properly. 
+ * + * @throws Exception + */ + @Test + public void testDfsTestBootstrap_star() throws Exception { + test("select * from dfs_test.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd`"); + } + + /** + * This test covers the test bootstrap-storage-plugins.json section of httpd. + * + * Indirectly this validates the HttpdLogFormatPlugin.HttpdLogFormatConfig deserializing properly. + * + * @throws Exception + */ + @Test + public void testDfsTestBootstrap_notstar() throws Exception { + test("select `TIME_STAMP:request_receive_time`, `HTTP_METHOD:request_firstline_method`, `STRING:request_status_last`, `BYTES:response_body_bytesclf` \n" + + "from dfs_test.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd`"); + } + + /** + * This test covers the main bootstrap-storage-plugins.json section of httpd. + * + * @throws Exception + */ + @Test + public void testDfsBootstrap_star() throws Exception { + test("select * from dfs.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-bootstrap.httpd`"); + } + + /** + * This test covers the main bootstrap-storage-plugins.json section of httpd. + * + * @throws Exception + */ + @Test + public void testDfsBootstrap_wildcard() throws Exception { + test("select `STRING:request_referer_query_$` from dfs.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-bootstrap.httpd`"); + } + + /** + * This test covers the main bootstrap-storage-plugins.json section of httpd. 
+ * + * @throws Exception + */ + @Test + public void testDfsBootstrap_underscore() throws Exception { + test("select `TIME_DAY:request_receive_time_day__utc` from dfs.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-bootstrap.httpd`"); + } + + @Test + public void testGroupBy_1() throws Exception { + final List<QueryDataBatch> actualResults = testSqlWithResults( + "select `HTTP_METHOD:request_firstline_method` as http_method, `STRING:request_status_last` as status_code, sum(`BYTES:response_body_bytesclf`) as total_bytes \n" + + "from dfs_test.`${WORKING_PATH}/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd`\n" + + "group by `HTTP_METHOD:request_firstline_method`, `STRING:request_status_last`" + ); + + final TestResultSet expectedResultSet = new TestResultSet(); + expectedResultSet.addRow("GET", "200", "46551"); + expectedResultSet.addRow("POST", "302", "18186"); + + TestResultSet actualResultSet = new TestResultSet(actualResults); + assertTrue(expectedResultSet.equals(actualResultSet)); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java index 882033a..1ff6818 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestTextColumn.java @@ -1,107 +1,58 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the + * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
*/ package org.apache.drill.exec.store.text; -import static org.junit.Assert.assertEquals; - -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.drill.BaseTestQuery; -import org.apache.drill.exec.exception.SchemaChangeException; -import org.apache.drill.exec.record.RecordBatchLoader; -import org.apache.drill.exec.record.VectorWrapper; import org.apache.drill.exec.rpc.user.QueryDataBatch; -import org.apache.drill.exec.vector.ValueVector; import org.junit.Test; +import static org.junit.Assert.*; + +public class TestTextColumn extends BaseTestQuery { -public class TestTextColumn extends BaseTestQuery{ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestTextColumn.class); @Test - public void testCsvColumnSelection() throws Exception{ + public void testCsvColumnSelection() throws Exception { test("select columns[0] as region_id, columns[1] as country from dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/regions.csv`"); } @Test public void testDefaultDelimiterColumnSelection() throws Exception { - List<QueryDataBatch> batches = testSqlWithResults("SELECT columns[0] as entire_row " + - "from dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.txt`"); + List<QueryDataBatch> actualResults = testSqlWithResults("SELECT columns[0] as entire_row " + + "from dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.txt`"); - List<List<String>> expectedOutput = Arrays.asList( - Arrays.asList("a, b,\",\"c\",\"d,, \\n e"), - Arrays.asList("d, e,\",\"f\",\"g,, \\n h"), - Arrays.asList("g, h,\",\"i\",\"j,, \\n k")); + final TestResultSet expectedResultSet = new TestResultSet(); + expectedResultSet.addRow("a, b,\",\"c\",\"d,, \\n e"); + expectedResultSet.addRow("d, e,\",\"f\",\"g,, \\n h"); + expectedResultSet.addRow("g, h,\",\"i\",\"j,, \\n k"); - List<List<String>> actualOutput = getOutput(batches); - System.out.println(actualOutput); - validateOutput(expectedOutput, 
actualOutput); + TestResultSet actualResultSet = new TestResultSet(actualResults); + assertTrue(expectedResultSet.equals(actualResultSet)); } @Test public void testCsvColumnSelectionCommasInsideQuotes() throws Exception { - List<QueryDataBatch> batches = testSqlWithResults("SELECT columns[0] as col1, columns[1] as col2, columns[2] as col3," + - "columns[3] as col4 from dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.csv`"); + List<QueryDataBatch> actualResults = testSqlWithResults("SELECT columns[0] as col1, columns[1] as col2, columns[2] as col3," + + "columns[3] as col4 from dfs_test.`[WORKING_PATH]/src/test/resources/store/text/data/letters.csv`"); - List<List<String>> expectedOutput = Arrays.asList( - Arrays.asList("a, b,", "c", "d,, \\n e","f\\\"g"), - Arrays.asList("d, e,", "f", "g,, \\n h","i\\\"j"), - Arrays.asList("g, h,", "i", "j,, \\n k","l\\\"m")); - - List<List<String>> actualOutput = getOutput(batches); - validateOutput(expectedOutput, actualOutput); - } + final TestResultSet expectedResultSet = new TestResultSet(); + expectedResultSet.addRow("a, b,", "c", "d,, \\n e", "f\\\"g"); + expectedResultSet.addRow("d, e,", "f", "g,, \\n h", "i\\\"j"); + expectedResultSet.addRow("g, h,", "i", "j,, \\n k", "l\\\"m"); - private List<List<String>> getOutput(List<QueryDataBatch> batches) throws SchemaChangeException { - List<List<String>> output = new ArrayList<>(); - RecordBatchLoader loader = new RecordBatchLoader(getAllocator()); - int last = 0; - for(QueryDataBatch batch : batches) { - int rows = batch.getHeader().getRowCount(); - if(batch.getData() != null) { - loader.load(batch.getHeader().getDef(), batch.getData()); - // TODO: Clean: DRILL-2933: That load(...) no longer throws - // SchemaChangeException, so check/clean throws clause above. 
- for (int i = 0; i < rows; ++i) { - output.add(new ArrayList<String>()); - for (VectorWrapper<?> vw: loader) { - ValueVector.Accessor accessor = vw.getValueVector().getAccessor(); - Object o = accessor.getObject(i); - output.get(last).add(o == null ? null: o.toString()); - } - ++last; - } - } - loader.clear(); - batch.release(); - } - return output; + TestResultSet actualResultSet = new TestResultSet(actualResults); + assertTrue(expectedResultSet.equals(actualResultSet)); } - - private void validateOutput(List<List<String>> expected, List<List<String>> actual) { - assertEquals(expected.size(), actual.size()); - for (int i = 0 ; i < expected.size(); ++i) { - assertEquals(expected.get(i).size(), actual.get(i).size()); - for (int j = 0; j < expected.get(i).size(); ++j) { - assertEquals(expected.get(i).get(j), actual.get(i).get(j)); - } - } - } - } http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json index 630db6b..7b977e2 100644 --- a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json +++ b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json @@ -40,9 +40,9 @@ "json" : { type: "json" }, - "httpd" :{ + "httpd" : { type: "httpd", - format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Cookie}i\"" + logFormat: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" }, "txt" : { type : "text", http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd b/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd new 
file mode 100644 index 0000000..fd12566 --- /dev/null +++ b/exec/java-exec/src/test/resources/store/httpd/dfs-bootstrap.httpd @@ -0,0 +1,2 @@ +127.0.0.1 [31/Dec/2012:23:49:41 +0100] "GET /foo HTTP/1.1" 200 1213 "http://localhost/index.php?mies=wim&blue=red" +127.0.0.1 [31/Dec/2012:23:49:41 +0100] "GET /foo HTTP/1.1" 200 1213 "http://localhost/index.php?mies=wim&test=true" http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd b/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd new file mode 100644 index 0000000..d48fa12 --- /dev/null +++ b/exec/java-exec/src/test/resources/store/httpd/dfs-test-bootstrap-test.httpd @@ -0,0 +1,5 @@ +195.154.46.135 - - [25/Oct/2015:04:11:25 +0100] "GET /linux/doing-pxe-without-dhcp-control HTTP/1.1" 200 24323 "http://howto.basjes.nl/" "Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0" +23.95.237.180 - - [25/Oct/2015:04:11:26 +0100] "GET /join_form HTTP/1.0" 200 11114 "http://howto.basjes.nl/" "Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0" +23.95.237.180 - - [25/Oct/2015:04:11:27 +0100] "POST /join_form HTTP/1.1" 302 9093 "http://howto.basjes.nl/join_form" "Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0" +158.222.5.157 - - [25/Oct/2015:04:24:31 +0100] "GET /join_form HTTP/1.0" 200 11114 "http://howto.basjes.nl/" "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0 AlexaToolbar/alxf-2.21" +158.222.5.157 - - [25/Oct/2015:04:24:32 +0100] "POST /join_form HTTP/1.1" 302 9093 "http://howto.basjes.nl/join_form" "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0 AlexaToolbar/alxf-2.21" http://git-wip-us.apache.org/repos/asf/drill/blob/46c0f2a4/pom.xml 
---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index dcb1b3d..e094c4b 100644 --- a/pom.xml +++ b/pom.xml @@ -223,6 +223,7 @@ <exclude>**/*.linux</exclude> <exclude>**/client/build/**</exclude> <exclude>**/*.tbl</exclude> + <exclude>**/*.httpd</exclude> <!-- TODO DRILL-4336: try to avoid the need to add this --> <exclude>dependency-reduced-pom.xml</exclude> </excludes>
