Repository: drill Updated Branches: refs/heads/master 1e6fa00cd -> 83513daf0
DRILL-3423: Initial HTTPD log plugin. Needs tests. Would be good to improve the timestamp and cookies behaviors since we can make those more type specific. Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/818f9450 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/818f9450 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/818f9450 Branch: refs/heads/master Commit: 818f94507b9d1792282a4bb8abd2d22d2fc2eb57 Parents: 1e6fa00 Author: Jacques Nadeau <[email protected]> Authored: Tue Jul 28 15:29:32 2015 -0700 Committer: Parth Chandra <[email protected]> Committed: Tue Nov 1 10:42:21 2016 -0700 ---------------------------------------------------------------------- exec/java-exec/pom.xml | 6 + .../exec/store/dfs/easy/EasyFormatPlugin.java | 15 +- .../exec/store/httpd/HttpdFormatPlugin.java | 487 +++++++++++++++++++ .../resources/bootstrap-storage-plugins.json | 4 + .../drill/exec/store/httpd/TestHttpdPlugin.java | 31 ++ .../resources/bootstrap-storage-plugins.json | 4 + .../test/resources/store/httpd/example1.httpd | 1 + 7 files changed, 542 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/pom.xml ---------------------------------------------------------------------- diff --git a/exec/java-exec/pom.xml b/exec/java-exec/pom.xml index ceb0b62..e9b07be 100644 --- a/exec/java-exec/pom.xml +++ b/exec/java-exec/pom.xml @@ -453,6 +453,12 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>nl.basjes.parse.httpdlog</groupId> + <artifactId>httpdlog-parser</artifactId> + <version>2.1.1</version> + </dependency> + </dependencies> <profiles> http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java index f95a323..c7f9cf8 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyFormatPlugin.java @@ -52,11 +52,14 @@ import org.apache.drill.exec.store.dfs.FormatMatcher; import org.apache.drill.exec.store.dfs.FormatPlugin; import org.apache.drill.exec.store.schedule.CompleteFileWork; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements FormatPlugin { + + @SuppressWarnings("unused") private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(EasyFormatPlugin.class); private final BasicFormatMatcher matcher; @@ -66,7 +69,7 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements private final boolean blockSplittable; private final Configuration fsConf; private final StoragePluginConfig storageConfig; - protected final FormatPluginConfig formatConfig; + protected final T formatConfig; private final String name; private final boolean compressible; @@ -129,7 +132,7 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements scan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(), scan.getFormatPlugin(), columnExplorer.getTableColumns(), scan.getSelectionRoot()); scan.setOperatorId(scan.getOperatorId()); - } + } OperatorContext oContext = context.newOperatorContext(scan); final DrillFileSystem dfs; @@ -142,21 +145,21 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements List<RecordReader> readers = Lists.newArrayList(); List<Map<String, String>> implicitColumns = Lists.newArrayList(); Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap(); - for(FileWork work : scan.getWorkUnits()) { + for(FileWork work : scan.getWorkUnits()){ RecordReader recordReader = getRecordReader(context, dfs, work, scan.getColumns(), scan.getUserName()); readers.add(recordReader); Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(work, scan.getSelectionRoot()); implicitColumns.add(implicitValues); if (implicitValues.size() > mapWithMaxColumns.size()) { mapWithMaxColumns = implicitValues; + } } - } // all readers should have the same number of implicit columns, add missing ones with value null Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null)); for (Map<String, String> map : implicitColumns) { map.putAll(Maps.difference(map, diff).entriesOnlyOnRight()); - } + } return new ScanBatch(scan, context, oContext, readers.iterator(), implicitColumns); } @@ -194,7 +197,7 @@ public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements } @Override - public FormatPluginConfig getConfig() { + public T getConfig() { return formatConfig; } http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdFormatPlugin.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdFormatPlugin.java new file mode 100644 index 0000000..7b8dc0e --- /dev/null +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/httpd/HttpdFormatPlugin.java @@ -0,0 +1,487 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.httpd; + +import io.netty.buffer.DrillBuf; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import nl.basjes.parse.core.Parser; +import nl.basjes.parse.core.exceptions.DissectionFailure; +import nl.basjes.parse.core.exceptions.InvalidDissectorException; +import nl.basjes.parse.core.exceptions.MissingDissectorsException; +import nl.basjes.parse.httpdlog.ApacheHttpdLogFormatDissector; +import nl.basjes.parse.httpdlog.dissectors.HttpFirstLineDissector; + +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.logical.FormatPluginConfig; +import org.apache.drill.common.logical.StoragePluginConfig; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.ops.OperatorContext; +import org.apache.drill.exec.physical.impl.OutputMutator; +import org.apache.drill.exec.server.DrillbitContext; +import org.apache.drill.exec.store.AbstractRecordReader; +import org.apache.drill.exec.store.RecordWriter; +import org.apache.drill.exec.store.dfs.DrillFileSystem; +import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin; +import org.apache.drill.exec.store.dfs.easy.EasyWriter; +import org.apache.drill.exec.store.dfs.easy.FileWork; +import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter; +import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.drill.exec.vector.complex.writer.BigIntWriter; +import org.apache.drill.exec.vector.complex.writer.Float8Writer; +import org.apache.drill.exec.vector.complex.writer.VarCharWriter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.LineRecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TextInputFormat; + +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.base.Charsets; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +public class HttpdFormatPlugin extends EasyFormatPlugin<HttpdFormatPlugin.HttpdLogFormatConfig> { + + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HttpdFormatPlugin.class); + + private static final String DEFAULT_EXTENSION = "httpd"; + + public HttpdFormatPlugin( + String name, + DrillbitContext context, + Configuration fsConf, + StoragePluginConfig storageConfig, + HttpdLogFormatConfig formatConfig) { + super(name, context, fsConf, storageConfig, formatConfig, true, false, true, true, + Lists.newArrayList(DEFAULT_EXTENSION), DEFAULT_EXTENSION); + } + + @JsonTypeName("httpd") + public static class HttpdLogFormatConfig implements FormatPluginConfig { + public String format; + } + + private class RecordReader extends AbstractRecordReader { + + private final DrillFileSystem fs; + private final FileWork work; + private final FragmentContext fragmentContext; + + private ComplexWriter writer; + private Parser<ComplexWriterFacade> parser; + private LineRecordReader lineReader; + private LongWritable lineNumber; + private ComplexWriterFacade record; + private DrillBuf managedBuffer; + + public RecordReader(FragmentContext context, DrillFileSystem fs, FileWork work) { + this.fs = fs; + this.work = work; + fragmentContext = context; + managedBuffer = context.getManagedBuffer(); + } + + @Override + public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException { + + try { + + parser = new PartiallyDissectedParser<ComplexWriterFacade>( + ComplexWriterFacade.class, + HttpdFormatPlugin.this.getConfig().format); + writer = new VectorContainerWriter(output); + record = new ComplexWriterFacade(writer); + record.addAsParseTarget(parser); + + final Path path = fs.makeQualified(new Path(work.getPath())); + FileSplit split = new FileSplit(path, work.getStart(), work.getLength(), new String[] { "" }); + TextInputFormat inputFormat = new TextInputFormat(); + JobConf job = new JobConf(fs.getConf()); + job.setInt("io.file.buffer.size", fragmentContext.getConfig() + .getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE)); + job.setInputFormat(inputFormat.getClass()); + lineReader = (LineRecordReader) inputFormat.getRecordReader(split, job, Reporter.NULL); + lineNumber = lineReader.createKey(); + } catch (Exception e) { + throw handleAndGenerate("Failure in creating record reader", e); + } + + + } + + private DrillBuf buf(int size) { + if (managedBuffer.capacity() < size) { + managedBuffer = managedBuffer.reallocIfNeeded(size); + } + return managedBuffer; + } + + protected RuntimeException handleAndGenerate(String s, Exception e) { + throw UserException.dataReadError(e) + .message(s + "\n%s", e.getMessage()) + .addContext("Path", work.getPath()) + .addContext("Split Start", work.getStart()) + .addContext("Split Length", work.getLength()) + .addContext("Local Line Number", lineNumber.get()) + .build(logger); + } + + @Override + public int next() { + try { + final Text currentText = lineReader.createValue(); + + writer.allocate(); + writer.reset(); + int recordCount = 0; + + for (; recordCount < 4095 && lineReader.next(lineNumber, currentText); recordCount++) { + writer.setPosition(recordCount); + parser.parse(record, currentText.toString()); + } + + writer.setValueCount(recordCount); + return recordCount; + } catch (DissectionFailure | InvalidDissectorException | MissingDissectorsException | IOException e) { + throw handleAndGenerate("Failure while reading httpd log record.", e); + } + } + + @Override + public void cleanup() { + + try { + if (lineReader != null) { + lineReader.close(); + } + } catch (IOException e) { + logger.warn("Failure while closing Httpd reader.", e); + } + } + + /** + * Maps Httpd Log Libraries calls to Drills ComplexWriter interface. + */ + public class ComplexWriterFacade { + private final ComplexWriter writer; + private final Map<String, VarCharWriter> stringWriters = Maps.newHashMap(); + private final Map<String, BigIntWriter> longWriters = Maps.newHashMap(); + private final Map<String, Float8Writer> doubleWriters = Maps.newHashMap(); + + private ComplexWriterFacade(ComplexWriter writer) { + this.writer = writer; + } + + @SuppressWarnings("unused") + public void set(final String name, final String value) { + if (value != null) { + final byte[] stringBytes = value.getBytes(Charsets.UTF_8); + final DrillBuf stringBuffer = buf(stringBytes.length); + stringBuffer.clear(); + stringBuffer.writeBytes(stringBytes); + final VarCharWriter writer = stringWriters.get(name); + if (writer != null) { + writer.writeVarChar(0, stringBytes.length, stringBuffer); + } else { + logger.warn("Dropped string. Name: {}, Value: {}", name, value); + } + } + } + + @SuppressWarnings("unused") + public void set(String name, Long value) { + if (value != null) { + longWriters.get(name).writeBigInt(value); + } + } + + @SuppressWarnings("unused") + public void set(String name, Double value) { + if (value != null) { + doubleWriters.get(name).writeFloat8(value); + } + } + + private void add(Parser<ComplexWriterFacade> parser, String path, VarCharWriter writer) + throws NoSuchMethodException, + SecurityException { + stringWriters.put(path, writer); + parser.addParseTarget( + ComplexWriterFacade.class.getMethod("set", new Class[] { String.class, String.class }), + path); + } + + @SuppressWarnings("unused") + private void add(Parser<ComplexWriterFacade> parser, String path, Float8Writer writer) + throws NoSuchMethodException, + SecurityException { + doubleWriters.put(path, writer); + parser.addParseTarget( + ComplexWriterFacade.class.getMethod("set", new Class[] { String.class, Double.class }), + path); + } + + private void add(Parser<ComplexWriterFacade> parser, String path, BigIntWriter writer) + throws NoSuchMethodException, + SecurityException { + longWriters.put(path, writer); + parser.addParseTarget( + ComplexWriterFacade.class.getMethod("set", new Class[] { String.class, Long.class }), + path); + } + + public void addAsParseTarget(Parser<ComplexWriterFacade> parser) { + try { + + for (final String path : parser.getPossiblePaths()) { + switch (path) { + case "IP:connection.client.ip": + add(parser, path, writer.rootAsMap().map("client").varChar("ip")); + break; + case "IP:connection.client.peerip": + add(parser, path, writer.rootAsMap().map("client").varChar("peer_ip")); + break; + case "IP:connection.server.ip": + add(parser, path, writer.rootAsMap().map("server").varChar("ip")); + break; + case "BYTES:response.body.bytes": + add(parser, path, writer.rootAsMap().map("response").bigInt("bytes")); + break; + case "BYTES:response.body.bytesclf": + add(parser, path, writer.rootAsMap().map("response").bigInt("bytes")); + break; + case "HTTP.COOKIE:request.cookies.": + add(parser, path, writer.rootAsMap().map("request").varChar("cookies")); + break; + case "MICROSECONDS:server.process.time": + add(parser, path, writer.rootAsMap().map("response").bigInt("process_time")); + break; + case "FILENAME:server.filename": + add(parser, path, writer.rootAsMap().map("response").varChar("filename")); + break; + case "IP:connection.client.host": + add(parser, path, writer.rootAsMap().map("client").varChar("host")); + break; + case "PROTOCOL:request.protocol": + add(parser, path, writer.rootAsMap().map("request").varChar("protocol")); + break; + case "HTTP.HEADER:request.header.": + add(parser, path, writer.rootAsMap().map("request").varChar("header")); + break; + case "NUMBER:connection.keepalivecount": + add(parser, path, writer.rootAsMap().map("client").bigInt("keepalivecount")); + break; + case "NUMBER:connection.client.logname": + add(parser, path, writer.rootAsMap().map("request").bigInt("logname")); + break; + case "STRING:request.errorlogid": + add(parser, path, writer.rootAsMap().map("request").varChar("errorlogid")); + break; + case "HTTP.METHOD:request.method": + add(parser, path, writer.rootAsMap().map("request").varChar("method")); + break; + case "PORT:request.server.port.canonical": + add(parser, path, writer.rootAsMap().map("server").bigInt("canonical_port")); + break; + case "PORT:connection.server.port.canonical": + add(parser, path, writer.rootAsMap().map("server").bigInt("canonical_port")); + break; + case "PORT:connection.client.port": + add(parser, path, writer.rootAsMap().map("client").bigInt("port")); + break; + case "NUBMER:connection.server.child.processid": + add(parser, path, writer.rootAsMap().map("server").bigInt("process_id")); + break; + case "NUMBER:connection.server.child.threadid": + add(parser, path, writer.rootAsMap().map("server").bigInt("thread_id")); + break; + case "STRING:connection.server.child.hexthreadid": + add(parser, path, writer.rootAsMap().map("connection").varChar("hex_thread_id")); + break; + case "HTTP.QUERYSTRING:request.querystring": + add(parser, path, writer.rootAsMap().map("").varChar("")); + break; + case "HTTP.FIRSTLINE:request.firstline": + add(parser, path, writer.rootAsMap().map("").varChar("")); + break; + case "STRING:request.handler": + add(parser, path, writer.rootAsMap().map("request").varChar("handler")); + break; + case "STRING:request.status.original": + add(parser, path, writer.rootAsMap().map("request").varChar("status_original")); + break; + case "STRING:request.status.last": + add(parser, path, writer.rootAsMap().map("request").varChar("status_last")); + break; + case "TIME.STAMP:request.receive.time": + add(parser, path, writer.rootAsMap().map("request").varChar("timestamp")); + break; + case "TIME.EPOCH:request.receive.time.begin.msec": + add(parser, path, writer.rootAsMap().map("request").bigInt("begin_msec")); + break; + case "TIME.EPOCH:request.receive.time.end.msec": + add(parser, path, writer.rootAsMap().map("request").bigInt("end_msec")); + break; + case "TIME.EPOCH.USEC:request.receive.time.begin.usec": + add(parser, path, writer.rootAsMap().map("request").bigInt("begin_usec")); + break; + case "TIME.EPOCH.USEC:request.receive.time.end.usec": + add(parser, path, writer.rootAsMap().map("request").bigInt("end_usec")); + break; + case "TIME.EPOCH:request.receive.time.begin.msec_frac": + add(parser, path, writer.rootAsMap().map("request").bigInt("begin_msec_frac")); + break; + case "TIME.EPOCH:request.receive.time.end.msec_frac": + add(parser, path, writer.rootAsMap().map("request").varChar("end_msec_frac")); + break; + case "TIME.EPOCH.USEC_FRAC:request.receive.time.begin.usec_frac": + add(parser, path, writer.rootAsMap().map("request").varChar("begin_usec_frac")); + break; + case "TIME.EPOCH.USEC_FRAC:request.receive.time.end.usec_frac": + add(parser, path, writer.rootAsMap().map("request").varChar("end_usec_frac")); + break; + case "SECONDS:response.server.processing.time": + add(parser, path, writer.rootAsMap().map("response").varChar("processing_time")); + break; + case "STRING:connection.client.user": + add(parser, path, writer.rootAsMap().map("client").varChar("user")); + break; + case "URI:request.urlpath": + add(parser, path, writer.rootAsMap().map("request").varChar("url")); + break; + case "STRING:connection.server.name.canonical": + add(parser, path, writer.rootAsMap().map("server").varChar("canonical_name")); + break; + case "STRING:connection.server.name": + add(parser, path, writer.rootAsMap().map("server").varChar("name")); + break; + case "HTTP.CONNECTSTATUS:response.connection.status": + add(parser, path, writer.rootAsMap().map("response").varChar("connection_status")); + break; + case "BYTES:request.bytes": + add(parser, path, writer.rootAsMap().map("request").varChar("bytes")); + break; + case "BYTES:response.bytes": + add(parser, path, writer.rootAsMap().map("response").bigInt("bytes")); + break; + case "HTTP.COOKIES:request.cookies": + add(parser, path, writer.rootAsMap().map("request").varChar("cookies")); + break; + case "HTTP.SETCOOKIES:response.cookies": + add(parser, path, writer.rootAsMap().map("response").varChar("cookies")); + break; + case "HTTP.USERAGENT:request.user-agent": + add(parser, path, writer.rootAsMap().map("request").varChar("useragent")); + break; + case "HTTP.URI:request.referer": + add(parser, path, writer.rootAsMap().map("request").varChar("referer")); + break; + case "HTTP.METHOD:method": + add(parser, path, writer.rootAsMap().map("request").varChar("method")); + break; + case "HTTP.URI:uri": + add(parser, path, writer.rootAsMap().map("request").varChar("uri")); + break; + case "HTTP.PROTOCOL:protocol": + add(parser, path, writer.rootAsMap().map("request").varChar("protocol")); + break; + case "HTTP.PROTOCOL.VERSION:protocol.version": + add(parser, path, writer.rootAsMap().map("request").varChar("protocol_version")); + break; + case "HTTP.METHOD:request.firstline.method": + add(parser, path, writer.rootAsMap().map("request").varChar("method")); + break; + case "HTTP.URI:request.firstline.uri": + add(parser, path, writer.rootAsMap().map("request").varChar("uri")); + break; + case "HTTP.PROTOCOL:request.firstline.protocol": + add(parser, path, writer.rootAsMap().map("request").varChar("protocol")); + break; + case "HTTP.PROTOCOL.VERSION:request.firstline.protocol.version": + add(parser, path, writer.rootAsMap().map("request").varChar("protocol_version")); + break; + default: + + // if we don't know what to do, just write the raw value. + parser.addParseTarget( + ComplexWriterFacade.class.getMethod("set", new Class[] { String.class, String.class }), + path); + final String noPeriodPath = path.replace(".", "_"); + stringWriters.put(path, writer.rootAsMap().varChar(noPeriodPath)); + break; + + } + } + + + } catch (MissingDissectorsException | SecurityException | NoSuchMethodException | InvalidDissectorException e) { + throw handleAndGenerate("Failure while setting up log mappings.", e); + } + } + } + } + + @Override + public boolean supportsPushDown() { + return true; + } + + + @Override + public RecordReader getRecordReader(FragmentContext context, DrillFileSystem dfs, + FileWork fileWork, List<SchemaPath> columns) throws ExecutionSetupException { + return new RecordReader(context, dfs, fileWork); + } + + @Override + public RecordWriter getRecordWriter(FragmentContext context, EasyWriter writer) throws IOException { + throw new UnsupportedOperationException("Drill doesn't currently support writing to HTTPD logs."); + } + + @Override + public int getReaderOperatorType() { + return -1; + } + + @Override + public int getWriterOperatorType() { + return -1; + } + + private class PartiallyDissectedParser<RECORD> extends Parser<RECORD> { + public PartiallyDissectedParser(Class<RECORD> clazz, final String logformat) { + super(clazz); + + addDissector(new ApacheHttpdLogFormatDissector(logformat)); + addDissector(new HttpFirstLineDissector()); + setRootType(ApacheHttpdLogFormatDissector.INPUT_TYPE); + } + + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json index 13d29ea..dab4ada 100644 --- a/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json +++ b/exec/java-exec/src/main/resources/bootstrap-storage-plugins.json @@ -29,6 +29,10 @@ extensions: [ "tsv" ], delimiter: "\t" }, + "httpd" :{ + type: "httpd", + format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Cookie}i\"" + }, "parquet" : { type: "parquet" }, http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdPlugin.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdPlugin.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdPlugin.java new file mode 100644 index 0000000..ce1f685 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHttpdPlugin.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.httpd; + +import org.apache.drill.BaseTestQuery; +import org.junit.Test; + +public class TestHttpdPlugin extends BaseTestQuery { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestHttpdPlugin.class); + + @Test + public void tryBasicQuery() throws Exception { + // test("select * from cp.`store/httpd/example1.httpd`"); + test("select * from dfs.`${WORKING_PATH}/src/test/resources/store/httpd/example1.httpd`"); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json index ec840a6..630db6b 100644 --- a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json +++ b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json @@ -40,6 +40,10 @@ "json" : { type: "json" }, + "httpd" :{ + type: "httpd", + format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Cookie}i\"" + }, "txt" : { type : "text", extensions: [ "txt" ], http://git-wip-us.apache.org/repos/asf/drill/blob/818f9450/exec/java-exec/src/test/resources/store/httpd/example1.httpd ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/httpd/example1.httpd b/exec/java-exec/src/test/resources/store/httpd/example1.httpd new file mode 100644 index 0000000..531503b --- /dev/null +++ b/exec/java-exec/src/test/resources/store/httpd/example1.httpd @@ -0,0 +1 @@ +2001:980:91c0:1:8d31:a232:25e5:85d - - [05/Sep/2010:11:27:50 +0200] "GET /b/ss/advbolprod2/1/H.22.1/s73176445413647?AQB=1&pccr=true&vidn=27F07A1B85012045-4000011500517C43&&ndh=1&t=19%2F5%2F2012%2023%3A51%3A27%202%20-120&ce=UTF-8&ns=bol&pageName=%2Fnl%2Fp%2Ffissler-speciaal-pannen-grillpan-28-x-28-cm%2F9200000002876066%2F&g=http%3A%2F%2Fwww.bol.com%2Fnl%2Fp%2Ffissler-speciaal-pannen-grillpan-28-x-28-cm%2F9200000002876066%2F%3Fpromo%3Dkoken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066%26bltg.pg_nm%3Dkoken-pannen%26bltg.slt_id%3D303%26bltg.slt_nm%3Dhs-koken-pannen-afj-120601%26bltg.slt_p&r=http%3A%2F%2Fwww.bol.com%2Fnl%2Fm%2Fkoken-tafelen%2Fkoken-pannen%2FN%2F11766%2Findex.html%3Fblabla%3Dblablawashere&cc=EUR&ch=D%3Dv3&server=ps316&events=prodView%2Cevent1%2Cevent2%2Cevent31&products=%3B9200000002876066%3B%3B%3B%3Bevar3%3Dkth%7Cevar8%3D9200000002876066_Fissler%20Speciaal%20Pannen%20-%20Grillpan%20-%2028%20x%2028%20cm%7Cevar35%3D170%7Cevar47%3DKTH%7Cevar9%3DNew%7C evar40%3Dno%20reviews%2C%3B%3B%3B%3Bevent31%3D423&c1=catalog%3Akth%3Aproduct-detail&v1=D%3Dc1&h1=catalog%2Fkth%2Fproduct-detail&h2=D%3DpageName&v3=kth&l3=endeca_001-mensen_default%2Cendeca_exact-boeken_default%2Cendeca_verschijningsjaar_default%2Cendeca_hardgoodscategoriesyn_default%2Cendeca_searchrank-hadoop_default%2Cendeca_genre_default%2Cendeca_uitvoering_default&v4=ps316&v6=koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066&v10=Tu%2023%3A30&v12=logged%20in&v13=New&c25=niet%20ssl&c26=3631&c30=84.106.227.113.1323208998208762&v31=2000285551&c45=20120619235127&c46=20120501%204.3.4.1&c47=D%3Ds_vi&c49=%2Fnl%2Fcatalog%2Fproduct-detail.jsp&c50=%2Fnl%2Fcatalog%2Fproduct-detail.jsp&v51=www.bol.com&s=1280x800&c=24&j=1.7&v=N&k=Y&bw=1280&bh=272&p=Shockwave%20Flash%3B&AQE=1 HTTP/1.1" 200 23617 "http://www.google.nl/imgres?imgurl=http://daniel_en_sander.basjes.nl/fotos/geboorte-kaartje/geboortekaartje-binnenkant.jpg&imgrefurl=http://daniel_en_sander.basjes.nl/fotos/gebo orte-kaartje&usg=__LDxRMkacRs6yLluLcIrwoFsXY6o=&h=521&w=1024&sz=41&hl=nl&start=13&zoom=1&um=1&itbs=1&tbnid=Sqml3uGbjoyBYM:&tbnh=76&tbnw=150&prev=/images%3Fq%3Dbinnenkant%2Bgeboortekaartje%26um%3D1%26hl%3Dnl%26sa%3DN%26biw%3D1882%26bih%3D1014%26tbs%3Disch:1" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; nl-nl) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8" "jquery-ui-theme=Eggplant; BuI=SomeThing; Apache=127.0.0.1.1351111543699529" \ No newline at end of file
