This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 08b794b2b9 DRILL-8317: Convert LogRegex Format Plugin to EVF V2 (#2659)
08b794b2b9 is described below
commit 08b794b2b9775767cff4fb779ce9f1f964cc7a9c
Author: Charles S. Givre <[email protected]>
AuthorDate: Fri Sep 23 08:09:35 2022 -0400
DRILL-8317: Convert LogRegex Format Plugin to EVF V2 (#2659)
---
.../store/log => contrib/format-log}/README.md | 0
contrib/format-log/pom.xml | 83 ++++++++++++++++++++++
.../drill/exec/store/log/LogBatchReader.java | 39 ++++------
.../drill/exec/store/log/LogFormatConfig.java | 0
.../drill/exec/store/log/LogFormatField.java | 0
.../drill/exec/store/log/LogFormatPlugin.java | 32 ++++-----
.../src/main/resources/drill-module.conf | 23 ++++++
.../apache/drill/exec/store/log/TestLogReader.java | 0
.../drill/exec/store/log/TestLogReaderIssue.java | 0
.../src/test/resources/regex/baddates.log2 | 0
.../src/test/resources/regex/firewall.ssdlog | 0
.../src/test/resources/regex/large.log1 | 0
.../src/test/resources/regex/mysql.sqllog | 0
.../src/test/resources/regex/mysql.sqllog2 | 0
.../src/test/resources/regex/simple.log1 | 0
.../src/test/resources/regex/simple.log2 | 0
contrib/pom.xml | 1 +
distribution/pom.xml | 5 ++
distribution/src/assemble/component.xml | 1 +
19 files changed, 139 insertions(+), 45 deletions(-)
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/README.md
b/contrib/format-log/README.md
similarity index 100%
rename from
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/README.md
rename to contrib/format-log/README.md
diff --git a/contrib/format-log/pom.xml b/contrib/format-log/pom.xml
new file mode 100644
index 0000000000..aed06493e2
--- /dev/null
+++ b/contrib/format-log/pom.xml
@@ -0,0 +1,83 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>drill-contrib-parent</artifactId>
+ <groupId>org.apache.drill.contrib</groupId>
+ <version>2.0.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>drill-format-log</artifactId>
+ <name>Drill : Contrib : Format : Log Regex</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.drill.exec</groupId>
+ <artifactId>drill-java-exec</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <!-- Test dependencies -->
+ <dependency>
+ <groupId>org.apache.drill.exec</groupId>
+ <artifactId>drill-java-exec</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.drill</groupId>
+ <artifactId>drill-common</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-java-sources</id>
+ <phase>process-sources</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${basedir}/target/classes/org/apache/drill/exec/store/log
+ </outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/java/org/apache/drill/exec/store/log</directory>
+ <filtering>true</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
similarity index 88%
rename from
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
rename to
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
index 1fd1ac3f2a..91e97a6dd7 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
+++
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
@@ -20,8 +20,9 @@ package org.apache.drill.exec.store.log;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.physical.impl.scan.convert.StandardConversions;
-import
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
-import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileDescrip;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
@@ -30,7 +31,6 @@ import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.apache.drill.exec.vector.accessor.TupleWriter;
import org.apache.drill.exec.vector.accessor.ValueWriter;
import org.apache.drill.shaded.guava.com.google.common.base.Charsets;
-import org.apache.hadoop.mapred.FileSplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,7 +41,7 @@ import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-public class LogBatchReader implements ManagedReader<FileSchemaNegotiator> {
+public class LogBatchReader implements ManagedReader {
private static final Logger logger =
LoggerFactory.getLogger(LogBatchReader.class);
public static final String RAW_LINE_COL_NAME = "_raw";
public static final String UNMATCHED_LINE_COL_NAME = "_unmatched_rows";
@@ -126,8 +126,7 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
}
private final LogReaderConfig config;
- private final int maxRecords;
- private FileSplit split;
+ private final FileDescrip file;
private BufferedReader reader;
private ResultSetLoader loader;
private VectorWriter vectorWriter;
@@ -137,19 +136,13 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
private int lineNumber;
private int errorCount;
- public LogBatchReader(LogReaderConfig config, int maxRecords) {
+ public LogBatchReader(LogReaderConfig config, FileSchemaNegotiator
negotiator) {
this.config = config;
- this.maxRecords = maxRecords;
- }
-
- @Override
- public boolean open(FileSchemaNegotiator negotiator) {
- split = negotiator.split();
+ this.file = negotiator.file();
negotiator.tableSchema(config.tableSchema, true);
loader = negotiator.build();
bindColumns(loader.writer());
- openFile(negotiator);
- return true;
+ openFile();
}
private void bindColumns(RowSetLoader writer) {
@@ -188,15 +181,15 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
}
}
- private void openFile(FileSchemaNegotiator negotiator) {
+ private void openFile() {
InputStream in;
try {
- in =
negotiator.fileSystem().openPossiblyCompressedStream(split.getPath());
+ in =
file.fileSystem().openPossiblyCompressedStream(file.split().getPath());
} catch (Exception e) {
throw UserException
.dataReadError(e)
.message("Failed to open input file")
- .addContext(String.format("File path: %s", split.getPath()))
+ .addContext(String.format("File path: %s", file.split().getPath()))
.addContext(loader.errorContext())
.build(logger);
}
@@ -215,10 +208,6 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
}
private boolean nextLine(RowSetLoader rowWriter) {
- if (rowWriter.limitReached(maxRecords)) {
- return false;
- }
-
String line;
try {
line = reader.readLine();
@@ -226,7 +215,7 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
throw UserException
.dataReadError(e)
.message("Error reading file")
- .addContext(String.format("File: %s", split.getPath()))
+ .addContext(String.format("File: %s", file.split().getPath()))
.addContext(loader.errorContext())
.build(logger);
}
@@ -278,7 +267,7 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
try {
reader.close();
} catch (IOException e) {
- logger.warn("Error when closing file: " + split.getPath(), e);
+ logger.warn("Error when closing file: " + file.split().getPath(), e);
} finally {
reader = null;
}
@@ -288,6 +277,6 @@ public class LogBatchReader implements
ManagedReader<FileSchemaNegotiator> {
public String toString() {
return String.format(
"LogRecordReader[File=%s, Line=%d]",
- split.getPath(), lineNumber);
+ file.split().getPath(), lineNumber);
}
}
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
similarity index 100%
rename from
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
rename to
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
similarity index 100%
rename from
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
rename to
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
diff --git
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
similarity index 92%
rename from
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
rename to
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
index b80bed4052..c75efdf461 100644
---
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
+++
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
@@ -18,23 +18,21 @@
package org.apache.drill.exec.store.log;
-import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.physical.impl.scan.columns.ColumnsScanFramework;
-import
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory;
-import
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileScanBuilder;
-import
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
-import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileReaderFactory;
+import
org.apache.drill.exec.physical.impl.scan.v3.file.FileScanLifecycleBuilder;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
import org.apache.drill.exec.record.metadata.ColumnMetadata;
import org.apache.drill.exec.record.metadata.MetadataUtils;
import org.apache.drill.exec.record.metadata.Propertied;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.server.DrillbitContext;
-import org.apache.drill.exec.server.options.OptionSet;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasySubScan;
import org.apache.drill.exec.store.log.LogBatchReader.LogReaderConfig;
@@ -61,16 +59,14 @@ public class LogFormatPlugin extends
EasyFormatPlugin<LogFormatConfig> {
private static class LogReaderFactory extends FileReaderFactory {
private final LogReaderConfig readerConfig;
- private final int maxRecords;
- public LogReaderFactory(LogReaderConfig config, int maxRecords) {
+ public LogReaderFactory(LogReaderConfig config) {
readerConfig = config;
- this.maxRecords = maxRecords;
}
@Override
- public ManagedReader<? extends FileSchemaNegotiator> newReader() {
- return new LogBatchReader(readerConfig, maxRecords);
+ public ManagedReader newReader(FileSchemaNegotiator negotiator) {
+ return new LogBatchReader(readerConfig, negotiator);
}
}
@@ -91,7 +87,7 @@ public class LogFormatPlugin extends
EasyFormatPlugin<LogFormatConfig> {
.fsConf(fsConf)
.defaultName(PLUGIN_NAME)
.readerOperatorType(OPERATOR_TYPE)
- .scanVersion(ScanFrameworkVersion.EVF_V1)
+ .scanVersion(ScanFrameworkVersion.EVF_V2)
.supportsLimitPushdown(true)
.build();
}
@@ -143,8 +139,7 @@ public class LogFormatPlugin extends
EasyFormatPlugin<LogFormatConfig> {
* </ul>
*/
@Override
- protected FileScanBuilder frameworkBuilder(EasySubScan scan, OptionSet
options) throws ExecutionSetupException {
-
+ protected void configureScan(FileScanLifecycleBuilder builder, EasySubScan
scan) {
// Pattern and schema identical across readers; define
// up front.
final TupleMetadata providedSchema = scan.getSchema();
@@ -169,8 +164,6 @@ public class LogFormatPlugin extends
EasyFormatPlugin<LogFormatConfig> {
// Use the file framework to enable support for implicit and partition
// columns.
- final FileScanBuilder builder = new FileScanBuilder();
- initScanBuilder(builder, scan);
if (hasSchema) {
if (!hasColumns) {
@@ -207,14 +200,13 @@ public class LogFormatPlugin extends
EasyFormatPlugin<LogFormatConfig> {
// Pass along the class that will create a batch reader on demand for
// each input file.
- builder.setReaderFactory(new LogReaderFactory(
- new LogReaderConfig(this, pattern, providedSchema, tableSchema,
- readerSchema, !hasSchema, groupCount, maxErrors(providedSchema)),
scan.getMaxRecords()));
+ builder.readerFactory(new LogReaderFactory(
+ new LogReaderConfig(this, pattern, providedSchema, tableSchema,
+ readerSchema, !hasSchema, groupCount, maxErrors(providedSchema))));
// The default type of regex columns is nullable VarChar,
// so let's use that as the missing column type.
builder.nullType(Types.optional(MinorType.VARCHAR));
- return builder;
}
/**
diff --git a/contrib/format-log/src/main/resources/drill-module.conf
b/contrib/format-log/src/main/resources/drill-module.conf
new file mode 100644
index 0000000000..68df1ddac0
--- /dev/null
+++ b/contrib/format-log/src/main/resources/drill-module.conf
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file tells Drill to consider this module when class path scanning.
+# This file can also include any supplementary configuration information.
+# This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
+
+drill.classpath.scanning.packages += "org.apache.drill.exec.store.log"
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
b/contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
similarity index 100%
rename from
exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
rename to
contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
diff --git
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
b/contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
similarity index 100%
rename from
exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
rename to
contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
diff --git a/exec/java-exec/src/test/resources/regex/baddates.log2
b/contrib/format-log/src/test/resources/regex/baddates.log2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/baddates.log2
rename to contrib/format-log/src/test/resources/regex/baddates.log2
diff --git a/exec/java-exec/src/test/resources/regex/firewall.ssdlog
b/contrib/format-log/src/test/resources/regex/firewall.ssdlog
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/firewall.ssdlog
rename to contrib/format-log/src/test/resources/regex/firewall.ssdlog
diff --git a/exec/java-exec/src/test/resources/regex/large.log1
b/contrib/format-log/src/test/resources/regex/large.log1
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/large.log1
rename to contrib/format-log/src/test/resources/regex/large.log1
diff --git a/exec/java-exec/src/test/resources/regex/mysql.sqllog
b/contrib/format-log/src/test/resources/regex/mysql.sqllog
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/mysql.sqllog
rename to contrib/format-log/src/test/resources/regex/mysql.sqllog
diff --git a/exec/java-exec/src/test/resources/regex/mysql.sqllog2
b/contrib/format-log/src/test/resources/regex/mysql.sqllog2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/mysql.sqllog2
rename to contrib/format-log/src/test/resources/regex/mysql.sqllog2
diff --git a/exec/java-exec/src/test/resources/regex/simple.log1
b/contrib/format-log/src/test/resources/regex/simple.log1
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/simple.log1
rename to contrib/format-log/src/test/resources/regex/simple.log1
diff --git a/exec/java-exec/src/test/resources/regex/simple.log2
b/contrib/format-log/src/test/resources/regex/simple.log2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/simple.log2
rename to contrib/format-log/src/test/resources/regex/simple.log2
diff --git a/contrib/pom.xml b/contrib/pom.xml
index d77bfa2a39..6fd6925aae 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -48,6 +48,7 @@
<module>format-excel</module>
<module>format-httpd</module>
<module>format-esri</module>
+ <module>format-log</module>
<module>format-pdf</module>
<module>format-hdf5</module>
<module>format-sas</module>
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 60a2bb1d14..ce39807138 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -469,6 +469,11 @@
<artifactId>drill-format-excel</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.drill.contrib</groupId>
+ <artifactId>drill-format-log</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<dependency>
<groupId>org.apache.drill.contrib</groupId>
<artifactId>drill-druid-storage</artifactId>
diff --git a/distribution/src/assemble/component.xml
b/distribution/src/assemble/component.xml
index 6927b05ff0..38d79f01ae 100644
--- a/distribution/src/assemble/component.xml
+++ b/distribution/src/assemble/component.xml
@@ -49,6 +49,7 @@
<include>org.apache.drill.contrib:drill-format-image:jar</include>
<include>org.apache.drill.contrib:drill-format-pcapng:jar</include>
<include>org.apache.drill.contrib:drill-format-hdf5:jar</include>
+ <include>org.apache.drill.contrib:drill-format-log:jar</include>
<include>org.apache.drill.contrib:drill-format-ltsv:jar</include>
<include>org.apache.drill.contrib:drill-format-httpd:jar</include>
<include>org.apache.drill.contrib:drill-format-pdf:jar</include>